Compare commits
1156 Commits
drift/adop
...
codex/iwoo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
151cb88c15 | ||
|
|
dc2679ea75 | ||
|
|
4f053d97f8 | ||
|
|
356e4d41cc | ||
|
|
920488c5ff | ||
|
|
d41194683b | ||
|
|
7d30b0342c | ||
|
|
3c7a469ae4 | ||
|
|
ce5da0bfb4 | ||
|
|
2b7768639f | ||
|
|
5a23dec72e | ||
|
|
54a93d29ba | ||
|
|
70dfb2eec3 | ||
|
|
537faf6427 | ||
|
|
25d42f1bf8 | ||
|
|
6add97b9d7 | ||
|
|
5d49719bd4 | ||
|
|
27d2740f29 | ||
|
|
636970a21e | ||
|
|
ff6a7c1611 | ||
|
|
07764ce13f | ||
|
|
364551218d | ||
|
|
9e4c4c955a | ||
|
|
e9977f39c1 | ||
|
|
33601f7b1c | ||
|
|
49387477d2 | ||
|
|
b07debf84d | ||
|
|
c017fcf954 | ||
|
|
6737a3d48b | ||
|
|
7461d4de0e | ||
|
|
56c8a41e5b | ||
|
|
fb9e8bffa6 | ||
|
|
aee3a91f6c | ||
|
|
af70ce8e4f | ||
|
|
59b4943bf9 | ||
|
|
ab780892b6 | ||
|
|
7987da7f3f | ||
|
|
e6a433da22 | ||
|
|
d996426337 | ||
|
|
3e964ee4c1 | ||
|
|
c03a57a184 | ||
|
|
337378e55b | ||
|
|
3c1f94a20a | ||
|
|
8699fe0c7f | ||
|
|
8f73058b93 | ||
|
|
165abaeae7 | ||
|
|
bdcb059444 | ||
|
|
716ed5a77c | ||
|
|
af46941ca5 | ||
|
|
ff4a379192 | ||
|
|
86b6481009 | ||
|
|
a8f6a85002 | ||
|
|
a21f94ced1 | ||
|
|
c6d1106cfd | ||
|
|
88f196a040 | ||
|
|
ccea510e87 | ||
|
|
8043eefffa | ||
|
|
f1e4e3949e | ||
|
|
79c34c4cf9 | ||
|
|
7894156ded | ||
|
|
752de4e1b3 | ||
|
|
aee92bc7a3 | ||
|
|
b92025a829 | ||
|
|
dc4ef7ed34 | ||
|
|
f877e707ce | ||
|
|
497e36ba9d | ||
|
|
2022eaa9e8 | ||
|
|
921af1c4c2 | ||
|
|
ff9c939278 | ||
|
|
aa47f4bc31 | ||
|
|
a28f84722b | ||
|
|
e9a8a2b3e9 | ||
|
|
8d9525fb3b | ||
|
|
5ed5022cd7 | ||
|
|
3d8b395032 | ||
|
|
03f2abf576 | ||
|
|
ebd9ca865f | ||
|
|
5bd5e7e49f | ||
|
|
a169669559 | ||
|
|
75f6929bad | ||
|
|
12a3be5f2d | ||
|
|
eedc69909e | ||
|
|
05e87fa91f | ||
|
|
f9a62206ed | ||
|
|
50c9d51df9 | ||
|
|
872d1aa5e4 | ||
|
|
f615ac506e | ||
|
|
e8bf5ba55c | ||
|
|
697fff96d8 | ||
|
|
0db345418f | ||
|
|
42fd9827f5 | ||
|
|
a3479b3254 | ||
|
|
a183dc9b8f | ||
|
|
8b8773ab7b | ||
|
|
4744670e4e | ||
|
|
8c40621d42 | ||
|
|
273071b654 | ||
|
|
1697d91a68 | ||
|
|
1a72a2f664 | ||
|
|
db48ad8678 | ||
|
|
c50da9a2b3 | ||
|
|
e2ab879636 | ||
|
|
943a6feacf | ||
|
|
7b2efc14c4 | ||
|
|
126316a414 | ||
|
|
e1355c8e04 | ||
|
|
dad8c0fbfc | ||
|
|
28cd4b01fe | ||
|
|
57b21a4399 | ||
|
|
8ba6a1c08e | ||
|
|
d6a6519594 | ||
|
|
cd17a67774 | ||
|
|
656c90e01d | ||
|
|
e45e52e526 | ||
|
|
46cc56c3ce | ||
|
|
9080ba3670 | ||
|
|
742980f398 | ||
|
|
3fc9460eef | ||
|
|
b7b4eb53b5 | ||
|
|
83e27fa2b2 | ||
|
|
ca2d95e9f2 | ||
|
|
514c201ff4 | ||
|
|
a192e5f56b | ||
|
|
da519423e1 | ||
|
|
04ac5085cd | ||
|
|
4ea6fb98a6 | ||
|
|
ae7b39d96a | ||
|
|
70637ec871 | ||
|
|
9e093a9525 | ||
|
|
f0a77d79f4 | ||
|
|
d7db0faa4d | ||
|
|
2828865699 | ||
|
|
0836066265 | ||
|
|
92316dda04 | ||
|
|
aeaa77bbe1 | ||
|
|
d6d2719e02 | ||
|
|
badff58cc3 | ||
|
|
7d2128b53c | ||
|
|
aebd1b5b4f | ||
|
|
845e14b8b0 | ||
|
|
1b28dcf3f9 | ||
|
|
5f69416eec | ||
|
|
a842e53332 | ||
|
|
b39fded8c7 | ||
|
|
01c6cb2941 | ||
|
|
5cfee5cf1b | ||
|
|
320718aa36 | ||
|
|
8305454f37 | ||
|
|
81f4751cee | ||
|
|
15f9d3aff5 | ||
|
|
63d0fc6333 | ||
|
|
6aec9489d4 | ||
|
|
87545bc7dd | ||
|
|
bda2f7a0ca | ||
|
|
55d1df24e7 | ||
|
|
a03c5541a4 | ||
|
|
68d01d147b | ||
|
|
f0f4ac2a43 | ||
|
|
8a71934e47 | ||
|
|
dcd8e71a0f | ||
|
|
7870489b08 | ||
|
|
0a2abe81c0 | ||
|
|
50091485a9 | ||
|
|
e28079109c | ||
|
|
480292b04d | ||
|
|
b019a982d8 | ||
|
|
7cfe62313d | ||
|
|
c7cd307422 | ||
|
|
0a981a5990 | ||
|
|
eb6308f7b5 | ||
|
|
88b19259c5 | ||
|
|
a21cb05af3 | ||
|
|
3953ef6d57 | ||
|
|
6112fd07ae | ||
|
|
48a7228fff | ||
|
|
f6b8a91cd0 | ||
|
|
fd253bc93c | ||
|
|
b691367d40 | ||
|
|
c7e26d698c | ||
|
|
5845fa80a4 | ||
|
|
704ed5e0ba | ||
|
|
44f48b68fe | ||
|
|
2c058e5adf | ||
|
|
5f783d5a58 | ||
|
|
b2fc03d09f | ||
|
|
6a379862e7 | ||
|
|
bb1a0722b3 | ||
|
|
32e172ed8b | ||
|
|
f52fdebe0a | ||
|
|
14b617e242 | ||
|
|
dcde86c7f9 | ||
|
|
101b08946a | ||
|
|
5d22f59dde | ||
|
|
345c6781b8 | ||
|
|
900fee47c9 | ||
|
|
1396f1da56 | ||
|
|
9e15fd08b3 | ||
|
|
9ec584943a | ||
|
|
0778a448d8 | ||
|
|
d50de0fa6e | ||
|
|
a8b7299d1c | ||
|
|
f30405997d | ||
|
|
f743321ba8 | ||
|
|
c644cfe993 | ||
|
|
640e35977f | ||
|
|
d004561617 | ||
|
|
9b802aa7c6 | ||
|
|
d0084a5f44 | ||
|
|
0172d3cfa6 | ||
|
|
23fc499b97 | ||
|
|
c792f37440 | ||
|
|
ea151ea54f | ||
|
|
411c0b2bc0 | ||
|
|
41856b2e9b | ||
|
|
5f1c33d73a | ||
|
|
5d05aa38c5 | ||
|
|
72c4ccbf86 | ||
|
|
6e122f0b58 | ||
|
|
44d24b1858 | ||
|
|
0c1f9a1e37 | ||
|
|
449c4ac807 | ||
|
|
b7ee1f47ff | ||
|
|
6116498a32 | ||
|
|
f84482299b | ||
|
|
2e0d7f65c1 | ||
|
|
3fa628417e | ||
|
|
b30005f4c1 | ||
|
|
c38a3a9794 | ||
|
|
48a31ea2b9 | ||
|
|
683984dc47 | ||
|
|
a64145fddf | ||
|
|
ffe479dbcc | ||
|
|
d6d7c27152 | ||
|
|
a8c0ee2af1 | ||
|
|
cd5cabd952 | ||
|
|
6b28e1ecc1 | ||
|
|
bd5340cfe1 | ||
|
|
63b4c3453f | ||
|
|
e5cd01c9cb | ||
|
|
24d9f25fe7 | ||
|
|
67296746c0 | ||
|
|
e570d9f6a9 | ||
|
|
62b07a95ff | ||
|
|
463229848c | ||
|
|
ed3e658578 | ||
|
|
19d306c720 | ||
|
|
1cb480427e | ||
|
|
b9fc8748a5 | ||
|
|
fe3f1e39fc | ||
|
|
58909a5c31 | ||
|
|
9ccf230a5f | ||
|
|
b9356ba1f4 | ||
|
|
2dcd214156 | ||
|
|
8a78344bcc | ||
|
|
6f1e788b67 | ||
|
|
3aed1f3123 | ||
|
|
979eb0fdd0 | ||
|
|
a909bc2ce9 | ||
|
|
5298786180 | ||
|
|
46292459b7 | ||
|
|
f169085cd3 | ||
|
|
4edcb5b586 | ||
|
|
e1e640f5d5 | ||
|
|
814a44d539 | ||
|
|
3ca834c31d | ||
|
|
04684eef5f | ||
|
|
1c8ebdf283 | ||
|
|
c573fd42dd | ||
|
|
dd1c513841 | ||
|
|
0a845498ff | ||
|
|
753879b45f | ||
|
|
ca0045eeeb | ||
|
|
01284d1e4f | ||
|
|
9aba9974e6 | ||
|
|
daf9d4b00b | ||
|
|
4818ba45c0 | ||
|
|
1bee07e765 | ||
|
|
263d752367 | ||
|
|
862f35fee7 | ||
|
|
42efb2fbe8 | ||
|
|
eeece58c0d | ||
|
|
b466674621 | ||
|
|
386468305e | ||
|
|
383a29a139 | ||
|
|
b184a09086 | ||
|
|
ea75ea4633 | ||
|
|
73aad41359 | ||
|
|
390b13e873 | ||
|
|
156660929e | ||
|
|
2c2446e56e | ||
|
|
fcaaad8708 | ||
|
|
760d6745a5 | ||
|
|
318ca645d0 | ||
|
|
a76c5e0801 | ||
|
|
ac4686615f | ||
|
|
ede2b3752b | ||
|
|
825de2ef58 | ||
|
|
4cfc6a4c79 | ||
|
|
1a4ac330b1 | ||
|
|
c16b2931e8 | ||
|
|
0e447bbe47 | ||
|
|
0a8a15075a | ||
|
|
bd2762e76c | ||
|
|
a68bc7f024 | ||
|
|
ded2223d14 | ||
|
|
f4253f22f8 | ||
|
|
63be59ef8a | ||
|
|
0c447acb19 | ||
|
|
d04377dd20 | ||
|
|
beb1c9006b | ||
|
|
a0ac6c090a | ||
|
|
943093a49b | ||
|
|
fb40b8f469 | ||
|
|
63642f3dcb | ||
|
|
630cd5381c | ||
|
|
00cf6f009d | ||
|
|
cda1f86633 | ||
|
|
9bdeebeb1e | ||
|
|
7bb03652f2 | ||
|
|
96d812b7cc | ||
|
|
9b01f1fa46 | ||
|
|
5b8f14e32e | ||
|
|
841b057ada | ||
|
|
b87090be01 | ||
|
|
c9b2e763f5 | ||
|
|
de68514283 | ||
|
|
7fd52d26b5 | ||
|
|
9d89cdddea | ||
|
|
5dacdb4738 | ||
|
|
1a6ce1bcd4 | ||
|
|
0423c43b84 | ||
|
|
0b2657e546 | ||
|
|
1322216f73 | ||
|
|
4874f2b649 | ||
|
|
cd81d604d9 | ||
|
|
dc09dac4d4 | ||
|
|
17b62da59a | ||
|
|
b98f93a62f | ||
|
|
a282eb8c97 | ||
|
|
6a41f1c22f | ||
|
|
4d622f184d | ||
|
|
9281c11eea | ||
|
|
6428a15a11 | ||
|
|
478e25b6a2 | ||
|
|
82e471a7f2 | ||
|
|
bca493e83c | ||
|
|
df922e8c67 | ||
|
|
05dd8450a8 | ||
|
|
54f227c597 | ||
|
|
12c39a17a8 | ||
|
|
80ccf8c16f | ||
|
|
bdccb80ed7 | ||
|
|
b17acbb043 | ||
|
|
df06c025ff | ||
|
|
b20daeabd8 | ||
|
|
c932635057 | ||
|
|
9bac5718da | ||
|
|
06dfdf7ead | ||
|
|
7211d0b7f2 | ||
|
|
22a4b44aef | ||
|
|
f3b85cda4f | ||
|
|
19de834557 | ||
|
|
a6328c3864 | ||
|
|
abcca6521c | ||
|
|
8558ac2d20 | ||
|
|
6d2b0ed4cd | ||
|
|
4407b46bb6 | ||
|
|
22b45006b7 | ||
|
|
8ddc783af5 | ||
|
|
5ed577481f | ||
|
|
f322781798 | ||
|
|
f5f3a10bf6 | ||
|
|
a5ed12937c | ||
|
|
4bdb012caa | ||
|
|
0c59a1aafd | ||
|
|
77e443a681 | ||
|
|
8e68dc1e35 | ||
|
|
4887708717 | ||
|
|
460cc19e76 | ||
|
|
4d6f7225d9 | ||
|
|
da8456cf07 | ||
|
|
5aa46bc95e | ||
|
|
9b465ee140 | ||
|
|
19739339e7 | ||
|
|
7ed4b19b0c | ||
|
|
d3d1c2c27a | ||
|
|
7cc898caf1 | ||
|
|
75f1ef0ca1 | ||
|
|
e4c3662814 | ||
|
|
918e918641 | ||
|
|
2603e43bf2 | ||
|
|
12adc1e364 | ||
|
|
c44188b8ba | ||
|
|
251f5ad658 | ||
|
|
b3ab4da03b | ||
|
|
8164121870 | ||
|
|
290f409d80 | ||
|
|
b63c829f9a | ||
|
|
efc454a346 | ||
|
|
6725aaae5b | ||
|
|
d94f427a09 | ||
|
|
0fc66370c7 | ||
|
|
59d1708034 | ||
|
|
ce3f2fed36 | ||
|
|
be585c4071 | ||
|
|
992bb05e6b | ||
|
|
140c9cdaef | ||
|
|
e89bb267ea | ||
|
|
39f0f7655c | ||
|
|
ebb73af16b | ||
|
|
2380d6f555 | ||
|
|
9206e27103 | ||
|
|
9c966699f0 | ||
|
|
3d1315e103 | ||
|
|
b0f9ab70d2 | ||
|
|
53a3c846e5 | ||
|
|
1ae8f0d179 | ||
|
|
7ae59c1cb0 | ||
|
|
867e0e73df | ||
|
|
89a5a2ea85 | ||
|
|
4b6c9b9554 | ||
|
|
7f91159a1c | ||
|
|
31b95449ff | ||
|
|
bbe081fc57 | ||
|
|
8adae4788c | ||
|
|
7b36864cca | ||
|
|
3f5fb9d8b2 | ||
|
|
b15b61d90b | ||
|
|
50993a4566 | ||
|
|
5aaf4f4148 | ||
|
|
efb38cf6af | ||
|
|
ac7f642e41 | ||
|
|
593d928dea | ||
|
|
fe3bf5dc18 | ||
|
|
d25237a31f | ||
|
|
242b2f415d | ||
|
|
88e7477a7c | ||
|
|
ee5a54ecba | ||
|
|
1c5781018c | ||
|
|
f671637e23 | ||
|
|
72043adac1 | ||
|
|
b5deca91df | ||
|
|
2e54b803f0 | ||
|
|
cf8bb364a3 | ||
|
|
a2cbf9e328 | ||
|
|
508df4c732 | ||
|
|
f3fbd39898 | ||
|
|
e6cc008b87 | ||
|
|
b7aa90ae33 | ||
|
|
ef95d1ef6b | ||
|
|
26cab7a324 | ||
|
|
deccae937d | ||
|
|
017d57c96a | ||
|
|
6003fd03ec | ||
|
|
31cae35edd | ||
|
|
71380224b6 | ||
|
|
ced36f2521 | ||
|
|
b1f666826f | ||
|
|
4ee9689483 | ||
|
|
ae9d0b7385 | ||
|
|
4a9d76d29e | ||
|
|
b7bab4abcc | ||
|
|
c2bf579a99 | ||
|
|
d84bae95cf | ||
|
|
eea9c82f91 | ||
|
|
49ad1cfb1a | ||
|
|
31a49c72de | ||
|
|
2d37149eaf | ||
|
|
3aa90b8ecf | ||
|
|
a60896bd78 | ||
|
|
f79e671819 | ||
|
|
d4573cd00a | ||
|
|
312042ae6d | ||
|
|
fb9c7d930c | ||
|
|
c426b1ce7b | ||
|
|
f85a876868 | ||
|
|
543c938956 | ||
|
|
2eaffe07aa | ||
|
|
b9a0f289b2 | ||
|
|
5b699ec312 | ||
|
|
0870cdf789 | ||
|
|
076946412e | ||
|
|
ed3a16468a | ||
|
|
72af10b43b | ||
|
|
ef811c979b | ||
|
|
4956fbb849 | ||
|
|
1b525b7c18 | ||
|
|
598f33ae8b | ||
|
|
ce0d6a75c4 | ||
|
|
cbb0221f0f | ||
|
|
f542aa52f0 | ||
|
|
89f397594e | ||
|
|
6e5d68eebc | ||
|
|
8fa8d690a2 | ||
|
|
60f7dc23d3 | ||
|
|
426f0dedad | ||
|
|
5bc346b97e | ||
|
|
1d6636cd0d | ||
|
|
20026d4671 | ||
|
|
0c1f126479 | ||
|
|
1faaaf8fbc | ||
|
|
a0e56bbaad | ||
|
|
93070600b4 | ||
|
|
55e642eeaf | ||
|
|
739a8e0f78 | ||
|
|
4a24d3e4fc | ||
|
|
e7691a1f15 | ||
|
|
edb6daef88 | ||
|
|
9b0f68f6c4 | ||
|
|
d19f6ad7a9 | ||
|
|
8a3069755d | ||
|
|
14697ba20e | ||
|
|
967d4b77b6 | ||
|
|
5fe9f725aa | ||
|
|
584d2a77ff | ||
|
|
83ca72e989 | ||
|
|
42b668bbff | ||
|
|
ba904ec4a1 | ||
|
|
839b3ea960 | ||
|
|
b7eb3f7da2 | ||
|
|
d283e65340 | ||
|
|
5ac315c119 | ||
|
|
3c9404d241 | ||
|
|
c8a995aff2 | ||
|
|
101cd42974 | ||
|
|
7569cff19e | ||
|
|
0cd6301d0e | ||
|
|
65badab6fd | ||
|
|
d4e94e88c4 | ||
|
|
04ab2901cc | ||
|
|
3ea90aa331 | ||
|
|
855716b5b8 | ||
|
|
9c122a4a37 | ||
|
|
07744bf83d | ||
|
|
8342cfa460 | ||
|
|
ac0d2329f7 | ||
|
|
de6dbe07c9 | ||
|
|
53f8737546 | ||
|
|
edf97ad8ca | ||
|
|
bda857a8f3 | ||
|
|
ac91ba3e17 | ||
|
|
e2a2e03c79 | ||
|
|
955dbce670 | ||
|
|
9e9b30689f | ||
|
|
2f68b3f472 | ||
|
|
271aadcefe | ||
|
|
b85ab70c45 | ||
|
|
aee0a70021 | ||
|
|
c99be252d3 | ||
|
|
3b50ff3cc3 | ||
|
|
17fbd1a567 | ||
|
|
4452a006bf | ||
|
|
7dc724c9d4 | ||
|
|
a4fe31218b | ||
|
|
61d82b3ad3 | ||
|
|
6ea041d463 | ||
|
|
6f6cf90a17 | ||
|
|
c516f9fc71 | ||
|
|
f0a9b1e00a | ||
|
|
477a7d46a8 | ||
|
|
bf8974be03 | ||
|
|
81ac1f0f55 | ||
|
|
795c9a4e93 | ||
|
|
038f1a0d6d | ||
|
|
d6c941ea39 | ||
|
|
842069a1fd | ||
|
|
3be2c9695a | ||
|
|
8272047371 | ||
|
|
0adebd1add | ||
|
|
169e828ebb | ||
|
|
947a84e6c1 | ||
|
|
dc34e81224 | ||
|
|
815dcf370f | ||
|
|
170f927bc6 | ||
|
|
570b99e9fd | ||
|
|
56a8085dcf | ||
|
|
3477c7569a | ||
|
|
11842170df | ||
|
|
a379a80ce1 | ||
|
|
a0ca2ccb7f | ||
|
|
4de626fcd5 | ||
|
|
35fe37c82a | ||
|
|
8a0a3f89aa | ||
|
|
1b09a64e01 | ||
|
|
45cd55b2da | ||
|
|
5fa0e1452c | ||
|
|
36aeea80a3 | ||
|
|
1d285dd9d4 | ||
|
|
f9d53469f9 | ||
|
|
db4fa420ea | ||
|
|
3514ff38fe | ||
|
|
6da0c3969b | ||
|
|
ab2862a214 | ||
|
|
d0835a7be1 | ||
|
|
50833a0efb | ||
|
|
8234a3ee5b | ||
|
|
10f2f1abaf | ||
|
|
504d038a9e | ||
|
|
1333d24040 | ||
|
|
aa330339b8 | ||
|
|
a0f41658db | ||
|
|
4f151f5da5 | ||
|
|
784ebf49ef | ||
|
|
30b2f5bd6e | ||
|
|
383cc6ab2a | ||
|
|
109f55a12b | ||
|
|
c06d518254 | ||
|
|
3e94fba7e8 | ||
|
|
64b34828a7 | ||
|
|
5bf49f81be | ||
|
|
cc4b16c027 | ||
|
|
a9e7b5f656 | ||
|
|
01ba1e6f13 | ||
|
|
2c4e8bb666 | ||
|
|
107c4f11cc | ||
|
|
9cfae83da3 | ||
|
|
77d85b33c6 | ||
|
|
9843c59450 | ||
|
|
1ca4912270 | ||
|
|
69ed35fb5e | ||
|
|
fa9d2a5d5f | ||
|
|
0b5268a666 | ||
|
|
55ab8732c5 | ||
|
|
12fa97759b | ||
|
|
0367dde686 | ||
|
|
fb9b0b3b7c | ||
|
|
0028993851 | ||
|
|
5c934de83d | ||
|
|
d1ebcdac10 | ||
|
|
51660ecbb1 | ||
|
|
bc99683432 | ||
|
|
b50614528e | ||
|
|
bbf5105fb4 | ||
|
|
d321f44e49 | ||
|
|
4b8f946699 | ||
|
|
e36c9b1800 | ||
|
|
7fa06731da | ||
|
|
4ec116c012 | ||
|
|
41ed3c0421 | ||
|
|
94f8c68b77 | ||
|
|
d709e25d69 | ||
|
|
ba1e7997ad | ||
|
|
213523c77d | ||
|
|
fbde48438b | ||
|
|
17d3c161e4 | ||
|
|
28c2b365b3 | ||
|
|
31f778d60b | ||
|
|
08a75f4b5a | ||
|
|
e4e1244c0f | ||
|
|
aff2a57db7 | ||
|
|
f3494e0bfb | ||
|
|
e81e3f7b8a | ||
|
|
32d4d1ea8b | ||
|
|
0e3c63ec15 | ||
|
|
be551ac761 | ||
|
|
20d62ee0cf | ||
|
|
584bd4b31b | ||
|
|
f35527c7ed | ||
|
|
1a16e083e7 | ||
|
|
ed37000eba | ||
|
|
82e33f6a17 | ||
|
|
c97230252a | ||
|
|
e9e6cda06e | ||
|
|
10965af845 | ||
|
|
8ca875e6ad | ||
|
|
ea96bb0971 | ||
|
|
1ee0740b13 | ||
|
|
79038a6efb | ||
|
|
5d36638c79 | ||
|
|
9d02ab8080 | ||
|
|
b9597d8d70 | ||
|
|
749b210997 | ||
|
|
5cb10a6d2d | ||
|
|
0e7fe211de | ||
|
|
64c7044282 | ||
|
|
989390f7ce | ||
|
|
98a10cbc7b | ||
|
|
df7d957310 | ||
|
|
a023c535db | ||
|
|
161e337e77 | ||
|
|
c4c1e22587 | ||
|
|
3f7bf24b23 | ||
|
|
1a2b04f5cf | ||
|
|
5c240744eb | ||
|
|
9f64739544 | ||
|
|
5d10c8fbfe | ||
|
|
168241e3c5 | ||
|
|
fd0888b092 | ||
|
|
daf672aa1e | ||
|
|
fd5ea0cf94 | ||
|
|
8bacb65a75 | ||
|
|
0dd4b486c5 | ||
|
|
ae18751d17 | ||
|
|
986d1a937d | ||
|
|
9f2974f4c5 | ||
|
|
e8b507be54 | ||
|
|
13d6aa41d8 | ||
|
|
902593f775 | ||
|
|
bc701b8fd3 | ||
|
|
756fe92601 | ||
|
|
41a7ec93d6 | ||
|
|
dca1eb642f | ||
|
|
ec18dec0d3 | ||
|
|
8a7a332190 | ||
|
|
24f4324ae9 | ||
|
|
6b60f6b086 | ||
|
|
a42e40a68c | ||
|
|
f0bb303655 | ||
|
|
40ec5055e1 | ||
|
|
68b20be2b4 | ||
|
|
9e1b15dabf | ||
|
|
06f64c6ddd | ||
|
|
913e1abcfa | ||
|
|
ba971e7a29 | ||
|
|
bb4041579c | ||
|
|
69f2ec5ec9 | ||
|
|
a6699c41f8 | ||
|
|
d4b2cf003f | ||
|
|
76c302ab5f | ||
|
|
2d579cdf1e | ||
|
|
6e9029273b | ||
|
|
ef1e28b73a | ||
|
|
6868a9a93d | ||
|
|
3aabceb234 | ||
|
|
0d9cde51aa | ||
|
|
a3f2b010f8 | ||
|
|
e6a62bb13b | ||
|
|
665e72ba33 | ||
|
|
171443ee94 | ||
|
|
5b8f324523 | ||
|
|
cfaa4d0a4a | ||
|
|
f02923b24a | ||
|
|
06489ef844 | ||
|
|
64fc19b4d5 | ||
|
|
5f3f8fc253 | ||
|
|
0592402779 | ||
|
|
27c2a3d980 | ||
|
|
3ca3502147 | ||
|
|
5af7108b18 | ||
|
|
befe503aa4 | ||
|
|
226f551e77 | ||
|
|
1db4ef093c | ||
|
|
bc89940564 | ||
|
|
6ec424b15c | ||
|
|
615fa23390 | ||
|
|
65001da0d8 | ||
|
|
f4a8390dc0 | ||
|
|
7257aa3a9f | ||
|
|
475f2e452d | ||
|
|
d9d119ede2 | ||
|
|
8d098f564d | ||
|
|
392cfb9025 | ||
|
|
53cd7f9d66 | ||
|
|
9870ed5e30 | ||
|
|
6aaaf87ade | ||
|
|
36cb9d6aeb | ||
|
|
3749cc2ab5 | ||
|
|
04fdaee83a | ||
|
|
102f92dfc3 | ||
|
|
cf173c49d8 | ||
|
|
44f7471b21 | ||
|
|
224ae9e202 | ||
|
|
aa63ae5eca | ||
|
|
f97127f704 | ||
|
|
33e4c9231e | ||
|
|
813d088339 | ||
|
|
0567135647 | ||
|
|
2582ad9425 | ||
|
|
bad48dee04 | ||
|
|
dd269b195c | ||
|
|
b1893395f0 | ||
|
|
485c58d085 | ||
|
|
bc1a11e373 | ||
|
|
e37cbe1910 | ||
|
|
809bc9670b | ||
|
|
6c16a7b162 | ||
|
|
7d3685ef58 | ||
|
|
21dcfbd991 | ||
|
|
d2a4a17969 | ||
|
|
cdb8bf6802 | ||
|
|
80a056539c | ||
|
|
b92c9e285f | ||
|
|
b677cb11de | ||
|
|
368386abc0 | ||
|
|
d1b0ee7e96 | ||
|
|
13cf02b740 | ||
|
|
1670ff1960 | ||
|
|
9b32d3a9e7 | ||
|
|
6220f52266 | ||
|
|
5ef9240583 | ||
|
|
08d28dc44b | ||
|
|
6571260dd2 | ||
|
|
687f37d837 | ||
|
|
e8c4512a40 | ||
|
|
aa8b72043b | ||
|
|
b5288d4b7d | ||
|
|
a9b846c82a | ||
|
|
5604dd0256 | ||
|
|
5361ad8f7e | ||
|
|
6f6d032ca9 | ||
|
|
a91c38675a | ||
|
|
5fb73a5612 | ||
|
|
c42b2dfe06 | ||
|
|
b1ecb55bd6 | ||
|
|
42d0d076d6 | ||
|
|
d835b666cf | ||
|
|
39581ab824 | ||
|
|
a0a0731cd6 | ||
|
|
5161a9dfd6 | ||
|
|
7a8cbb3241 | ||
|
|
ae643552e9 | ||
|
|
8885c1b49d | ||
|
|
4ee57b710d | ||
|
|
5a31702885 | ||
|
|
dcaf16cecc | ||
|
|
07ed014a83 | ||
|
|
c5f4bafcaf | ||
|
|
1277865343 | ||
|
|
7df94e9bef | ||
|
|
8bb601eecd | ||
|
|
1778a692e0 | ||
|
|
0337b62349 | ||
|
|
39e6ce747d | ||
|
|
e947e60d11 | ||
|
|
a21fc0f35a | ||
|
|
77aace7515 | ||
|
|
eb73591286 | ||
|
|
011085ce3d | ||
|
|
a524e468e4 | ||
|
|
365d93f07e | ||
|
|
795085170a | ||
|
|
c888444287 | ||
|
|
ea320a2087 | ||
|
|
ebf0f57272 | ||
|
|
dc865cf53d | ||
|
|
8d7b938f78 | ||
|
|
453e22f80d | ||
|
|
c6e47526a7 | ||
|
|
9b7a91d828 | ||
|
|
c2d01eb6f1 | ||
|
|
21042ad0e7 | ||
|
|
bcf2ed7841 | ||
|
|
6064e6d03f | ||
|
|
830dc0dcd0 | ||
|
|
88dbcd912e | ||
|
|
2f5d812608 | ||
|
|
74c47672da | ||
|
|
872abea008 | ||
|
|
edba52f401 | ||
|
|
596f2f6820 | ||
|
|
c68cbd3139 | ||
|
|
9c9cf68063 | ||
|
|
3bad354414 | ||
|
|
518a16e895 | ||
|
|
a28baa6197 | ||
|
|
2314badec5 | ||
|
|
cecadb331b | ||
|
|
55b28336e5 | ||
|
|
22beddc8a8 | ||
|
|
c1e2567b15 | ||
|
|
90156a7c1a | ||
|
|
356bfce2c8 | ||
|
|
94fc25dc39 | ||
|
|
e4203060f3 | ||
|
|
aafe7273e3 | ||
|
|
d339e3ebad | ||
|
|
ae7c7cbd23 | ||
|
|
c00e911b28 | ||
|
|
15ff939b1f | ||
|
|
0f080240c6 | ||
|
|
d886526f23 | ||
|
|
d33856f874 | ||
|
|
04c7bb1c97 | ||
|
|
3a1cedc90d | ||
|
|
d449ba4720 | ||
|
|
e2785899a2 | ||
|
|
e57474adfb | ||
|
|
971afafc01 | ||
|
|
7fa9f743dd | ||
|
|
7d506b785d | ||
|
|
8e14f1bf3e | ||
|
|
c486087294 | ||
|
|
51528b2cf9 | ||
|
|
5daa005c1b | ||
|
|
a99dccfc73 | ||
|
|
90603ad9bb | ||
|
|
77877dd501 | ||
|
|
34bfe56f53 | ||
|
|
ce83e8dc00 | ||
|
|
a0a2a5b1f0 | ||
|
|
85a1bcef52 | ||
|
|
f19fe4aa90 | ||
|
|
1a03bceb5c | ||
|
|
15873b9e0c | ||
|
|
8ac4ba24f7 | ||
|
|
42789dbe9e | ||
|
|
7ed9859260 | ||
|
|
0b707495a1 | ||
|
|
e177eca25d | ||
|
|
146cf411ae | ||
|
|
57ed07d1d0 | ||
|
|
5ecd21e664 | ||
|
|
c01012d767 | ||
|
|
af9798a62e | ||
|
|
5294f0712f | ||
|
|
631fc22090 | ||
|
|
1003fa4246 | ||
|
|
54814bc65e | ||
|
|
3d38039b86 | ||
|
|
5b34877429 | ||
|
|
b0a8302dd7 | ||
|
|
90b9ddb7a5 | ||
|
|
3799e0db0d | ||
|
|
f61747aeac | ||
|
|
07000dae3a | ||
|
|
49ffb5bb19 | ||
|
|
ca80972dc7 | ||
|
|
feda8a0b4b | ||
|
|
124c3c545b | ||
|
|
dba3e405f4 | ||
|
|
b4d367eeb4 | ||
|
|
b81cb28615 | ||
|
|
c18c6f6fe2 | ||
|
|
94d006eac8 | ||
|
|
96a8cf3ad5 | ||
|
|
f318fd3a89 | ||
|
|
1a62c322bc | ||
|
|
24b15f4ad2 | ||
|
|
c652f37b69 | ||
|
|
c523a22d89 | ||
|
|
f7c84530d6 | ||
|
|
56228dbb79 | ||
|
|
de16c88418 | ||
|
|
edd06485e0 | ||
|
|
7f94bc5776 | ||
|
|
7d92f0acd7 | ||
|
|
b7af597459 | ||
|
|
1617b73a9d | ||
|
|
8c4dc7a5a8 | ||
|
|
be8ddf4599 | ||
|
|
ff30c61c4c | ||
|
|
33c0577e93 | ||
|
|
f0255e0300 | ||
|
|
0bc1878778 | ||
|
|
a18e2f9c3f | ||
|
|
6b02f49fc6 | ||
|
|
216b7d78e2 | ||
|
|
abdab85362 | ||
|
|
116fdbb33f | ||
|
|
9db1e9b7a5 | ||
|
|
1a74286dfa | ||
|
|
b437a33043 | ||
|
|
03ba9678d5 | ||
|
|
d74beb2176 | ||
|
|
f824308b6a | ||
|
|
cb7151cc27 | ||
|
|
ad8ead2546 | ||
|
|
d356cd32fc | ||
|
|
80c36ba801 | ||
|
|
afb5f9556e | ||
|
|
b3dc41fcd4 | ||
|
|
c88d82f2ac | ||
|
|
395cf742b9 | ||
|
|
72d86ba70b | ||
|
|
a26ccf8d80 | ||
|
|
77ef400598 | ||
|
|
08097f4070 | ||
|
|
32e8a045f4 | ||
|
|
814f5d8c6c | ||
|
|
4f0d677e18 | ||
|
|
5d38115d2f | ||
|
|
200b760512 | ||
|
|
83f4ab0dad | ||
|
|
2df36b11e2 | ||
|
|
1b7f46f02c | ||
|
|
6ae3a55aed | ||
|
|
94e680add4 | ||
|
|
4810125e9a | ||
|
|
3df23112ef | ||
|
|
2ccc9d3071 | ||
|
|
624c1b26c3 | ||
|
|
beba668a4c | ||
|
|
c52ebfc042 | ||
|
|
8b9a974c66 | ||
|
|
f960a4a19b | ||
|
|
9d85ec5e96 | ||
|
|
c00c7be9ae | ||
|
|
336fd76774 | ||
|
|
cd637ef616 | ||
|
|
66e22e26cb | ||
|
|
f10ab71c52 | ||
|
|
d5555697a1 | ||
|
|
3f69e03fcb | ||
|
|
57df3582dd | ||
|
|
14180182d3 | ||
|
|
6ac61ab6d7 | ||
|
|
968de38a94 | ||
|
|
e5fd9395f7 | ||
|
|
251554c044 | ||
|
|
1a1dea00eb | ||
|
|
8485d99336 | ||
|
|
c49246b8c6 | ||
|
|
67c70c071b | ||
|
|
18b34fed31 | ||
|
|
1f4a16e625 | ||
|
|
1a72f771de | ||
|
|
68e741e0c3 | ||
|
|
341c3b6523 | ||
|
|
f046742a4f | ||
|
|
b1167edde7 | ||
|
|
82e9aea057 | ||
|
|
2a8b96cc7f | ||
|
|
328b24de6a | ||
|
|
de4d35e184 | ||
|
|
ecc65be6e1 | ||
|
|
7b98f71393 | ||
|
|
cf0b6be695 | ||
|
|
9365bdab93 | ||
|
|
012cd27b4a | ||
|
|
678d489978 | ||
|
|
c5964fbcd3 | ||
|
|
886657473e | ||
|
|
d2d29185c9 | ||
|
|
7f4f5b24ba | ||
|
|
d2205dc1c0 | ||
|
|
19e721d4af | ||
|
|
9dfecc4d1b | ||
|
|
53994e75f0 | ||
|
|
2e06077337 | ||
|
|
8396d37275 | ||
|
|
150f17b219 | ||
|
|
9a3afa11ed | ||
|
|
edef1aa4c7 | ||
|
|
780a742110 | ||
|
|
a0179cec6e | ||
|
|
ea6b7d8f27 | ||
|
|
dd75a3b943 | ||
|
|
ea5ad040da | ||
|
|
b2f0db0717 | ||
|
|
93c4b62826 | ||
|
|
a132bee1d7 | ||
|
|
d0e98192de | ||
|
|
bcb9397c38 | ||
|
|
1a1ab0df6e | ||
|
|
572e7640cd | ||
|
|
2ece75935e | ||
|
|
2aaaa5654f | ||
|
|
8882301243 | ||
|
|
3aba5c7f9a | ||
|
|
2ef54ccc94 | ||
|
|
d90414ddfa | ||
|
|
a158b77422 | ||
|
|
d79ec4f647 | ||
|
|
ef3b05439a | ||
|
|
0e2e856f12 | ||
|
|
9b0f55fd90 | ||
|
|
7473a01322 | ||
|
|
38b61e290e | ||
|
|
fa0e956c0e | ||
|
|
76aaaf480c | ||
|
|
c1ac157aaf | ||
|
|
73d7e332a4 | ||
|
|
33f85ec8ca | ||
|
|
38a4748e17 | ||
|
|
8f715fd3f2 | ||
|
|
a94435f143 | ||
|
|
a7a9ba996d | ||
|
|
fcf93aac11 | ||
|
|
1d9dbac112 | ||
|
|
4e9981c182 | ||
|
|
7ed8c95409 | ||
|
|
1e68d45659 | ||
|
|
60c00d7a5d | ||
|
|
72811b967e | ||
|
|
927c2a758d | ||
|
|
e5094c5c53 | ||
|
|
154aec849e | ||
|
|
22453161e9 | ||
|
|
d3e1b61096 | ||
|
|
f88a3a846b | ||
|
|
2adbf1e6cd | ||
|
|
6c4f8379ad | ||
|
|
d441f70693 | ||
|
|
033ac8129b | ||
|
|
4111ea4f9f | ||
|
|
578bf3bc7c | ||
|
|
ffd767d4bb | ||
|
|
6e2ab7cedc | ||
|
|
c4f40235f4 | ||
|
|
4753099155 | ||
|
|
eb71bc61ed | ||
|
|
8ae7789e93 | ||
|
|
2c2bf9d665 | ||
|
|
56b4d8165b | ||
|
|
c696b99ccf | ||
|
|
e6eae5cdc4 | ||
|
|
072cc23a42 | ||
|
|
682c0b9995 | ||
|
|
96ad3a18ee | ||
|
|
9ef9633aff | ||
|
|
df5e6c6626 | ||
|
|
d2aebdd477 | ||
|
|
09256be62c | ||
|
|
a4fece11cc | ||
|
|
c2c0b1ec82 | ||
|
|
1d0e80c091 | ||
|
|
3b64d66836 | ||
|
|
5890fffd7f | ||
|
|
eced8617d3 | ||
|
|
587551c1f1 | ||
|
|
a2c4b3d47e | ||
|
|
20ef0c1455 | ||
|
|
cb9551fb00 | ||
|
|
5ed396e390 | ||
|
|
6e96623884 | ||
|
|
87ce02f34d | ||
|
|
0315c2b510 | ||
|
|
2aa31c205a | ||
|
|
23932773ef | ||
|
|
2f50c67f5c | ||
|
|
85d5b5c823 | ||
|
|
25b1923d2e | ||
|
|
e208798531 | ||
|
|
1ba36697ca | ||
|
|
405b8b8ef9 | ||
|
|
1cc215ec30 | ||
|
|
83daeb3f87 | ||
|
|
c4854bb355 | ||
|
|
1dcc6d61dc | ||
|
|
ed7c6946cb | ||
|
|
7baa316224 | ||
|
|
31fd9cbf48 | ||
|
|
e8f279280f | ||
|
|
787acd3bda | ||
|
|
86bd6432ee | ||
|
|
bf847ad045 | ||
|
|
a4e9a04982 | ||
|
|
72a1d33f9d | ||
|
|
bec82127e7 | ||
|
|
8f83773431 | ||
|
|
8495a45002 | ||
|
|
333c8a9cfd | ||
|
|
1baeb7ee61 | ||
|
|
ee5e3bc94f | ||
|
|
7b0a4bce98 | ||
|
|
2221fd3256 | ||
|
|
84a661beaf | ||
|
|
6b93c8f454 | ||
|
|
3a17a860a0 | ||
|
|
6ec5c06bad | ||
|
|
44d8322c4d | ||
|
|
819734f655 | ||
|
|
1cc9de5722 | ||
|
|
96c1ba20da | ||
|
|
855a39ad95 | ||
|
|
209da7ba33 |
@@ -1,832 +0,0 @@
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Read(**)",
|
||||
"Glob(**)",
|
||||
"Grep(**)",
|
||||
"Bash(curl *)",
|
||||
"Bash(kubectl get *)",
|
||||
"Bash(kubectl describe *)",
|
||||
"Bash(kubectl logs *)",
|
||||
"Bash(kubectl rollout status *)",
|
||||
"Bash(docker ps *)",
|
||||
"Bash(docker logs *)",
|
||||
"Bash(ls *)",
|
||||
"Bash(cat *)",
|
||||
"Bash(head *)",
|
||||
"Bash(tail *)",
|
||||
"Bash(grep *)",
|
||||
"Bash(find *)",
|
||||
"Bash(pwd)",
|
||||
"Bash(which *)",
|
||||
"Bash(echo *)",
|
||||
"Bash(git status *)",
|
||||
"Bash(git log *)",
|
||||
"Bash(git diff *)",
|
||||
"Bash(git branch *)",
|
||||
"Bash(git remote *)",
|
||||
"Edit(**)",
|
||||
"Write(apps/**)",
|
||||
"Write(packages/**)",
|
||||
"Write(docs/**)",
|
||||
"Write(.agents/**)",
|
||||
"Write(k8s/**)",
|
||||
"Write(scripts/**)",
|
||||
"Bash(pnpm *)",
|
||||
"Bash(npm *)",
|
||||
"Bash(npx *)",
|
||||
"Bash(node *)",
|
||||
"Bash(python *)",
|
||||
"Bash(python3 *)",
|
||||
"Bash(pip *)",
|
||||
"Bash(cd *)",
|
||||
"Bash(mkdir *)",
|
||||
"Bash(touch *)",
|
||||
"Bash(cp *)",
|
||||
"Bash(mv *)",
|
||||
"Bash(chmod *)",
|
||||
"Bash(pytest *)",
|
||||
"Bash(playwright *)",
|
||||
"Bash(git add *)",
|
||||
"Bash(git commit *)",
|
||||
"Bash(git stash *)",
|
||||
"Bash(ssh *)",
|
||||
"Bash(scp *)",
|
||||
"Bash(export KUBECONFIG=*)",
|
||||
"Bash(git push:*)",
|
||||
"Bash(claude --version)",
|
||||
"Bash(git check-ignore:*)",
|
||||
"WebSearch",
|
||||
"Bash(claude plugin:*)",
|
||||
"Bash(claude --channels)",
|
||||
"Bash(claude --channels plugin:telegram@claude-plugins-official --help)",
|
||||
"Bash(bash)",
|
||||
"Bash(source ~/.zshrc)",
|
||||
"Bash(~/.bun/bin/bun --version)",
|
||||
"Bash(env)",
|
||||
"Bash(claude upgrade:*)",
|
||||
"Bash(/Users/ogt/.local/bin/claude --help)",
|
||||
"Bash(CLAUDE_CODE_EXPERIMENTAL_CHANNELS=1 claude --help)",
|
||||
"Bash(claude --channels plugin:telegram@claude-plugins-official --print \"hello\")",
|
||||
"Bash(mkdir -p ~/.claude/channels/telegram)",
|
||||
"Bash(~/.claude/channels/telegram/.env)",
|
||||
"Bash(~/.bun/bin/bun run:*)",
|
||||
"Bash(sudo ln:*)",
|
||||
"Bash(ln -sf ~/.bun/bin/bun /opt/homebrew/bin/bun)",
|
||||
"Bash(xargs python:*)",
|
||||
"Bash(uv --version)",
|
||||
"Bash(pip3 install:*)",
|
||||
"Bash(pip3 show:*)",
|
||||
"Bash(ruff *)",
|
||||
"Bash(mypy *)",
|
||||
"Bash(black *)",
|
||||
"Bash(isort *)",
|
||||
"Bash(timeout *)",
|
||||
"Bash(wc *)",
|
||||
"Bash(sort *)",
|
||||
"Bash(uniq *)",
|
||||
"Bash(awk *)",
|
||||
"Bash(sed *)",
|
||||
"Bash(tr *)",
|
||||
"Bash(tee *)",
|
||||
"Bash(xargs *)",
|
||||
"Bash(test *)",
|
||||
"Bash([ *)",
|
||||
"Bash(true)",
|
||||
"Bash(false)",
|
||||
"Bash(date *)",
|
||||
"Bash(sleep *)",
|
||||
"Bash(kill *)",
|
||||
"Bash(pkill *)",
|
||||
"Bash(ps *)",
|
||||
"Bash(top *)",
|
||||
"Bash(htop *)",
|
||||
"Bash(df *)",
|
||||
"Bash(du *)",
|
||||
"Bash(free *)",
|
||||
"Bash(uname *)",
|
||||
"Bash(hostname *)",
|
||||
"Bash(whoami)",
|
||||
"Bash(id *)",
|
||||
"Bash(groups *)",
|
||||
"Bash(stat *)",
|
||||
"Bash(file *)",
|
||||
"Bash(realpath *)",
|
||||
"Bash(dirname *)",
|
||||
"Bash(basename *)",
|
||||
"Bash(type *)",
|
||||
"Bash(command *)",
|
||||
"Bash(hash *)",
|
||||
"Bash(alias *)",
|
||||
"Bash(set *)",
|
||||
"Bash(unset *)",
|
||||
"Bash(printenv *)",
|
||||
"Bash(diff *)",
|
||||
"Bash(cmp *)",
|
||||
"Bash(comm *)",
|
||||
"Bash(join *)",
|
||||
"Bash(paste *)",
|
||||
"Bash(cut *)",
|
||||
"Bash(rev *)",
|
||||
"Bash(nl *)",
|
||||
"Bash(fmt *)",
|
||||
"Bash(fold *)",
|
||||
"Bash(pr *)",
|
||||
"Bash(expand *)",
|
||||
"Bash(unexpand *)",
|
||||
"Bash(od *)",
|
||||
"Bash(xxd *)",
|
||||
"Bash(hexdump *)",
|
||||
"Bash(strings *)",
|
||||
"Bash(base64 *)",
|
||||
"Bash(md5sum *)",
|
||||
"Bash(sha256sum *)",
|
||||
"Bash(jq *)",
|
||||
"Bash(yq *)",
|
||||
"Bash(gh *)",
|
||||
"Bash(docker build *)",
|
||||
"Bash(docker run *)",
|
||||
"Bash(docker exec *)",
|
||||
"Bash(docker compose *)",
|
||||
"Bash(docker-compose *)",
|
||||
"Bash(docker images *)",
|
||||
"Bash(docker inspect *)",
|
||||
"Bash(docker network *)",
|
||||
"Bash(docker volume *)",
|
||||
"Bash(kubectl apply *)",
|
||||
"Bash(kubectl create *)",
|
||||
"Bash(kubectl exec *)",
|
||||
"Bash(kubectl port-forward *)",
|
||||
"Bash(kubectl config *)",
|
||||
"Bash(helm *)",
|
||||
"Bash(terraform *)",
|
||||
"Bash(ansible *)",
|
||||
"Bash(bun *)",
|
||||
"Bash(deno *)",
|
||||
"Bash(cargo *)",
|
||||
"Bash(rustc *)",
|
||||
"Bash(go *)",
|
||||
"Bash(java *)",
|
||||
"Bash(javac *)",
|
||||
"Bash(gradle *)",
|
||||
"Bash(mvn *)",
|
||||
"Bash(make *)",
|
||||
"Bash(cmake *)",
|
||||
"Bash(ninja *)",
|
||||
"Bash(uv *)",
|
||||
"Bash(poetry *)",
|
||||
"Bash(pipx *)",
|
||||
"Bash(virtualenv *)",
|
||||
"Bash(venv *)",
|
||||
"Bash(conda *)",
|
||||
"Bash(brew *)",
|
||||
"Bash(apt *)",
|
||||
"Bash(apt-get *)",
|
||||
"Bash(yum *)",
|
||||
"Bash(dnf *)",
|
||||
"Bash(pacman *)",
|
||||
"Bash(snap *)",
|
||||
"Bash(flatpak *)",
|
||||
"Bash(systemctl status *)",
|
||||
"Bash(journalctl *)",
|
||||
"Bash(service * status)",
|
||||
"Bash(nc *)",
|
||||
"Bash(netstat *)",
|
||||
"Bash(ss *)",
|
||||
"Bash(lsof *)",
|
||||
"Bash(nmap *)",
|
||||
"Bash(dig *)",
|
||||
"Bash(nslookup *)",
|
||||
"Bash(host *)",
|
||||
"Bash(ping *)",
|
||||
"Bash(traceroute *)",
|
||||
"Bash(mtr *)",
|
||||
"Bash(wget *)",
|
||||
"Bash(http *)",
|
||||
"Bash(httpie *)",
|
||||
"Bash(hadolint apps/api/Dockerfile)",
|
||||
"Bash(docker info:*)",
|
||||
"Bash(kubectl cluster-info:*)",
|
||||
"Read(//var/run/**)",
|
||||
"Bash(open -a Docker)",
|
||||
"Bash(git rm:*)",
|
||||
"Bash(git reset:*)",
|
||||
"Bash(kubectl --kubeconfig ~/.kube/config get pods -n awoooi -o wide)",
|
||||
"Bash(kubectl scale:*)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollam@192.168.0.188 \"docker ps -a | grep -i claw\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps -a | grep -i claw\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker start clawbot && sleep 3 && docker logs clawbot --tail=10\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps | grep clawbot && docker port clawbot\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail=30\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/clawbot/.env | grep -E ''\\(TG_|TELEGRAM\\)'' | head -5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker inspect clawbot --format=''{{range .Mounts}}{{.Source}}:{{.Destination}} {{end}}''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker inspect clawbot --format=''{{range .Config.Env}}{{println .}}{{end}}'' | grep -E ''\\(TG_|TELEGRAM|ENABLED\\)''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i ''logout\\\\|log.out\\\\|shutdown\\\\|stop'' | tail -20\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -E ''\\(getMe|getUpdates|sendMessage\\).*200'' | tail -5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i ''success\\\\|started\\\\|初始化'' | head -20\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -E ''2026-03-\\(19|20|21\\)'' | grep -i ''error\\\\|fail\\\\|logout\\\\|400\\\\|401'' | head -20\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker stop clawbot && docker rm clawbot && echo ''✅ OpenClaw 已永久停用''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker-compose ps 2>/dev/null || ls -la docker-compose.yml 2>/dev/null || find /home/ollama -name ''docker-compose*.yml'' -type f 2>/dev/null | head -5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker-compose up -d && sleep 3 && docker-compose ps\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker compose up -d 2>&1 || docker run -d --name clawbot --restart unless-stopped -p 8088:8088 -v /var/run/docker.sock:/var/run/docker.sock 192.168.0.110:5000/library/clawbot:stable-v6 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail=15 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''table {{.Names}}\\\\t{{.Status}}'' | grep -E ''clawbot|litellm''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && sed -i ''s|TELEGRAM_BOT_TOKEN=.*|TELEGRAM_BOT_TOKEN=8569720657:AAHrJ5CMOb4rP0IYJrCUiDViLsnpK69uEUI|'' .env && grep TELEGRAM_BOT_TOKEN .env\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker compose down && docker compose up -d && sleep 5 && docker logs clawbot --tail=10\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''{{.Names}}'' | grep -i alert\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker stop alertmanager && docker rm alertmanager && echo ''✅ 舊 AIOPS Alertmanager 已停用''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''table {{.Names}}\\\\t{{.Image}}\\\\t{{.Status}}''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/momo-pro/monitoring/prometheus/alert_rules.yml 2>/dev/null | grep -A5 ''ClawbotDown\\\\|telegram\\\\|AIOPS'' | head -30\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"find /home/ollama -name ''*.yml'' -type f 2>/dev/null | xargs grep -l ''ClawbotDown\\\\|telegram'' 2>/dev/null | head -5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec clawbot grep -r ''協同警報\\\\|ClawbotDown'' /app 2>/dev/null | head -5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec prometheus cat /etc/prometheus/prometheus.yml 2>/dev/null | grep -A10 ''alerting\\\\|alertmanager''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps | grep -i alert || echo ''✅ 沒有 alertmanager 在運行''\")",
|
||||
"Bash(jq -r '.status, .components | to_entries[] | \"\"\"\"\\\\\\(.key\\): \\\\\\(.value.status\\)\"\"\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''table {{.Names}}\\\\t{{.Status}}'' | grep clawbot && docker logs clawbot --tail=15\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker inspect clawbot --format=''{{range .Config.Env}}{{println .}}{{end}}'' | grep TELEGRAM\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && sed -i ''s|TELEGRAM_BOT_TOKEN=.*|TELEGRAM_BOT_TOKEN=8569720657:AAFjDyjAN94QQrjn1gBnFXAyS20EUyozH8c|'' .env && docker compose down && docker compose up -d && sleep 5 && docker logs clawbot --tail=10\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec clawbot grep -r ''ClawBotDown\\\\|ClawbotDown'' /app 2>/dev/null | head -5 || echo ''在程式碼中找不到''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec prometheus cat /etc/prometheus/alerts.yml 2>/dev/null | grep -A10 ''ClawBot\\\\|clawbot'' | head -30\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec prometheus cat /etc/prometheus/alerts.yml 2>/dev/null | grep -i ''clawbot\\\\|claw'' -A5 -B5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --since=5m 2>&1 | grep -i ''clawbot\\\\|incident\\\\|alert'' | tail -20\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail 50 2>&1\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i ''telegram\\\\|polling\\\\|bot'' | tail -20\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''table {{.Names}}\\\\t{{.Status}}\\\\t{{.Ports}}'' | grep -E ''claw|NAME''\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -E ''telegram|Telegram|error|Error'' | tail -20\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps | grep ollama\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps -a --format ''table {{.Names}}\\\\t{{.Status}}'' | head -20\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"sed -i ''s|host.docker.internal|172.17.0.1|g'' /home/ollama/clawbot-v5/.env && cat /home/ollama/clawbot-v5/.env | grep OLLAMA\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker-compose restart clawbot && sleep 3 && docker logs clawbot --tail 30 2>&1\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker compose restart clawbot && sleep 5 && docker logs clawbot --tail 30 2>&1\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec clawbot curl -s http://172.17.0.1:11434/api/tags | head -c 200\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | tail -10\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -iE ''error|telegram|polling|alert|send'' | tail -30\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/clawbot-v5/.env | grep OLLAMA\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker compose up -d --force-recreate clawbot && sleep 5 && docker logs clawbot 2>&1 | tail -20\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec clawbot curl -s http://172.17.0.1:11434/api/tags | head -c 100\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --since 5m 2>&1 | tail -30\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec momo-db psql -U postgres -d clawbot -c \"\"SELECT enum_range\\(NULL::approvalstatus\\);\"\"\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec -e PGPASSWORD=clawbot123 momo-db psql -U clawbot -d clawbot -c \"\"SELECT enum_range\\(NULL::approvalstatus\\);\"\"\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps | grep -E ''postgres|db''\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec momo-db env | grep -i postgres\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"PGPASSWORD=AwoooiProd2026 psql -h localhost -U awoooi -d awoooi_prod -c \"\"SELECT enum_range\\(NULL::approvalstatus\\);\"\"\")",
|
||||
"Bash(KUBECONFIG=~/.kube/config kubectl config get-contexts)",
|
||||
"Bash(docker tag:*)",
|
||||
"Bash(docker push:*)",
|
||||
"Bash(ssh ollama@192.168.0.188 \"cd ~/awoooi-build && find apps/web/src -name ''''*.ts'''' -o -name ''''*.tsx'''' | head -30 | xargs md5sum\")",
|
||||
"Bash(rsync -avz --exclude 'node_modules' --exclude '.next' --exclude '.turbo' --exclude '*.log' /Users/ogt/awoooi/ ollama@192.168.0.188:~/awoooi-build/)",
|
||||
"Bash(gh run:*)",
|
||||
"Bash(APPROVAL_ID=\"ea43578e-17cd-40b9-b4c3-8fe8e92f225c\" __NEW_LINE_76dc92b2699cd7d5__ echo \"=== 檢查 Approval Metadata ===\" curl -s \"https://awoooi.wooo.work/api/v1/approvals/pending\")",
|
||||
"Bash(APPROVAL_ID=\"865ab726-c3b9-447e-86a9-65a6227516e6\" __NEW_LINE_db14ef76ca26af32__ echo \"=== 簽核 ===\" curl -s -X POST \"https://awoooi.wooo.work/api/v1/approvals/$APPROVAL_ID/sign\" -H \"Content-Type: application/json\" -d '{\"\"\"\"signer_id\"\"\"\":\"\"\"\"commander\"\"\"\",\"\"\"\"signer_name\"\"\"\":\"\"\"\"Commander\"\"\"\",\"\"\"\"comment\"\"\"\":\"\"\"\"Test resolution\"\"\"\"}')",
|
||||
"Read(//Users/ogt/awoooi/**)",
|
||||
"Bash(APPROVAL_ID=\"e9445e68-6c3e-4899-b507-3b9b7bcaf0a7\" __NEW_LINE_680ad94d4896e58a__ echo \"=== 簽核 ===\" curl -s -X POST \"https://awoooi.wooo.work/api/v1/approvals/$APPROVAL_ID/sign\" -H \"Content-Type: application/json\" -d '{\"\"\"\"signer_id\"\"\"\":\"\"\"\"commander\"\"\"\",\"\"\"\"signer_name\"\"\"\":\"\"\"\"Commander\"\"\"\",\"\"\"\"comment\"\"\"\":\"\"\"\"Final test\"\"\"\"}')",
|
||||
"Bash(APPROVAL_ID=\"eb0afb4e-834b-4af7-9ae0-3c58232fdd99\" INCIDENT=\"INC-20260323-F05CD6\" __NEW_LINE_47f1c3803a64b43c__ echo \"=== 簽核前 Incident 狀態 ===\" curl -s \"https://awoooi.wooo.work/api/v1/incidents/$INCIDENT\")",
|
||||
"Bash(mkdir -p /Users/ogt/awoooi/.claude/hooks)",
|
||||
"Bash(/Users/ogt/awoooi/.claude/hooks/pre-commit-check.sh:*)",
|
||||
"Bash(git -C /Users/ogt/awoooi status packages/lewooogo-core/)",
|
||||
"Bash(git -C /Users/ogt/awoooi ls-files packages/lewooogo-core/src/)",
|
||||
"Bash(git -C /Users/ogt/awoooi status --short)",
|
||||
"Bash(git -C /Users/ogt/awoooi add apps/api/pyproject.toml apps/api/scripts/ apps/api/src/ apps/web/.eslintrc.js apps/web/src/ packages/lewooogo-core/.eslintrc.js)",
|
||||
"Bash(git -C /Users/ogt/awoooi diff --cached --stat)",
|
||||
"Bash(git -C:*)",
|
||||
"Bash(for wf:*)",
|
||||
"Bash(do)",
|
||||
"Bash(done)",
|
||||
"Bash(jq 'if type == \"\"\"\"array\"\"\"\" then .[0] | {incident_id, status, decision} else . end')",
|
||||
"Bash(PYTHONPATH=. python -c \"from src.api.v1.stats import router; print\\(''✅ stats.py 載入成功,路由數:'', len\\(router.routes\\)\\)\")",
|
||||
"Bash(PYTHONPATH=. pytest tests/ -v --tb=short)",
|
||||
"Bash(PYTHONPATH=. pytest tests/test_stats_api.py -v --tb=short)",
|
||||
"Bash(PYTHONPATH=. pytest tests/test_webhook_telegram_integration.py::TestNewAlertTelegramPush -v --tb=long)",
|
||||
"Bash(PYTHONPATH=. pytest tests/test_webhook_telegram_integration.py::TestNewAlertTelegramPush -v --tb=short)",
|
||||
"Bash(PYTHONPATH=. pytest tests/test_webhook_telegram_integration.py -v --tb=short)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n awoooi')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get ns awoooi && kubectl get all -n awoooi')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get ns | head -20')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n awoooi-prod')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs awoooi-worker-bb89b5ffc-bpf45 -n awoooi-prod --tail=50')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs awoooi-worker-bb89b5ffc-bpf45 -n awoooi-prod --tail=100 | grep -i telegram')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs awoooi-api-8c9489b6c-cm8g5 -n awoooi-prod --tail=50 | grep -i webhook')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs awoooi-api-8c9489b6c-cm8g5 -n awoooi-prod --tail=30')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n monitoring | grep alertmanager')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl get configmap alertmanager-config -n monitoring -o jsonpath=''{.data.alertmanager\\\\.yml}'' | head -50\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get svc -n awoooi-prod')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl patch configmap alertmanager-config -n monitoring --type merge -p ''{\"\"data\"\":{\"\"alertmanager.yml\"\":\"\"global:\\\\n resolve_timeout: 5m\\\\n\\\\nroute:\\\\n group_by: [\\\\\"\"alertname\\\\\"\", \\\\\"\"severity\\\\\"\"]\\\\n group_wait: 30s\\\\n group_interval: 5m\\\\n repeat_interval: 4h\\\\n receiver: \\\\\"\"awoooi-webhook\\\\\"\"\\\\n routes:\\\\n - match:\\\\n severity: critical\\\\n receiver: \\\\\"\"awoooi-webhook\\\\\"\"\\\\n group_wait: 10s\\\\n repeat_interval: 1h\\\\n - match:\\\\n severity: warning\\\\n receiver: \\\\\"\"awoooi-webhook\\\\\"\"\\\\n group_wait: 1m\\\\n repeat_interval: 4h\\\\n\\\\nreceivers:\\\\n - name: \\\\\"\"awoooi-webhook\\\\\"\"\\\\n webhook_configs:\\\\n - url: \\\\\"\"http://192.168.0.120:32334/api/v1/webhook/alertmanager\\\\\"\"\\\\n send_resolved: true\\\\n\\\\ninhibit_rules:\\\\n - source_match:\\\\n severity: \\\\\"\"critical\\\\\"\"\\\\n target_match:\\\\n severity: \\\\\"\"warning\\\\\"\"\\\\n equal: [\\\\\"\"alertname\\\\\"\", \\\\\"\"instance\\\\\"\"]\\\\n\"\"}}''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl rollout restart deployment/alertmanager -n monitoring && kubectl rollout status deployment/alertmanager -n monitoring')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl get configmap alertmanager-config -n monitoring -o jsonpath=''{.data.alertmanager\\\\.yml}'' | grep -A 3 ''url:''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n awoooi-prod -o jsonpath=\"\"{range .items[*]}{.metadata.name}{\\\\\"\" \\\\\"\"}{.spec.containers[*].image}{\\\\\"\"\\\\\\\\n\\\\\"\"}{end}\"\"')",
|
||||
"Bash(git mv:*)",
|
||||
"Bash(for file:*)",
|
||||
"Bash(do echo:*)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 wooo@192.168.0.120 \"echo ''Connected''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl get deployment -n awoooi-prod -o jsonpath=''{range .items[*]}{.metadata.name}{\"\" selector: \"\"}{.spec.selector.matchLabels}{\"\"\\\\n\"\"}{end}''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl delete deployment awoooi-api awoooi-web awoooi-worker -n awoooi-prod\")",
|
||||
"WebFetch(domain:awoooi.wooo.work)",
|
||||
"WebFetch(domain:api.awoooi.wooo.work)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n awoooi-prod -o wide')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get svc,ingress -n awoooi-prod')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-api -- curl -sf http://localhost:8000/api/v1/health 2>&1')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'curl -sf http://10.43.125.201:8000/api/v1/health 2>&1 || echo \"\"FAILED\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'sudo nginx -t 2>&1 && sudo cat /etc/nginx/sites-enabled/awoooi* 2>/dev/null || sudo cat /etc/nginx/conf.d/awoooi* 2>/dev/null || echo \"\"No awoooi nginx config found\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'cat /etc/nginx/sites-enabled/* 2>/dev/null | grep -A5 awoooi || cat /etc/nginx/conf.d/* 2>/dev/null | grep -A5 awoooi || ls -la /etc/nginx/ 2>/dev/null || echo \"\"No nginx on this host\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'ls /etc/nginx/sites-enabled/ 2>/dev/null && cat /etc/nginx/sites-enabled/*awoooi* 2>/dev/null || echo \"\"Checking conf.d...\"\" && ls /etc/nginx/conf.d/ 2>/dev/null')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'grep -l awoooi /etc/nginx/sites-enabled/* 2>/dev/null || grep -r \"\"awoooi\"\" /etc/nginx/sites-enabled/ 2>/dev/null | head -20')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'grep -r \"\"awoooi\\\\|32334\\\\|32335\"\" /etc/nginx/ 2>/dev/null | head -20')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S cp /tmp/awoooi-prod.conf /etc/nginx/conf.d/ && echo \"\"Config copied\"\" && sudo nginx -t 2>&1')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S ls -la /etc/nginx/ssl/ 2>/dev/null || echo \"\"No ssl dir\"\" && sudo ls -la /etc/letsencrypt/live/ 2>/dev/null | head -10')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S sed -i \"\"s|/etc/nginx/ssl/awoooi.crt|/etc/letsencrypt/live/awoooi.wooo.work/fullchain.pem|g\"\" /etc/nginx/conf.d/awoooi-prod.conf && sudo sed -i \"\"s|/etc/nginx/ssl/awoooi.key|/etc/letsencrypt/live/awoooi.wooo.work/privkey.pem|g\"\" /etc/nginx/conf.d/awoooi-prod.conf && echo \"\"Paths fixed\"\" && sudo nginx -t 2>&1')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S nginx -s reload && echo \"\"Nginx reloaded!\"\" && sleep 2')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'grep -r \"\"awoooi\"\" /etc/nginx/sites-enabled/ 2>/dev/null | head -5')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S grep -rl \"\"awoooi.wooo.work\"\" /etc/nginx/ 2>/dev/null')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'curl -sf http://192.168.0.121:32334/api/v1/health 2>&1 || echo \"\"FAILED to reach 121\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S rm /etc/nginx/conf.d/awoooi-prod.conf && sudo nginx -t && sudo nginx -s reload && echo \"\"Cleaned up duplicate config\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -30 /var/log/nginx/error.log 2>/dev/null')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'grep -r \"\"api.awoooi\"\" /etc/nginx/ 2>/dev/null || echo \"\"No api.awoooi config found\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get configmap awoooi-config -n awoooi-prod -o yaml | grep -A5 NEXT_PUBLIC')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get deployment awoooi-web -n awoooi-prod -o yaml | grep -A20 \"\"env:\"\" | head -25')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -10 /var/log/nginx/access.log 2>/dev/null | grep awoooi')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -5 /var/log/nginx/error.log 2>/dev/null')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S stat /etc/nginx/sites-available/awoooi.wooo.work.conf 2>/dev/null | grep -E \"\"Modify|Change|Birth\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs -n awoooi-prod -l app=awoooi-web --tail=30 2>/dev/null | grep -i \"\"api\\\\|error\\\\|fetch\"\" | head -20')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -20 /var/log/nginx/access.log 2>/dev/null | grep -E \"\"awoooi.*api\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -20 /var/log/nginx/awoooi-prod-access.log 2>/dev/null')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-web -- env | grep -i api')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-web -- sh -c \"\"grep -r \\\\\"\"NEXT_PUBLIC_API_URL\\\\|api.awoooi\\\\\"\" /app/.next/static/chunks/*.js 2>/dev/null | head -5 || grep -r \\\\\"\"awoooi.wooo.work\\\\\"\" /app/.next/static/chunks/*.js 2>/dev/null | head -3\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-web -- sh -c \"\"find /app/.next -name \\\\\"\"*.js\\\\\"\" -exec grep -l \\\\\"\"awoooi\\\\\"\" {} \\\\; 2>/dev/null | head -3\"\"')",
|
||||
"Bash(./scripts/qa-zero-touch.sh)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S cat /etc/nginx/sites-available/awoooi.wooo.work.conf')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S cp /tmp/awoooi.wooo.work.conf /etc/nginx/sites-available/awoooi.wooo.work.conf && sudo nginx -t 2>&1')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S nginx -s reload && echo \"\"✅ Nginx reloaded with load balancing!\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt && sudo ls -la sentry 2>/dev/null || echo \"\"Sentry 目錄不存在,需要建立\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'sudo mkdir -p /opt/sentry && sudo chown wooo:wooo /opt/sentry && cd /opt/sentry && git clone https://github.com/getsentry/self-hosted.git . 2>&1 | tail -5')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"0936223270\"\" | sudo -S mkdir -p /opt/sentry && echo \"\"0936223270\"\" | sudo -S chown wooo:wooo /opt/sentry && cd /opt/sentry && git clone https://github.com/getsentry/self-hosted.git . 2>&1 | tail -10')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt/sentry && ls -la 2>&1 | head -20')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt/sentry && git describe --tags 2>/dev/null || git rev-parse --short HEAD')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt/sentry && ./install.sh --help 2>&1 | head -30 || echo \"\"No help available, checking script...\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt/sentry && nohup ./install.sh --skip-user-creation --no-report-self-hosted-issues > /tmp/sentry-install.log 2>&1 &')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'tail -30 /tmp/sentry-install.log 2>/dev/null || echo \"\"日誌檔案尚未建立,等待中...\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'grep -E \"\"^\\\\▶|^Creating|^Starting|^Error|^✓|Pulling\"\" /tmp/sentry-install.log 2>/dev/null | tail -40')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 最近進度 ===\"\" && tail -10 /tmp/sentry-install.log')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 關鍵階段 ===\"\" && grep -E \"\"^▶|✓|Error|Creating|Starting\"\" /tmp/sentry-install.log | tail -20')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 最近 20 行 ===\"\" && tail -20 /tmp/sentry-install.log')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 關鍵階段 ===\"\" && grep -E \"\"^▶|✓|Error|Creating|Starting|Building|DONE\"\" /tmp/sentry-install.log | tail -30')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 最近關鍵階段 ===\"\" && grep -E \"\"^▶|✓|Error|Creating|Starting|DONE|Completed|success\"\" /tmp/sentry-install.log | tail -25')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'grep -E \"\"^▶|✓|Error|Completed|success|fail\"\" /tmp/sentry-install.log | tail -15')",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 KEYS incident:*)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/momo-pro/monitoring/alertmanager.yml 2>/dev/null || cat /etc/alertmanager/alertmanager.yml 2>/dev/null || echo ''Config not found''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail 30 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail 20 2>&1 | grep -iE ''telegram|send|alert|incident|error''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/clawbot-v5/.env | grep -E ''TELEGRAM|TG_'' | head -5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/clawbot-v5/.env | grep -E ''REDIS|POSTGRES|DATABASE'' | head -5\")",
|
||||
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9093/api/v2/alerts?active=true\"\" | python3 -c \"\"import sys,json; alerts=json.load\\(sys.stdin\\); print\\(f\\\\\"\"Active alerts: {len\\(alerts\\)}\\\\\"\"\\)\"\"')",
|
||||
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9093/api/v2/alerts\"\" | python3 -c \"\"import sys,json; alerts=json.load\\(sys.stdin\\); print\\(f\\\\\"\"Total alerts: {len\\(alerts\\)}\\\\\"\"\\); [print\\(a[\\\\\"\"labels\\\\\"\"][\\\\\"\"alertname\\\\\"\"]\\) for a in alerts[:5]]\"\"')",
|
||||
"Bash(ssh ollama@192.168.0.188 'redis-cli -p 6380 -n 0 GET incident:INC-20260324-36AF55 | python3 -c \"\"import sys,json; d=json.load\\(sys.stdin\\); print\\(f\\\\\"\"Status: {d.get\\(\\\\\"\"status\\\\\"\"\\)}\\\\\"\"\\); print\\(f\\\\\"\"message_id: {d.get\\(\\\\\"\"message_id\\\\\"\", \\\\\"\"NONE\\\\\"\"\\)}\\\\\"\"\\); print\\(f\\\\\"\"chat_id: {d.get\\(\\\\\"\"chat_id\\\\\"\", \\\\\"\"NONE\\\\\"\"\\)}\\\\\"\"\\)\"\"')",
|
||||
"Bash(ssh ollama@192.168.0.188 'redis-cli -p 6380 -n 0 GET incident:INC-20260324-36AF55 | python3 -c \"\"import sys,json; d=json.load\\(sys.stdin\\); print\\(f\\\\\"\"status: {d.get\\('status'\\)}\\\\\"\"\\); print\\(f\\\\\"\"message_id: {d.get\\('message_id'\\)}\\\\\"\"\\); print\\(f\\\\\"\"created_at: {d.get\\('created_at'\\)}\\\\\"\"\\)\"\"')",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 0 KEYS *approval*)",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 0 KEYS *incident*)",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 0 KEYS *pending*)",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 0 KEYS *)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/k3s-prod.yaml kubectl get pods -n awoooi-prod -o wide)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/k3s-prod.yaml kubectl get deployment awoooi-api -n awoooi-prod -o jsonpath='{.spec.template.spec.containers[0].image}')",
|
||||
"Bash(kubectl --kubeconfig=/Users/ogt/awoooi/k3s-prod.yaml get deployment awoooi-api -n awoooi-prod -o jsonpath='{.spec.template.spec.containers[0].image}')",
|
||||
"Bash(python3 -c \":*)",
|
||||
"Bash(/tmp/awoooi-tg-secret.yaml:*)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/k3s-prod.yaml kubectl apply -f /tmp/awoooi-tg-secret.yaml)",
|
||||
"Bash(for pod:*)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.188 \"curl -fsSL https://ollama.com/install.sh | sh\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no -o PreferredAuthentications=password wooo@192.168.0.188 \"echo connected && ollama --version\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no -o PreferredAuthentications=password ollama@192.168.0.188 \"curl -fsSL https://ollama.com/install.sh | sh\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S curl -fsSL https://ollama.com/install.sh | sudo -S sh\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"ollama --version\")",
|
||||
"Bash(__NEW_LINE_95e9df111552805b__ echo:*)",
|
||||
"Bash(sshpass -p '0936223270' scp /Users/ogt/awoooi/k8s/nginx/awoooi-prod.conf ollama@192.168.0.188:/tmp/awoooi-prod.conf)",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S cp /tmp/awoooi-prod.conf /etc/nginx/conf.d/awoooi-prod.conf && echo ''0936223270'' | sudo -S nginx -t 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S ls -la /etc/nginx/ssl/ 2>/dev/null || echo ''No ssl dir''; echo ''0936223270'' | sudo -S ls -la /etc/nginx/conf.d/ 2>/dev/null | head -10\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S grep -r ''ssl_certificate'' /etc/nginx/ 2>/dev/null | head -5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S grep -A 20 ''server_name awoooi'' /etc/nginx/sites-enabled/all-sites.conf 2>/dev/null | head -30\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S ls -la /etc/nginx/sites-enabled/ 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S cat /etc/nginx/sites-available/awoooi.wooo.work.conf 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S rm /etc/nginx/conf.d/awoooi-prod.conf && echo ''0936223270'' | sudo -S nginx -t 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S nginx -s reload 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S systemctl reload nginx 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker logs openclaw 2>&1 | tail -30\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker ps -a --format ''table {{.Names}}\\\\t{{.Status}}\\\\t{{.Image}}'' 2>&1 | head -15\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i telegram | tail -20\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker logs clawbot 2>&1 | tail -30\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker exec alertmanager cat /etc/alertmanager/alertmanager.yml 2>&1 | head -30\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"curl -sf ''http://localhost:9093/api/v1/alerts'' | jq ''.data | length'' 2>/dev/null || curl -sf ''http://localhost:9093/api/v2/alerts'' | jq ''length'' 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker exec alertmanager wget -qO- ''http://localhost:9093/api/v2/alerts'' 2>&1 | head -100\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl -n awoooi-prod logs -l app=awoooi-worker --tail=50 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"cat /home/ollama/alertmanager/alertmanager.yml 2>/dev/null || docker exec alertmanager cat /etc/alertmanager/alertmanager.yml\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker cp /tmp/alertmanager.yml alertmanager:/etc/alertmanager/alertmanager.yml && docker exec alertmanager amtool check-config /etc/alertmanager/alertmanager.yml && docker kill -s SIGHUP alertmanager\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker inspect alertmanager --format ''{{range .Mounts}}{{.Source}} -> {{.Destination}}{{println}}{{end}}''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker exec alertmanager cat /etc/alertmanager/alertmanager.yml\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker restart alertmanager && sleep 3 && docker exec alertmanager cat /etc/alertmanager/alertmanager.yml\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i ''telegram\\\\|webhook\\\\|alert'' | tail -10\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=30 2>/dev/null | grep -E ''''POST|webhook|alertmanager|ManualTest''''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=30 2>/dev/null | grep -iE ''''POST|webhook''''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=50 2>/dev/null | grep -iE ''''POST.*webhook|alertmanager_webhook|NewFingerprint''''\")",
|
||||
"Bash(kustomize build:*)",
|
||||
"Bash(KUBECONFIG=~/.kube/config kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data}')",
|
||||
"Bash(KUBECONFIG=/Users/ogt/.kube/config kubectl exec deploy/awoooi-api -n awoooi-prod -- env)",
|
||||
"Bash(git checkout:*)",
|
||||
"Bash(jq -r '.status // \"\"\"\"failed\"\"\"\"')",
|
||||
"Bash(jq -r '.total // \"\"\"\"error\"\"\"\"')",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 10 XLEN awoooi:signals)",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 10 XRANGE awoooi:signals - + COUNT 5)",
|
||||
"Bash(SENTRY_TOKEN=\"2b73050606d2b32f54095b4e177f4842f2bfe69d4b17da25f6daa4739148a972\" curl -s \"http://192.168.0.110:9000/api/0/organizations/\" -H \"Authorization: Bearer $SENTRY_TOKEN\")",
|
||||
"Bash(SENTRY_TOKEN=\"2b73050606d2b32f54095b4e177f4842f2bfe69d4b17da25f6daa4739148a972\" curl -s \"http://192.168.0.110:9000/api/0/organizations/sentry/projects/\" -H \"Authorization: Bearer $SENTRY_TOKEN\")",
|
||||
"Bash(SENTRY_TOKEN=\"2b73050606d2b32f54095b4e177f4842f2bfe69d4b17da25f6daa4739148a972\" curl -s \"http://192.168.0.110:9000/api/0/projects/sentry/awoooi-api/rules/\" -H \"Authorization: Bearer $SENTRY_TOKEN\")",
|
||||
"Bash(SENTRY_TOKEN=\"2b73050606d2b32f54095b4e177f4842f2bfe69d4b17da25f6daa4739148a972\" __NEW_LINE_583db0bbb6875db0__ echo \"=== Alert Rules ===\" curl -s \"http://192.168.0.110:9000/api/0/projects/sentry/awoooi-api/rules/\" -H \"Authorization: Bearer $SENTRY_TOKEN\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get nodes -o wide && echo ''---'' && kubectl top nodes 2>/dev/null || echo ''metrics-server not installed''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod -o wide && echo ''---'' && kubectl get pvc -n awoooi-prod 2>/dev/null && echo ''---'' && kubectl get sc 2>/dev/null && echo ''---'' && kubectl get deploy -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get ns && echo ''---'' && kubectl get svc -A | grep -E ''prometheus|grafana|metrics|signoz|longhorn|argocd'' || echo ''No monitoring/gitops services found''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"cat /etc/rancher/k3s/config.yaml 2>/dev/null || echo ''--- K3s default config \\(no custom config.yaml\\) ---'' && echo ''---'' && sudo k3s check-config 2>/dev/null | head -30 || echo ''check-config not available''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"free -h && echo ''---'' && swapon --show && echo ''---'' && df -h /var/lib/rancher/k3s\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n cnpg-system && echo ''---'' && kubectl get svc -n monitoring\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get all -n awoooi-prod -o wide 2>/dev/null && echo ''---QUOTA---'' && kubectl describe quota -n awoooi-prod 2>/dev/null && echo ''---EVENTS---'' && kubectl get events -n awoooi-prod --sort-by=''.lastTimestamp'' 2>/dev/null | tail -20\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get helmcharts -A 2>/dev/null || echo ''No HelmCharts'' && echo ''---'' && kubectl get helmreleases -A 2>/dev/null || echo ''No HelmReleases'' && echo ''---'' && kubectl api-resources | grep -E ''argo|flux|velero|longhorn'' || echo ''No GitOps/Backup CRDs''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get ds -A && echo ''---'' && kubectl get cm -n kube-system | grep -E ''traefik|coredns'' && echo ''---REGISTRIES---'' && sudo cat /etc/rancher/k3s/registries.yaml 2>/dev/null || echo ''No registries.yaml''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get ingress -A 2>/dev/null || echo ''No Ingress'' && echo ''---HPA---'' && kubectl get hpa -A 2>/dev/null || echo ''No HPA'' && echo ''---PDB---'' && kubectl get pdb -A 2>/dev/null || echo ''No PDB'' && echo ''---SYSCTL---'' && cat /proc/sys/net/core/somaxconn && cat /proc/sys/fs/file-max\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"systemctl status k3s | head -20 && echo ''---K3S-VERSION---'' && k3s --version && echo ''---ETCD-STATUS---'' && sudo k3s etcd-snapshot list 2>/dev/null | head -5 || echo ''No etcd snapshots''\")",
|
||||
"Bash(ssh wooo@192.168.0.121 \"free -h && swapon --show && echo ''---DISK---'' && df -h /var/lib/rancher/k3s 2>/dev/null\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"sudo ls -la /var/lib/rancher/k3s/server/db/ 2>/dev/null && echo ''---TOKEN---'' && sudo cat /var/lib/rancher/k3s/server/token 2>/dev/null | head -1 | cut -c1-20\")",
|
||||
"Bash(ssh -o ConnectTimeout=10 wooo@192.168.0.120 \"ps aux | grep k3s | grep -v grep | head -3 && echo ''---'' && sudo cat /etc/systemd/system/k3s.service 2>/dev/null | grep -E ''ExecStart|datastore''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S mkdir -p /backup/k3s_etcd 2>/dev/null && echo ''0936223270'' | sudo -S chown ollama:ollama /backup/k3s_etcd 2>/dev/null && echo ''=== 188 備份目錄 ==='' && ls -la /backup/\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"mkdir -p ~/.ssh && chmod 700 ~/.ssh && echo ''ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCnTnbjtSPwrI/pN6DByDxsFDOR4+sVnk7hb+eOr+Pb4e7o7QGbyKaJC2eKP7uRBilPqeScuvNKZhwmY8ZOuhjId+ZyLK0jZXHdq3a6tjsQ4MwPGyT2aMaD7x2jKzPbFojR0P5lmQWH2zjxeVuB7UeBIejaYk3gQEMFVES8Xh84yxFvy9jlwKmZFAI0gIhx0nPOTPB7onTyb8L5snUbwQQntoHWYFbb83+wui/kM15aLT5r8uvS2yZdsWWrDvAyuIShde1ceTBevwwqxezH1egXGoGkvZYYF7vHFu3X6jF7Nfp4qVfo0EfFV3omy90HzoFvoEXCC+jIWU0TjUqdEgGIEj2b+YXw3bIs+k+g/0/iJzA5LLUNb2vHVHoUmah4ZNlfiGU7e6hTYXjLjoXJlz9gfv6LYywhgktdThi9sUCn6rzbatlMrY0HNUE6uOwRTugMq1YUEJCvRqeFmtX5yF6xGp+FbOjIr1kMmplbRQRqKIrpQoqEn0+UBXC7OwJNCk8= wooo@mon'' >> ~/.ssh/authorized_keys && chmod 600 ~/.ssh/authorized_keys && echo ''SSH key 已加入''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"echo ''ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCs3nQ11B+V/VEchNR9Uzj57JoKXOJ8S1UVjCTHkUDL8FnrbdPFr0zvpYgX0a/Ipj9wHkqU6z6Ho6MQj3X2+HaK5fC0fZ3aZE1QT2df/x0xXdyka9XSaTFaymKzNTvfmum40koBkNccKyO5SLSjTcoTZCDHP4RqHHu/MYjQMejG7yeyCFmgumrHh5T/0DXPf5zl0Ff1C5U3VCLPxz5vq63JB2dTfrjQLg3sO0ZI3KTZE8aFj3txKz5snDZX3nE1tHZMKLecwwEqi130BtVZcm8zXDqX83gtUDp/WLfPyKCmzZzGf6YgEofIsyrVup8XnD9xNoFmbEeBdFocGWeoIVIn+faOpU22fvQ34L57GHhNQwygZOPKsZa9XNKjayKdKQl3gcAA2wnkZgN0cyIEYvTd3O+Z5Xvff2dat+0sDMK571V+0JEdAMOpQjFO7DkwjKHn/gHLmvRjYLiUOItX9JysFgYuHs8omad2LmeUIkQrBD2I2hyvY49HaJKWctk4Jm0= root@mon'' >> ~/.ssh/authorized_keys && echo ''Root SSH key added''\")",
|
||||
"Bash(grep -r \"\"\"zod\"\"\" /Users/ogt/awoooi/package.json /Users/ogt/awoooi/apps/*/package.json /Users/ogt/awoooi/packages/*/package.json)",
|
||||
"Bash(__NEW_LINE_144503b060dfd3dd__ echo:*)",
|
||||
"Bash(__NEW_LINE_ae2a22b14586d7aa__ echo:*)",
|
||||
"Bash(__NEW_LINE_e17561a4e55f74d4__ echo:*)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"echo ''''0936223270'''' | sudo -S cat /etc/rancher/k3s/k3s.yaml 2>/dev/null | sed ''''s|https://127.0.0.1:6443|https://192.168.0.125:6443|g''''\")",
|
||||
"Bash(KUBECONFIG=/tmp/kubeconfig-vip.yaml kubectl get nodes)",
|
||||
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get rs -n awoooi-prod)",
|
||||
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get pods -A --no-headers)",
|
||||
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get jobs -A --no-headers)",
|
||||
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get rs -n awoooi-prod --no-headers)",
|
||||
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml delete job api-watchdog-29556380 -n wooo-aiops-uat)",
|
||||
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get pods -n awoooi-prod)",
|
||||
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get pods -A)",
|
||||
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get svc -A)",
|
||||
"Bash(PGPASSWORD=changeme psql -h 192.168.0.188 -U awoooi -d awoooi_prod -f /Users/ogt/awoooi/apps/api/scripts/migrate_phase18_audit_logs.sql)",
|
||||
"Bash(PLAYWRIGHT_BASE_URL=http://192.168.0.125:32335 npx playwright test phase11-conversational.spec.ts --reporter=list)",
|
||||
"Bash(PLAYWRIGHT_BASE_URL=http://192.168.0.125:32335 npx playwright test phase11-conversational.spec.ts --reporter=list --workers=1)",
|
||||
"Bash(KUBECONFIG=~/.kube/config kubectl get nodes --server=https://192.168.0.125:6443 --insecure-skip-tls-verify)",
|
||||
"Bash(source .venv/bin/activate)",
|
||||
"Read(//etc/postgresql/14/main/**)",
|
||||
"Bash(for port:*)",
|
||||
"Bash(kubectl top:*)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl top pods -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n awoooi-prod -o wide)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get svc -n awoooi-prod)",
|
||||
"Bash(jq -r '.components | to_entries[] | \"\"\"\"\\\\\\(.key\\): \\\\\\(.value.status\\)\"\"\"\"')",
|
||||
"Bash(tar -xzf velero-v1.13.0-darwin-arm64.tar.gz)",
|
||||
"Bash(sudo mv:*)",
|
||||
"Bash(velero version:*)",
|
||||
"Bash(mkdir -p ~/bin)",
|
||||
"Bash(mv velero-v1.13.0-darwin-arm64/velero ~/bin/)",
|
||||
"Bash(~/bin/velero version:*)",
|
||||
"Bash(k8s/velero/00-namespace.yaml:*)",
|
||||
"Bash(k8s/velero/01-credentials.yaml:*)",
|
||||
"Bash(k8s/velero/02-velero-install.yaml:*)",
|
||||
"Bash(tar -xzf velero.tar.gz)",
|
||||
"Bash(/tmp/velero-credentials:*)",
|
||||
"Bash(__NEW_LINE_e85d95513fc16492__ ~/bin/velero install --provider aws --plugins velero/velero-plugin-for-aws:v1.9.0 --bucket velero-backups --secret-file /tmp/velero-credentials --backup-location-config region=minio,s3ForcePathStyle=true,s3Url=http://192.168.0.188:9000 --use-volume-snapshots=false --dry-run -o yaml)",
|
||||
"Bash(__NEW_LINE_e85d95513fc16492__ head:*)",
|
||||
"Bash(k8s/velero/README.md:*)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/.kube/config kubectl apply -f /Users/ogt/awoooi/k8s/velero/velero-install-full.yaml)",
|
||||
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"whoami && hostname && cat /etc/sudoers.d/* 2>/dev/null | head -5 || echo ''no sudoers.d files''\")",
|
||||
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl get nodes 2>&1 || echo ''kubectl failed, checking k3s kubeconfig...'' && ls -la /etc/rancher/k3s/k3s.yaml 2>&1\")",
|
||||
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"sudo -l 2>&1 | head -20\")",
|
||||
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''09362233270'' | sudo -S -l 2>&1\")",
|
||||
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get nodes 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' scp /Users/ogt/awoooi/k8s/velero/velero-install-full.yaml wooo@192.168.0.120:/tmp/velero-install-full.yaml)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''''0936223270'''' | sudo -S kubectl apply -f /tmp/velero-install-full.yaml 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get pods -n velero 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get backupstoragelocation -n velero 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl logs -n velero deploy/velero --tail=30 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl logs -n velero deploy/velero --tail=10 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get secret cloud-credentials -n velero -o jsonpath=''{.data.cloud}'' 2>&1 | base64 -d\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S curl -s http://192.168.0.188:9000/velero-backups/ 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl rollout restart deployment/velero -n velero 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get backups -n velero 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl describe backup test-backup-20260328-2114 -n velero 2>&1 | tail -30\")",
|
||||
"Bash(sshpass -p:*)",
|
||||
"Read(//Users/ogt/awoooi/=== 測試 /approvals/**)",
|
||||
"Bash(kubectl --kubeconfig=/Users/ogt/.kube/config get svc -n velero -o wide)",
|
||||
"Bash(kubectl --kubeconfig=/Users/ogt/.kube/config get pods -n velero -o wide)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/.kube/config kubectl get svc -n velero)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'echo \"\"0936223270\"\" | sudo -S sh -c \"\"kubectl get pods -A | grep -E \\\\\"\"kube-state|state-metrics\\\\\"\"\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'echo \"\"0936223270\"\" | sudo -S sh -c \"\"kubectl get ns | grep -E \\\\\"\"wooo|aiops|legacy|old\\\\\"\"\"\"')",
|
||||
"Bash(KUBECONFIG=~/.kube/config kubectl get ns --no-headers)",
|
||||
"WebFetch(domain:build.nvidia.com)",
|
||||
"WebFetch(domain:ollama.com)",
|
||||
"WebFetch(domain:docs.api.nvidia.com)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"curl -s ''http://admin:admin@localhost:3002/api/search?type=dash-db'' | python3 -c \"\"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''Dashboard 數量: {len\\(d\\)}''\\); [print\\(f\\\\\"\" - {i[''title'']}\\\\\"\"\\) for i in d[:10]]\"\"\")",
|
||||
"Bash(jq '.ai_provider // .data.ai_provider // \"\"\"\"not found\"\"\"\"')",
|
||||
"Bash(KUBECONFIG=~/.kube/config kubectl logs -n awoooi-prod deployment/awoooi-api --tail=50)",
|
||||
"Bash(export NVIDIA_API_KEY=\"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\")",
|
||||
"Bash(curl -s -X POST \"https://integrate.api.nvidia.com/v1/chat/completions\" -H \"Content-Type: application/json\" -H \"Authorization: Bearer $NVIDIA_API_KEY\" -d '{:*)",
|
||||
"Bash(/tmp/fix-network-policy.yaml:*)",
|
||||
"Bash(__NEW_LINE_acde7a92ceae01f6__ scp:*)",
|
||||
"Bash(curl -s -X POST https://awoooi.wooo.work/api/v1/webhooks/alertmanager -H 'Content-Type: application/json' -d '{:*)",
|
||||
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9090/api/v1/targets\"\" 2>/dev/null | grep -o \"\"\\\\\"\"health\\\\\"\":\\\\\"\"[^\\\\\"\"]*\\\\\"\"\"\" | sort | uniq -c')",
|
||||
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9090/api/v1/rules\"\" 2>/dev/null | grep -o \"\"\\\\\"\"name\\\\\"\":\\\\\"\"[^\\\\\"\"]*\\\\\"\"\"\" | sort | uniq')",
|
||||
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9090/api/v1/targets\"\" 2>/dev/null | grep -o \"\"\\\\\"\"job\\\\\"\":\\\\\"\"[^\\\\\"\"]*\\\\\"\"\"\" | sort | uniq -c | sort -rn')",
|
||||
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9090/api/v1/query?query=up\"\" 2>/dev/null | grep -o \"\"\\\\\"\"instance\\\\\"\":\\\\\"\"[^\\\\\"\"]*\\\\\"\"\"\" | sort | uniq')",
|
||||
"Bash(for i:*)",
|
||||
"Bash(do sleep:*)",
|
||||
"Bash(kubectl patch:*)",
|
||||
"Bash(ssh wooo@192.168.0.110 \"cat /tmp/runner_clean.log 2>/dev/null; echo ''---''; ps aux | grep ''Runner.Listener'' | grep -v grep | wc -l\")",
|
||||
"Bash(KUBECONFIG=~/.kube/config kubectl logs -n awoooi-prod -l app=awoooi-api --tail=200)",
|
||||
"Bash(/Users/ogt/awoooi/ops/monitoring/deploy-exporters.sh:*)",
|
||||
"WebFetch(domain:github.com)",
|
||||
"WebFetch(domain:docs.ollama.com)",
|
||||
"Skill(telegram:configure)",
|
||||
"Skill(telegram:configure:*)",
|
||||
"Bash(USE_NEW_ENGINE=true pytest tests/test_incident*.py -v --tb=short -x)",
|
||||
"Bash(USE_NEW_ENGINE=true pytest tests/test_approval_field_alignment.py tests/test_learning_service.py -v --tb=short)",
|
||||
"Bash(/tmp/debug_approval.py:*)",
|
||||
"Bash(/tmp/debug_approval2.py:*)",
|
||||
"Bash(/tmp/bulk_sign.sh:*)",
|
||||
"Bash(bash /tmp/bulk_sign.sh)",
|
||||
"Bash(/tmp/check_deploy.py:*)",
|
||||
"Bash(/tmp/check_buttons.py:*)",
|
||||
"Bash(ssh ollama@192.168.0.188 \"docker logs openclaw --since=10s 2>&1 | grep -Ev ''\\(GET|POST\\) /health'' | tail -10 && echo ''---'' && docker exec openclaw env | grep OPENAI_API_KEY | cut -c1-30\")",
|
||||
"Read(//Users/ogt/awoooi/https:/awoooi.wooo.work/_next/static/chunks/app/%5Blocale%5D/**)",
|
||||
"Bash(find /Users/ogt/awoooi/apps/web -type f \\\\\\(-name *.spec.ts -o -name *.spec.tsx \\\\\\))",
|
||||
"Bash(kubectl -n awoooi-prod get pods)",
|
||||
"Bash(kubectl -n production get pods)",
|
||||
"Bash(ssh -o StrictHostKeyChecking=no wooo@192.168.0.121 \"export KUBECONFIG=/etc/rancher/k3s/k3s.yaml && sudo kubectl get deployment awoooi-web -n awoooi-prod -o jsonpath=''{.spec.template.spec.containers[0].image}'' && echo '''' && sudo kubectl get pods -n awoooi-prod -l app=awoooi-web --no-headers\")",
|
||||
"Bash(KUBECONFIG=/Users/ogt/.kube/config kubectl get pods -n awoooi-prod)",
|
||||
"Bash(for run_id in 166 165)",
|
||||
"mcp__plugin_playwright_playwright__browser_navigate",
|
||||
"mcp__plugin_playwright_playwright__browser_take_screenshot",
|
||||
"Bash(open \"http://192.168.0.110:3001/wooo/awoooi/actions\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=5\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs/166/jobs\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=10\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runners\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/admin/runners\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=3\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs/169/jobs\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/jobs/179/logs\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" JOB_ID=180 curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/jobs/$JOB_ID/logs\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=2\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" JOB_ID=181 curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/jobs/$JOB_ID/logs\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs/172/jobs\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/jobs/182/logs\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs/178\" -H \"Authorization: token $TOKEN\")",
|
||||
"mcp__plugin_playwright_playwright__browser_snapshot",
|
||||
"mcp__plugin_playwright_playwright__browser_fill_form",
|
||||
"mcp__plugin_playwright_playwright__browser_click",
|
||||
"Bash(GITEA_TOKEN=\"e6c9fecb1f0148939493ae0fa30407d28c91279d\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=5\" -H \"Authorization: token $GITEA_TOKEN\")",
|
||||
<<<<<<< Updated upstream
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 /tmp/a4_smoke.py)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.repositories.aider_event_repository import AiderEventRepository; print\\('import OK'\\)\")",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py -v --tb=short)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.services.aider_event_service import classify_severity, should_create_incident, build_signal_data; print\\('✓ All imports successful'\\)\")",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py::test_build_signal_data_redacts_secrets_in_annotations -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_events_api.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_processor.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_processor.py tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.workers.aider_event_processor import AiderEventProcessor, get_aider_event_processor, run_aider_event_processor_loop; print\\('✓ All imports successful'\\)\")",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_processor.py -v --tb=short)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_processor.py tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py --tb=short)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_ai_router_feedback.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py tests/test_aider_event_processor.py tests/test_ai_router_feedback.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.services.ai_router import AIRouter; from src.db.base import get_session_factory; print\\('✓ Imports successful, no circular imports'\\)\")",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_ai_router_feedback.py tests/test_aider_event_service.py -v --tb=short)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.api.v1 import aider_events; from src.workers.aider_event_processor import run_aider_event_processor_loop; from src.core.config import settings; print\\('AIDER_WEBHOOK_SECRET' in settings.__fields__, 'USE_AIDER_FEEDBACK' in settings.__fields__\\)\")",
|
||||
"Bash(AIDER_WEBHOOK_SECRET=testsecret /Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.main import app; print\\('app OK; title:', app.title\\)\")",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_action_parsing.py tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py tests/test_aider_event_processor.py tests/test_ai_router_feedback.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_action_parsing.py tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py tests/test_aider_event_processor.py tests/test_ai_router_feedback.py -q)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pip install -e .[dev] --quiet)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pip install -e '.[dev]' --quiet)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/ -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from aider_watch_client.aiderw import main as awmain; from aider_watch_client.cli import main as climain; print\\('✓ imports ok'\\)\")",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pip show aider-watch-client)",
|
||||
"Bash(tailscale status *)",
|
||||
"Bash(kubectl rollout *)",
|
||||
"Bash(bash /Users/ogt/awoooi/scripts/aider_watch_client/scripts/install.sh)",
|
||||
"Bash(git rebase *)",
|
||||
"Bash(/opt/homebrew/bin/aiderw --message \"add docstring to hello function\" --exit)",
|
||||
"Bash(kubectl -n awoooi-prod get pod -l app=awoooi-api -o jsonpath='{.items[0].metadata.name}')",
|
||||
"Bash(kubectl -n awoooi-prod exec awoooi-api-7b9464c969-8ml88 -- python -c ' *)",
|
||||
"Bash(kubectl -n awoooi-prod rollout restart deployment/awoooi-api)",
|
||||
"Bash(kubectl -n awoooi-prod get pod -l app=awoooi-api --no-headers)",
|
||||
"Bash(kubectl -n awoooi-prod rollout status deployment/awoooi-api --timeout=120s)",
|
||||
"Bash(/opt/homebrew/bin/aider-watch flush *)",
|
||||
"Bash(kubectl -n awoooi-prod get pod -l app=awoooi-api -o wide)",
|
||||
"Bash(kubectl -n awoooi-prod rollout status deployment/awoooi-api --timeout=30s)",
|
||||
"Bash(kubectl -n awoooi-prod exec awoooi-api-6657fb9cf7-47lcg -- python -c \"import src.services.telegram_gateway as tg; import inspect; lines = inspect.getsource\\(tg\\); idx = lines.find\\('response_body=e.response.text'\\); print\\('FOUND' if idx >= 0 else 'NOT FOUND'\\)\")",
|
||||
"Read(//opt/gitea/**)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/ -q)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/unit/test_aider_event_service.py tests/unit/test_aider_model.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_aider_event_service.py tests/test_aider_event_processor.py -v)",
|
||||
"Bash(kubectl -n awoooi-prod get svc)",
|
||||
"Bash(kubectl -n openclaw get pod)",
|
||||
"Bash(kubectl -n awoooi-prod exec awoooi-api-7cd784c875-r4qkz -- python -c ' *)",
|
||||
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=10m)",
|
||||
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=15m)",
|
||||
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=20m)",
|
||||
"Bash(kubectl -n awoooi-prod get secret awoooi-secrets -o yaml)",
|
||||
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=30m)",
|
||||
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=2h)",
|
||||
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2)",
|
||||
"Bash(kubectl -n awoooi-prod get pod -l app=awoooi-api -o jsonpath='{range .items[*]}{.metadata.name} {.status.containerStatuses[0].imageID}{\"\\\\n\"}{end}')",
|
||||
"Bash(kubectl -n awoooi-prod get ingress)",
|
||||
"Bash(kubectl -n awoooi-prod get svc awoooi-api-svc)",
|
||||
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --since=60s --prefix)",
|
||||
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --since=5m --prefix)",
|
||||
"Bash(kubectl -n awoooi-prod logs pod/awoooi-api-86bc79766d-dn5ll --since=5m)",
|
||||
"Bash(kubectl -n awoooi-prod logs pod/awoooi-api-86bc79766d-dn5ll --since=10m)",
|
||||
"Bash(kubectl -n awoooi-prod logs pod/awoooi-api-86bc79766d-dn5ll)",
|
||||
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --since=90s --prefix)",
|
||||
"Bash(kubectl -n awoooi-prod logs pod/awoooi-api-86bc79766d-4x69p --since=5m)",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 10 SCAN 0 MATCH \"playbook:PB-*\" COUNT 500)",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 10 DBSIZE)",
|
||||
"Bash(wait)",
|
||||
"Read(//Users/**)",
|
||||
"Read(//Users/ooo/.claude/**)",
|
||||
"Bash(mkdir -p /Users/ogt/awoooi/.claude/agents)",
|
||||
"Bash(cp /Users/ogt/.claude/agents/*.md /Users/ogt/awoooi/.claude/agents/)",
|
||||
"Bash(kubectl -n awoooi-prod logs --tail=400 -l app=awoooi-api --prefix=true)",
|
||||
"Bash(kubectl -n awoooi-prod logs --tail=300 awoooi-api-65c69fd649-bxbwp)",
|
||||
"Bash(kubectl -n awoooi-prod logs --tail=20000 -l app=awoooi-api --prefix=false --since=24h)",
|
||||
"Bash(kubectl -n awoooi-prod logs --since=24h awoooi-api-65c69fd649-bxbwp)",
|
||||
"Bash(kubectl -n awoooi-prod logs --since=24h -l app=awoooi-api --prefix=false)",
|
||||
"Bash(kubectl -n awoooi-prod logs --since=24h awoooi-api-65c69fd649-fmbxd)",
|
||||
"Bash(kubectl -n awoooi-prod logs --since=3h awoooi-api-65c69fd649-fmbxd)",
|
||||
"Bash(kubectl -n awoooi-prod logs --since=3h awoooi-api-65c69fd649-bxbwp)",
|
||||
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --tail=30 --since=30m)",
|
||||
"Bash(kubectl -n awoooi-prod get pods -o wide)",
|
||||
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api -o jsonpath='{.items[0].metadata.creationTimestamp}')",
|
||||
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --tail=5 --since=5m)",
|
||||
"Bash(kubectl -n awoooi-prod describe pod -l app=awoooi-api)",
|
||||
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --tail=20 --since=10m)",
|
||||
"Bash(kubectl -n awoooi-prod exec deployment/awoooi-api -- python3 -c ' *)",
|
||||
"Bash(PGPASSWORD=\"\" psql -h 188.188.188.188 -U aiops -d aiops -c \"\\\\d timeline_events\")",
|
||||
"Bash(kubectl -n awoooi-prod get deploy awoooi-api -o yaml)",
|
||||
"Bash(PGPASSWORD=\"\" psql --version)",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- env)",
|
||||
"Bash(kubectl -n awoooi-prod logs --tail=500 deploy/awoooi-api)",
|
||||
"Bash(kubectl cp *)",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'curl -sG \"$PROMETHEUS_URL/api/v1/query\" --data-urlencode \"query=up\" 2>&1 | head -c 400')",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'for q in \"sum\\(rate\\(http_requests_total{status=~\\\\\"5..\\\\\"}[5m]\\)\\) / sum\\(rate\\(http_requests_total[5m]\\)\\)\" \"avg\\(rate\\(container_cpu_usage_seconds_total{namespace=\\\\\"awoooi-prod\\\\\",container=\\\\\"awoooi-api\\\\\"}[5m]\\)\\)\" \"pg_stat_activity_count{datname=\\\\\"awoooi\\\\\"}\" \"increase\\(kube_pod_container_status_restarts_total{namespace=\\\\\"awoooi-prod\\\\\"}[15m]\\)\"; do echo \"---- $q\"; curl -sG \"$PROMETHEUS_URL/api/v1/query\" --data-urlencode \"query=$q\" 2>&1 | head -c 250; echo; done')",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'PGPASSWORD=as0V1mohktaFbGIx3R0iCatbMJ6XxFDL psql -h 192.168.0.188 -U awoooi -d awoooi_prod -c \"SELECT metric_name, count\\(*\\), max\\(trained_at\\) FROM dynamic_baseline_record GROUP BY metric_name;\" 2>&1 | head -20')",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'PGPASSWORD=as0V1mohktaFbGIx3R0iCatbMJ6XxFDL psql -h 192.168.0.188 -U awoooi -d awoooi_prod -c \"SELECT count\\(*\\) as asset_count FROM asset_inventory; SELECT count\\(*\\) as coverage_count FROM asset_coverage_snapshot; SELECT count\\(*\\) as host_cap_count FROM host_capacity_snapshot; SELECT count\\(*\\) as compl_count FROM asset_compliance_snapshot; SELECT count\\(*\\) as rule_cat FROM alert_rule_catalog; SELECT count\\(*\\) as log_cluster FROM log_cluster_record;\" 2>&1')",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'python3 -c \" *)",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- python3 -c ' *)",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'for q in \"http_requests_total\" \"container_cpu_usage_seconds_total\" \"container_memory_usage_bytes\" \"kube_pod_container_status_restarts_total\" \"pg_stat_activity_count\" \"node_cpu_seconds_total\" \"node_load1\"; do echo -n \"$q => \"; curl -sG \"$PROMETHEUS_URL/api/v1/query\" --data-urlencode \"query=count\\($q\\)\" 2>&1 | head -c 180; echo; done')",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'curl -sG \"$PROMETHEUS_URL/api/v1/query\" --data-urlencode \"query=container_cpu_usage_seconds_total\" 2>&1 | python3 -c \"import json,sys; d=json.load\\(sys.stdin\\); rs=d[\\\\\"data\\\\\"][\\\\\"result\\\\\"][:3]; [print\\(r[\\\\\"metric\\\\\"]\\) for r in rs]; print\\(\\\\\"total series:\\\\\", len\\(d[\\\\\"data\\\\\"][\\\\\"result\\\\\"]\\)\\)\"')",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'which kubectl 2>&1; kubectl version --client 2>&1 | head -3; kubectl -n awoooi-prod get deploy awoooi-api 2>&1 | head -3')",
|
||||
"Bash(kubectl -n awoooi-prod logs --tail=2000 deploy/awoooi-api)",
|
||||
"Bash(psql --version)",
|
||||
"WebFetch(domain:core.telegram.org)",
|
||||
"mcp__plugin_context7_context7__resolve-library-id",
|
||||
"mcp__plugin_context7_context7__query-docs",
|
||||
"WebFetch(domain:docs.claude.com)",
|
||||
"Bash(git tag *)",
|
||||
"Read(//usr/**)",
|
||||
"Bash(psql -h 192.168.0.110 -U awoooi_user -d awoooi -c \"SELECT id, alertname, status, confidence, description, created_at FROM approval_records WHERE status='PENDING' AND DATE\\(created_at AT TIME ZONE 'Asia/Taipei'\\) = CURRENT_DATE AT TIME ZONE 'Asia/Taipei' ORDER BY created_at DESC LIMIT 10;\")",
|
||||
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.spec.template.spec.containers[0].image}')",
|
||||
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.spec.template.spec.containers[0].imagePullPolicy}{\"\\\\n\"}{.spec.template.metadata.labels}{\"\\\\n\"}')",
|
||||
"Bash(kubectl kustomize *)",
|
||||
"Bash(kubectl -n awoooi-prod rollout status deployment/awoooi-api --timeout=60s)",
|
||||
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api --no-headers)",
|
||||
"Bash(kubectl -n awoooi-prod patch deployment awoooi-api -p '{\"spec\":{\"template\":{\"spec\":{\"containers\":[{\"name\":\"api\",\"image\":\"192.168.0.110:5000/awoooi/api:cbd28e29a08435deb8c66af51654d8fa65120a14\"}]}}}}')",
|
||||
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.spec.template.spec.containers[0].image}{\"\\\\n\"}')",
|
||||
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api -o jsonpath='{range .items[*]}{.metadata.name}{\"\\\\t\"}{.spec.containers[0].image}{\"\\\\n\"}{end}')",
|
||||
"Bash(kubectl -n awoooi-prod get pdb awoooi-api-pdb -o jsonpath='{.spec.minAvailable}')",
|
||||
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api -o wide)",
|
||||
"Bash(kubectl -n awoooi-prod describe rs -l app=awoooi-api)",
|
||||
"Bash(kubectl -n awoooi-prod get events --sort-by='.lastTimestamp')",
|
||||
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.spec.replicas}{\"\\\\n\"}{.status.replicas}{\"\\\\n\"}{.status.readyReplicas}{\"\\\\n\"}{.status.updatedReplicas}{\"\\\\n\"}')",
|
||||
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api --sort-by=.metadata.creationTimestamp -o jsonpath='{range .items[*]}{.metadata.name}{\":\"}{.metadata.creationTimestamp}{\"\\\\n\"}{end}')",
|
||||
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.status.conditions[*]}')",
|
||||
"Bash(kubectl -n awoooi-prod describe deployment awoooi-api)",
|
||||
"Bash(kubectl -n awoooi-prod get rs -l app=awoooi-api -o jsonpath='{range .items[*]}{.metadata.name}{\":\"}{.spec.template.spec.containers[0].image}{\"\\\\n\"}{end}')",
|
||||
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o yaml)",
|
||||
"Bash(kubectl -n awoooi-prod rollout status deployment/awoooi-api --timeout=180s)",
|
||||
"Bash(kubectl -n awoooi-prod set image deployment/awoooi-api api=192.168.0.110:5000/awoooi/api:cbd28e29a08435deb8c66af51654d8fa65120a14 --record=false)",
|
||||
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api -o jsonpath='{range .items[*]}{.metadata.name}{\"\\\\t\"}{.spec.containers[0].image}{\"\\\\t\"}{.status.phase}{\"\\\\n\"}{end}')",
|
||||
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.status.replicas}{\"\\\\t\"}{.status.readyReplicas}{\"\\\\t\"}{.status.updatedReplicas}')",
|
||||
"Bash(bash /tmp/diagnostic.sh)",
|
||||
"WebFetch(domain:docs.github.com)",
|
||||
"WebFetch(domain:docs.sonarsource.com)",
|
||||
"WebFetch(domain:gitea.com)",
|
||||
"WebFetch(domain:docs.gitea.com)",
|
||||
"WebFetch(domain:www.sonarsource.com)",
|
||||
"WebFetch(domain:golangci-lint.run)",
|
||||
"WebFetch(domain:www.uber.com)",
|
||||
"Bash(bash scripts/ops/deploy-alerts.sh --dry-run)",
|
||||
"Bash(bash scripts/ops/deploy-alerts.sh)",
|
||||
"Bash(promtool check *)",
|
||||
"WebFetch(domain:openrouter.ai)",
|
||||
"WebFetch(domain:qwenlm.github.io)",
|
||||
"WebFetch(domain:aclanthology.org)",
|
||||
"WebFetch(domain:datanorth.ai)",
|
||||
"WebFetch(domain:www.infoq.com)",
|
||||
"WebFetch(domain:aws.amazon.com)",
|
||||
"WebFetch(domain:artificialanalysis.ai)",
|
||||
"WebFetch(domain:www.alibabacloud.com)",
|
||||
"WebFetch(domain:docs.langchain.com)",
|
||||
"WebFetch(domain:arxiv.org)",
|
||||
"WebFetch(domain:blog.kilo.ai)",
|
||||
"WebFetch(domain:www.siliconflow.com)",
|
||||
"WebFetch(domain:aicompetence.org)",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 ping)",
|
||||
"Bash(redis-cli ping *)"
|
||||
=======
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest apps/api/tests/test_aider_event_models.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_action_parsing.py -v --collect-only)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_action_parsing.py --collect-only)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_models.py tests/test_secret_redactor.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.repositories.aider_event_repository import AiderEventRepository; print\\('import OK'\\)\")"
|
||||
>>>>>>> Stashed changes
|
||||
],
|
||||
"deny": [
|
||||
"Bash(rm -rf *)",
|
||||
"Bash(git push --force *)",
|
||||
"Bash(git reset --hard *)",
|
||||
"Bash(kubectl delete *)",
|
||||
"Bash(docker rm -f *)"
|
||||
],
|
||||
"additionalDirectories": [
|
||||
"/Users/ogt/.claude/projects/-Users-ogt-awoooi/memory",
|
||||
"/Users/ogt/awoooi/.claude/hooks",
|
||||
"/Users/ogt/.claude/channels/telegram",
|
||||
<<<<<<< Updated upstream
|
||||
"/Users/ogt",
|
||||
"/Users/ogt/.claude",
|
||||
"/Users/ogt/awoooi/apps/web/src/app/[locale]/aiops"
|
||||
]
|
||||
},
|
||||
"hooks": {
|
||||
"PreToolUse": [
|
||||
{
|
||||
"matcher": "",
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node $CLAUDE_PROJECT_DIR/.claude/hooks/awoooi-guard.js 2>/dev/null || true"
|
||||
},
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node /Users/ogt/.claude/hooks/branch-protection.js"
|
||||
},
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node /Users/ogt/.claude/hooks/commit-quality.js"
|
||||
},
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node /Users/ogt/.claude/hooks/large-file-warner.js"
|
||||
},
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node /Users/ogt/.claude/hooks/mcp-health.js"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"PostToolUse": [
|
||||
{
|
||||
"matcher": "",
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node /Users/ogt/.claude/hooks/audit-log.js"
|
||||
},
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node /Users/ogt/.claude/hooks/suggest-compact.js"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"Stop": [
|
||||
{
|
||||
"matcher": "",
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node /Users/ogt/.claude/hooks/cost-tracker.js"
|
||||
},
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node /Users/ogt/.claude/hooks/session-summary.js"
|
||||
}
|
||||
]
|
||||
}
|
||||
=======
|
||||
"/Users/ogt/aider-watch"
|
||||
>>>>>>> Stashed changes
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -1,827 +0,0 @@
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(pnpm install:*)",
|
||||
"Bash(npm --version)",
|
||||
"Bash(npm install:*)",
|
||||
"Bash(pnpm --version)",
|
||||
"Bash(pnpm dev:*)",
|
||||
"Bash(pnpm add:*)",
|
||||
"Bash(ls -la /Users/ogt/awoooi/apps/web/next.config.*)",
|
||||
"Bash(pkill -f \"next dev\")",
|
||||
"Bash(curl -sL http://localhost:3000/zh-TW)",
|
||||
"Bash(curl -s http://localhost:3000/zh-TW)",
|
||||
"Bash(pnpm --filter web build)",
|
||||
"Bash(curl -s http://localhost:3001/zh-TW)",
|
||||
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:3000/zh-TW)",
|
||||
"Bash(kubectl apply:*)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/deploy-infra.sh)",
|
||||
"Bash(./deploy-infra.sh)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"mkdir -p /tmp/awoooi-k8s\")",
|
||||
"Bash(sshpass -p '0936223270' scp -o StrictHostKeyChecking=no /Users/ogt/awoooi/k8s/awoooi-prod/01-namespace-quota.yaml /Users/ogt/awoooi/k8s/awoooi-prod/02-network-policy.yaml /Users/ogt/awoooi/k8s/awoooi-prod/04-configmap.yaml wooo@192.168.0.120:/tmp/awoooi-k8s/)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"sudo kubectl apply -f /tmp/awoooi-k8s/01-namespace-quota.yaml\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl apply -f /tmp/awoooi-k8s/01-namespace-quota.yaml 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl apply -f /tmp/awoooi-k8s/02-network-policy.yaml 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl apply -f /tmp/awoooi-k8s/04-configmap.yaml 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get ns awoooi-prod -o wide 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get networkpolicy -n awoooi-prod 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get resourcequota,limitrange,configmap -n awoooi-prod 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"rm -rf /tmp/awoooi-k8s\")",
|
||||
"Bash(PYTHONPATH=. python -c \"from src.main import app; print\\(''Import OK''\\)\")",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/health/ready)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/health/live)",
|
||||
"Bash(curl -s http://localhost:8000/)",
|
||||
"Bash(pkill -f \"uvicorn src.main:app\")",
|
||||
"Bash(pkill -f \"node.*next\")",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/health)",
|
||||
"Read(//Users/ogt/awoooi/apps/api/**)",
|
||||
"Bash(pnpm typecheck:*)",
|
||||
"Read(//Users/ogt/awoooi/apps/web/**)",
|
||||
"Bash(curl -s -X POST http://localhost:8000/api/v1/dashboard/demo/spike/clear)",
|
||||
"Read(//Users/ogt/awoooi/=== 驗證英文頁面 \\(/en/**)",
|
||||
"Bash(jq \".devDependencies | keys | map\\(select\\(startswith\\(\"\"@playwright\"\"\\) or startswith\\(\"\"playwright\"\"\\)\\)\\)\")",
|
||||
"Bash(npx playwright:*)",
|
||||
"Bash(curl -s http://localhost:3000/zh-TW/demo -o /dev/null -w \"Frontend: HTTP %{http_code}\\\\n\")",
|
||||
"Bash(__NEW_LINE_ef548029029cdfac__ echo:*)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/health -o /dev/null -w \"Backend: HTTP %{http_code}\\\\n\")",
|
||||
"Bash(echo '=== 已產出的截圖 ===' find /Users/ogt/awoooi/apps/web/test-results -name *.png)",
|
||||
"Bash(echo '=== Playwright E2E 測試結果 ===' echo echo '📸 截圖證據 \\(test-results/screenshots/\\):' ls -la /Users/ogt/awoooi/apps/web/test-results/screenshots/ __NEW_LINE_db74e5f56e34db17__ echo echo '🎬 錄影證據 \\(.webm\\):' find /Users/ogt/awoooi/apps/web/test-results -name *.webm -exec ls -la {})",
|
||||
"Bash(__NEW_LINE_db74e5f56e34db17__ echo:*)",
|
||||
"Bash(source .venv/bin/activate)",
|
||||
"Bash(python scripts/demo_multisig.py)",
|
||||
"Bash(python -c \"from src.api.v1.approvals import router; print\\(''✅ Approvals router loaded:'', len\\(router.routes\\), ''routes''\\)\")",
|
||||
"Bash(npx tsc:*)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/scripts/demo-multisig-flow.sh)",
|
||||
"Bash(python -c \"from src.main import app; print\\(''✅ API loads successfully''\\)\")",
|
||||
"Bash(jq)",
|
||||
"Bash(/Users/ogt/awoooi/scripts/demo-multisig-flow.sh)",
|
||||
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/approvals\" -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/openapi.json)",
|
||||
"Bash(python -c \":*)",
|
||||
"Bash(curl -s http://localhost:3000 -o /dev/null -w \"%{http_code}\")",
|
||||
"Bash(lsof -ti:3000,3001,8000)",
|
||||
"Bash(curl -s http://localhost:8000/health)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/approvals/pending)",
|
||||
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:3001/zh-TW/demo)",
|
||||
"Bash(ls -la test-results/*.png)",
|
||||
"Bash(cp test-results/cpo102-*.png /Users/ogt/awoooi/docs/screenshots/)",
|
||||
"Bash(ssh ogt@192.168.0.120 'cat /etc/rancher/k3s/k3s.yaml')",
|
||||
"Bash(python -c \"from src.main import app; print\\(''✅ main.py imports OK''\\)\")",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/approvals/k8s-test)",
|
||||
"Bash(sqlite3 awoooi.db \".tables\")",
|
||||
"Bash(sshpass -p 0936223270 ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'sudo cat /etc/rancher/k3s/k3s.yaml')",
|
||||
"Bash(kubectl --kubeconfig=/Users/ogt/awoooi/apps/api/k3s-prod.yaml get deployments -n awoooi-prod)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get deployments -n awoooi-prod 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get deployments -A 2>/dev/null\")",
|
||||
"Bash(curl -s -X POST http://localhost:8000/api/v1/approvals -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(APPROVAL_ID=\"b58a0d86-fa4e-43ca-881c-02e978cd7943\")",
|
||||
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/approvals/$APPROVAL_ID/sign\" -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT operation_type, target_resource, namespace, success, dry_run_passed, dry_run_message, error_message, execution_duration_ms, created_at FROM audit_logs ORDER BY created_at DESC LIMIT 1;\" -header -column)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get pods -n monitoring -l app=grafana 2>/dev/null\")",
|
||||
"Bash(curl -s http://192.168.0.188:11434/api/tags)",
|
||||
"Bash(python -c \"from src.main import app; print\\(''✅ Compile OK''\\)\")",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/ai/status)",
|
||||
"Bash(curl -s -X POST http://localhost:8000/api/v1/ai/analyze-and-propose -H \"Content-Type: application/json\" -d '{}')",
|
||||
"Bash(curl -s -X POST http://192.168.0.188:11434/api/generate -H \"Content-Type: application/json\" -d '{\"\"\"\"model\"\"\"\":\"\"\"\"llama3.2:1b\"\"\"\",\"\"\"\"prompt\"\"\"\":\"\"\"\"Output only JSON: {\\\\\"\"\"\"action\\\\\"\"\"\":\\\\\"\"\"\"test\\\\\"\"\"\"}\"\"\"\",\"\"\"\"stream\"\"\"\":false,\"\"\"\"format\"\"\"\":\"\"\"\"json\"\"\"\"}' --max-time 30)",
|
||||
"Bash(curl -s -X POST http://localhost:8000/api/v1/ai/analyze-and-propose -H \"Content-Type: application/json\" -d '{}' --max-time 60)",
|
||||
"Bash(PROMPT='你是 ClawBot AI。分析以下監控數據,輸出純 JSON(無其他文字)。:*)",
|
||||
"Bash(curl -s -X POST http://192.168.0.188:11434/api/generate -H \"Content-Type: application/json\" -d \"{\"\"model\"\":\"\"llama3.2:1b\"\",\"\"prompt\"\":\"\"$PROMPT\"\",\"\"stream\"\":false,\"\"format\"\":\"\"json\"\",\"\"options\"\":{\"\"num_predict\"\":256,\"\"temperature\"\":0.1}}\" --max-time 60)",
|
||||
"Bash(curl -s -X POST http://192.168.0.188:11434/api/generate -H \"Content-Type: application/json\" -d '{\"\"\"\"model\"\"\"\":\"\"\"\"llama3.2:1b\"\"\"\",\"\"\"\"prompt\"\"\"\":\"\"\"\"Harbor service returning 404. Output JSON: {\\\\\"\"\"\"suggested_action\\\\\"\"\"\":\\\\\"\"\"\"RESTART_DEPLOYMENT\\\\\"\"\"\",\\\\\"\"\"\"target_resource\\\\\"\"\"\":\\\\\"\"\"\"harbor\\\\\"\"\"\",\\\\\"\"\"\"namespace\\\\\"\"\"\":\\\\\"\"\"\"default\\\\\"\"\"\",\\\\\"\"\"\"risk_level\\\\\"\"\"\":\\\\\"\"\"\"medium\\\\\"\"\"\",\\\\\"\"\"\"reasoning\\\\\"\"\"\":\\\\\"\"\"\"Service down\\\\\"\"\"\",\\\\\"\"\"\"confidence\\\\\"\"\"\":0.8,\\\\\"\"\"\"affected_services\\\\\"\"\"\":[]}\"\"\"\",\"\"\"\"stream\"\"\"\":false,\"\"\"\"format\"\"\"\":\"\"\"\"json\"\"\"\",\"\"\"\"options\"\"\"\":{\"\"\"\"num_predict\"\"\"\":128,\"\"\"\"temperature\"\"\"\":0.1}}' --max-time 30)",
|
||||
"Bash(curl -v -X POST http://192.168.0.188:11434/api/generate -H \"Content-Type: application/json\" -d '{\"\"\"\"model\"\"\"\":\"\"\"\"llama3.2:1b\"\"\"\",\"\"\"\"prompt\"\"\"\":\"\"\"\"Say hello\"\"\"\",\"\"\"\"stream\"\"\"\":false}' --max-time 30)",
|
||||
"Bash(curl -s -X POST http://localhost:8000/api/v1/ai/analyze-and-propose -H \"Content-Type: application/json\" -d '{}' --max-time 120)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/ai/analyze-and-propose -X POST -H \"Content-Type: application/json\")",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/dashboard)",
|
||||
"Bash(ls -la ~/Downloads/image*.png)",
|
||||
"Bash(ls -la ~/Desktop/image*.png)",
|
||||
"Bash(ls -la /Users/ogt/awoooi/apps/web/public/*.png)",
|
||||
"WebFetch(domain:openclaw.ai)",
|
||||
"Bash(ls -la /Users/ogt/Downloads/*.png)",
|
||||
"Bash(ls -la /Users/ogt/.gemini/antigravity/brain/*/image*.png)",
|
||||
"Bash(ls -lat /Users/ogt/Downloads/*.png)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/approvals)",
|
||||
"Bash(curl -s -X GET http://localhost:8000/api/v1/approvals/)",
|
||||
"Bash(APPROVAL_ID=\"4989729e-e518-4e7e-8dff-5c3269e0c82b\")",
|
||||
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/approvals/$APPROVAL_ID/sign\" -H \"Content-Type: application/json\" -d '{\"\"\"\"signer_id\"\"\"\": \"\"\"\"ciso-001\"\"\"\", \"\"\"\"signer_name\"\"\"\": \"\"\"\"Demo CISO\"\"\"\", \"\"\"\"comment\"\"\"\": \"\"\"\"資安確認,核准執行\"\"\"\"}')",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/webhooks/health)",
|
||||
"Bash(curl -s -X POST http://localhost:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(curl -s http://localhost:3000)",
|
||||
"Bash(ls -la apps/web/test-results/*.png)",
|
||||
"Bash(curl -s http://localhost:3000/zh-TW/demo)",
|
||||
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:3333/zh-TW/demo)",
|
||||
"Bash(curl -s http://localhost:8001/api/v1/approvals/pending)",
|
||||
"Bash(curl -s -X POST http://localhost:8001/api/v1/approvals -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(curl -s http://localhost:8001/openapi.json)",
|
||||
"Bash(curl -s http://localhost:8001/docs)",
|
||||
"Bash(curl -s http://localhost:8001/api/v1/webhooks/grafana -X OPTIONS)",
|
||||
"Bash(pnpm run:*)",
|
||||
"Bash(node scripts/screenshot-rbac.mjs)",
|
||||
"Bash(pnpm exec:*)",
|
||||
"Bash(curl -s http://localhost:3333 -o /dev/null -w \"%{http_code}\")",
|
||||
"Bash(curl -s http://localhost:3333/zh-TW/demo -o /dev/null -w \"%{http_code}\")",
|
||||
"Bash(python3 -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''''Count: {d[count]}''''\\); [print\\(f''''- {a[id][:8]}... risk={a[risk_level]}''''\\) for a in d[''''approvals''''][:3]]\")",
|
||||
"Bash(curl -s http://localhost:3000/zh-TW/demo -o /dev/null -w \"%{http_code}\")",
|
||||
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f'''' Connected: {d[\"\"success\"\"]}''''\\); print\\(f'''' Namespaces: {d[\"\"namespaces\"\"][:3]}...''''\\)\" __NEW_LINE_57ae1c1c812968e7__ echo \"\" echo \"3. 資料庫持久化:\" sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT COUNT\\(*\\) as approvals FROM approval_records;\" sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT COUNT\\(*\\) as timeline FROM timeline_events;\" sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT COUNT\\(*\\) as audits FROM audit_logs;\")",
|
||||
"Bash(head -2 __NEW_LINE_9bf9481fbdf30d4e__ echo \"\" echo \"2. 告警收斂跳過 LLM 日誌 \\(應該有 4 次\\):\" grep -c \"alert_converged_skip_llm\" /tmp/api-server.log)",
|
||||
"Bash(python -m json.tool)",
|
||||
"Bash(__NEW_LINE_7463bff94cecc20f__ echo:*)",
|
||||
"Bash(__NEW_LINE_13846c8488c5fa9a__ echo:*)",
|
||||
"Bash(__NEW_LINE_13846c8488c5fa9a__ ls:*)",
|
||||
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f'''' Status: {d[\"\"status\"\"]}''''\\)\" __NEW_LINE_32366ca1bb050259__ echo \"\" echo \"2. 待簽核記錄 \\(含 hit_count\\):\" curl -s http://localhost:8000/api/v1/approvals/pending)",
|
||||
"Read(//Users/ogt/awoooi/**)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/timeline/events?limit=10)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/timeline/events?limit=5)",
|
||||
"Bash(ls -la /Users/ogt/awoooi/apps/api/*.txt /Users/ogt/awoooi/apps/api/*.toml)",
|
||||
"Bash(ls -la /Users/ogt/awoooi/docker-compose*.yml)",
|
||||
"Bash(ls /Users/ogt/awoooi/k8s/awoooi-prod/*rbac* /Users/ogt/awoooi/k8s/awoooi-prod/*service-account*)",
|
||||
"Bash(kubectl kustomize:*)",
|
||||
"Bash(docker compose:*)",
|
||||
"Bash(docker info:*)",
|
||||
"Bash(python3 -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(''''API Status:'''', d.get\\(''''status'''', ''''unknown''''\\)\\)\")",
|
||||
"Bash(pkill -9 -f uvicorn)",
|
||||
"Bash(lsof -ti:8000)",
|
||||
"Bash(open -a Docker)",
|
||||
"Bash(docker stop:*)",
|
||||
"Bash(lsof -ti:3000)",
|
||||
"Bash(docker start:*)",
|
||||
"Bash(docker ps:*)",
|
||||
"Bash(curl -s http://localhost:3000 -o /dev/null -w 'HTTP Status: %{http_code}\\\\n')",
|
||||
"Bash(curl -I http://localhost:8000/api/v1/dashboard/stream)",
|
||||
"Bash(curl -s http://localhost:8000/openapi.json)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/dashboard/stream --max-time 3 -w \"\\\\n--- HTTP Status: %{http_code} ---\\\\n\")",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/dashboard/stream --max-time 3)",
|
||||
"Bash(curl -s http://localhost:3000/zh-TW -o /dev/null -w \"HTTP Status: %{http_code}\\\\n\")",
|
||||
"Bash(curl -s -D - http://localhost:8000/api/v1/dashboard/stream --max-time 2)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/scripts/deploy-infra.sh)",
|
||||
"Bash(./scripts/deploy-infra.sh)",
|
||||
"Bash(pnpm --filter @awoooi/web build)",
|
||||
"Bash(timeout 10 env MOCK_MODE=true OTEL_ENABLED=false uvicorn src.main:app --host 0.0.0.0 --port 8099)",
|
||||
"Bash(timeout 8 pnpm --filter @awoooi/web dev)",
|
||||
"Bash(git diff:*)",
|
||||
"Bash(curl -s -I http://localhost:8000/api/v1/dashboard/stream)",
|
||||
"Bash(timeout 3 curl -s -N http://localhost:8000/api/v1/dashboard/stream)",
|
||||
"Bash(grep -n \"NEXT_PUBLIC\\\\|API_URL\\\\|localhost\" /Users/ogt/awoooi/apps/web/.env*)",
|
||||
"Bash(timeout 2 curl -s -D - -N http://localhost:8000/api/v1/dashboard/stream)",
|
||||
"Bash(curl -s http://localhost:3000/)",
|
||||
"Bash(python -m py_compile scripts/fire_test_alert.py)",
|
||||
"Bash(python -m scripts.fire_test_alert --help)",
|
||||
"Bash(python -m scripts.fire_test_alert)",
|
||||
"Bash(python -m scripts.fire_test_alert --type k8s_pod_crash)",
|
||||
"Bash(timeout 3 curl -s -N -H \"Origin: http://localhost:3000\" http://localhost:8000/api/v1/dashboard/stream)",
|
||||
"Bash(python -m scripts.fire_test_alert --type disk_full)",
|
||||
"Bash(docker restart:*)",
|
||||
"Bash(curl -s -w \"\\\\nHTTP_CODE: %{http_code}\\\\n\" http://localhost:3000)",
|
||||
"Bash(docker exec:*)",
|
||||
"Bash(docker rmi:*)",
|
||||
"Bash(timeout 5 curl -s -N http://localhost:8000/api/v1/dashboard/stream)",
|
||||
"Bash(curl -s http://localhost:3000 -w \"\\\\nHTTP: %{http_code}\\\\n\")",
|
||||
"Bash(timeout 120 docker logs awoooi-api -f --since 1s)",
|
||||
"Bash(curl -s -I -H \"Origin: http://localhost:3000\" http://localhost:8000/api/v1/dashboard/stream)",
|
||||
"Bash(curl -s -X OPTIONS -H \"Origin: http://localhost:3000\" -H \"Access-Control-Request-Method: GET\" http://localhost:8000/api/v1/dashboard/stream -I)",
|
||||
"Bash(node /Users/ogt/awoooi/scripts/verify-sse.js)",
|
||||
"Bash(python -m scripts.fire_test_alert --type db_connection_timeout)",
|
||||
"Bash(npm run:*)",
|
||||
"Bash(docker-compose down:*)",
|
||||
"Bash(docker-compose build:*)",
|
||||
"Bash(docker-compose up:*)",
|
||||
"Bash(pkill -f 'next dev')",
|
||||
"Bash(node /Users/ogt/awoooi/scripts/test-approval-flow.js)",
|
||||
"Bash(python -m scripts.fire_test_alert --type pod_crash)",
|
||||
"Bash(node /Users/ogt/awoooi/scripts/test-k8s-executor.js)",
|
||||
"Bash(kubectl cluster-info:*)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl cluster-info)",
|
||||
"Bash(ls -la /Users/ogt/awoooi/apps/web/src/app/[locale]/)",
|
||||
"Bash(python -c \"from src.api.v1 import audit_logs; print\\(''API module loads OK''\\)\")",
|
||||
"Bash(curl -s http://localhost:3000/zh-TW/action-logs)",
|
||||
"Bash(pnpm build:*)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/audit-logs)",
|
||||
"Bash(xargs -r kill -9 2)",
|
||||
"Bash(/dev/null source:*)",
|
||||
"Bash(python -c \"from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor; print\\(''''httpx ok''''\\)\")",
|
||||
"Bash(sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT * FROM audit_logs ORDER BY created_at DESC LIMIT 5;\")",
|
||||
"Bash(sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT name FROM sqlite_master WHERE type=''table'';\")",
|
||||
"Bash(sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT id, event_type, status, title, created_at FROM timeline_events ORDER BY created_at DESC LIMIT 5;\")",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/audit-logs/stats)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/timeline?limit=10)",
|
||||
"Bash(curl -s \"http://localhost:8000/api/v1/timeline\")",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/docs)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/scripts/setup-guardrails.sh /Users/ogt/awoooi/scripts/ai_code_reviewer.py)",
|
||||
"Bash(ls -la /Users/ogt/awoooi/apps/web/.eslintrc*)",
|
||||
"Bash(ls -la scripts/*.py scripts/*.sh .pre-commit-config.yaml .secrets.baseline apps/web/.eslintrc.js)",
|
||||
"Bash(python -m src.services.test_context_gatherer)",
|
||||
"Bash(python -m pytest src/services/test_context_gatherer.py -v)",
|
||||
"Bash(grep -r \"ClawBot\\\\|clawbot\\\\|CLAWBOT\" --include=*.py --include=*.ts --include=*.tsx apps/)",
|
||||
"Bash(python scripts/e2e_openclaw_test.py)",
|
||||
"Bash(python -m pytest tests/e2e_network_test.py -v --tb=short)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/apps/api/scripts/apply_prometheus_config.sh /Users/ogt/awoooi/apps/api/scripts/fire_live_alert.py)",
|
||||
"Bash(./scripts/apply_prometheus_config.sh)",
|
||||
"Bash(python scripts/fire_live_alert.py oomkilled)",
|
||||
"Bash(python scripts/fire_live_alert.py oomkilled --api-url http://localhost:8000)",
|
||||
"Bash(python scripts/fire_live_alert.py highcpu --api-url http://localhost:8000)",
|
||||
"Bash(python scripts/fire_live_alert.py podcrash --api-url http://localhost:8000)",
|
||||
"Bash(python -m pytest tests/test_webhook_telegram_integration.py -v)",
|
||||
"Bash(ls -la /Users/ogt/awoooi/apps/api/.env*)",
|
||||
"Bash(ls -la /Users/ogt/wooo-aiops/.env*)",
|
||||
"Bash(ls -la /Users/ogt/AIOps/.env*)",
|
||||
"Bash(/Users/ogt/awoooi/apps/api/.env:*)",
|
||||
"Bash(/tmp/deploy-188-home.sh:*)",
|
||||
"Bash(chmod +x /tmp/deploy-188-home.sh)",
|
||||
"Bash(scp /tmp/awoooi-api-deploy.tar.gz /tmp/deploy-188-home.sh ollama@192.168.0.188:/tmp/)",
|
||||
"Bash(ssh ollama@192.168.0.188 \"bash /tmp/deploy-188-home.sh\")",
|
||||
"Bash(ssh ollama@192.168.0.188 \"curl -s http://localhost:8000/api/v1/webhooks/health\")",
|
||||
"Bash(ssh ollama@192.168.0.188 \"tail -50 /tmp/openclaw.log\")",
|
||||
"Bash(ssh ollama@192.168.0.188 \"cd /home/ollama/awoooi-api && source .venv/bin/activate && pip install sqlalchemy aiosqlite -q && pip install httpx python-dotenv pydantic-settings -q\")",
|
||||
"Bash(ssh ollama@192.168.0.188 \"cd /home/ollama/awoooi-api && pkill -f ''uvicorn src.main:app'' 2>/dev/null; sleep 1; source .venv/bin/activate && nohup uvicorn src.main:app --host 0.0.0.0 --port 8000 > /tmp/openclaw.log 2>&1 & sleep 3 && curl -s http://localhost:8000/api/v1/webhooks/health\")",
|
||||
"Bash(ssh ollama@192.168.0.188:*)",
|
||||
"Bash(pkill -f ngrok)",
|
||||
"Bash(pkill -f \"ssh -fN.*8001\")",
|
||||
"Bash(ssh -fN -L 8001:localhost:8000 ollama@192.168.0.188)",
|
||||
"Bash(curl -s http://localhost:8001/api/v1/webhooks/health)",
|
||||
"Bash(BOT_TOKEN=\"8569720657:AAHdvKf_P2ms-QKFTyqTLtLiqEggz8cpjMk\" curl -s \"https://api.telegram.org/bot$BOT_TOKEN/getWebhookInfo\")",
|
||||
"Bash(curl -s https://api.telegram.org/bot$BOT_TOKEN/getWebhookInfo)",
|
||||
"Bash(curl -s http://localhost:8001/api/v1/webhooks/)",
|
||||
"Bash(curl -s http://localhost:8001/)",
|
||||
"Bash(curl -s http://localhost:8001/api/v1/health)",
|
||||
"Bash(scp /tmp/awoooi-api-v7.tar.gz ollama@192.168.0.188:/tmp/)",
|
||||
"Bash(tar -czvf /tmp/awoooi-api-v7.1.tar.gz src/ requirements.txt pyproject.toml)",
|
||||
"Bash(scp /tmp/awoooi-api-v7.1.tar.gz ollama@192.168.0.188:/tmp/)",
|
||||
"Bash(ssh ollama@192.168.0.188 \"tail -10 /tmp/openclaw.log | grep -E ''''clickhouse|signoz_gold''''\")",
|
||||
"Bash(ssh ogt@192.168.0.188 \"cd /home/ollama/awoooi-api && tail -50 nohup.out 2>/dev/null || journalctl -u awoooi-api --no-pager -n 50 2>/dev/null || echo ''請手動檢查日誌''\")",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8123/ -d \"SELECT 1 FORMAT JSONEachRow\")",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:11434/api/tags)",
|
||||
"Bash(ssh -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o BatchMode=yes -o ConnectTimeout=5 ollama@192.168.0.188 \"echo ok\")",
|
||||
"Bash(ssh -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o BatchMode=yes -o ConnectTimeout=5 wooo@192.168.0.188 \"echo ok\")",
|
||||
"Bash(ssh -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o BatchMode=yes -o ConnectTimeout=5 root@192.168.0.188 \"echo ok\")",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8001/health)",
|
||||
"Bash(ssh root@192.168.0.188 \"cat /tmp/openclaw.log 2>/dev/null | tail -100 || echo ''Log file not found''\")",
|
||||
"Bash(ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 ollama@192.168.0.188 \"echo ok\")",
|
||||
"Bash(ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 wooo@192.168.0.188 \"echo ok\")",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/services/signoz_client.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/services/)",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/services/openclaw.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/services/)",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/services/telegram_gateway.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/services/)",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/api/v1/webhooks.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/api/v1/)",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/models/ai.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/models/)",
|
||||
"Bash(ssh ollama@192.168.0.188 \"cd /home/ollama/awoooi-api && pkill -f ''''uvicorn src.main:app'''' && sleep 2 && nohup .venv/bin/python3 -m uvicorn src.main:app --host 0.0.0.0 --port 8000 > nohup.out 2>&1 &\")",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8000/health)",
|
||||
"Bash(curl -s --connect-timeout 10 http://192.168.0.188:8000/health)",
|
||||
"Bash(curl -s -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(curl -s -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"high_cpu\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"api-gateway\"\",\"\"namespace\"\":\"\"awoooi-prod\"\",\"\"message\"\":\"\"CPU 92% test\"\"}')",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"high_cpu\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"api-gateway\"\",\"\"namespace\"\":\"\"awoooi-prod\"\",\"\"message\"\":\"\"CPU 92% - 統帥全自主驗收 v2\"\"}')",
|
||||
"Bash(curl -s --connect-timeout 30 --max-time 120 -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(curl -s --connect-timeout 30 --max-time 180 -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"k8s_pod_crash\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"inventory-api\"\",\"\"namespace\"\":\"\"commerce\"\",\"\"message\"\":\"\"Pod crash - 統帥終極驗收\"\"}' --connect-timeout 30 --max-time 180)",
|
||||
"Bash(ssh -o ConnectTimeout=10 ollama@192.168.0.188 \"echo OK && ps aux | grep uvicorn | grep -v grep | head -2\")",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"ssl_expiry\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"nginx-ingress\"\",\"\"namespace\"\":\"\"ingress\"\",\"\"message\"\":\"\"SSL 即將過期 - 終極驗收\"\"}' --connect-timeout 30 --max-time 180)",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"db_connection_timeout\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"postgres-primary\"\",\"\"namespace\"\":\"\"database\"\",\"\"message\"\":\"\"DB 連線逾時 - SignOz 整合終極測試\"\"}' --connect-timeout 30 --max-time 180)",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"service_404\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"auth-service\"\",\"\"namespace\"\":\"\"identity\"\",\"\"message\"\":\"\"Service 404 - SignOz + Ollama 整合終極測試\"\"}' --connect-timeout 30 --max-time 180)",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"high_cpu\"\",\"\"severity\"\":\"\"warning\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"recommendation-engine\"\",\"\"namespace\"\":\"\"ml\"\",\"\"message\"\":\"\"CPU 78% - Ollama 最終測試\"\"}' --connect-timeout 30 --max-time 200)",
|
||||
"Bash(scp apps/api/src/services/openclaw.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/services/openclaw.py)",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/core/http_client.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/core/)",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/main.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/)",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/core/config.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/core/)",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/api/v1/health.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/api/v1/)",
|
||||
"Bash(ssh -o ConnectTimeout=5 ollama@192.168.0.188 \"ps aux | grep uvicorn | grep -v grep\")",
|
||||
"Bash(curl -s -H \"Origin: http://localhost:3000\" -H \"Access-Control-Request-Method: GET\" -X OPTIONS http://192.168.0.188:8000/api/v1/health -v)",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/health)",
|
||||
"Bash(curl -s -N --max-time 3 http://192.168.0.188:8000/api/v1/dashboard/stream)",
|
||||
"Bash(curl -s http://localhost:3000/zh-TW -o /dev/null -w \"%{http_code}\")",
|
||||
"Bash(open http://localhost:3000/zh-TW)",
|
||||
"Bash(open http://localhost:3001/zh-TW)",
|
||||
"Bash(curl -s -H \"Origin: http://localhost:3001\" http://192.168.0.188:8000/api/v1/dashboard/stream --max-time 3)",
|
||||
"Bash(curl -s -I -H \"Origin: http://localhost:3001\" http://192.168.0.188:8000/api/v1/health)",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/approvals/pending)",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/approvals)",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/approvals?status=pending_approval\")",
|
||||
"Bash(xargs sed:*)",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/approvals/history?limit=5\")",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/approvals/approved)",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/timeline?limit=10\")",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/action-logs\")",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/timeline/events?limit=10\")",
|
||||
"Bash(ssh ogt@192.168.0.188 \"kubectl get nodes\")",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/approvals/k8s-test\")",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/k3s-prod.yaml ogt@192.168.0.188:~/awoooi-api/k3s-prod.yaml)",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/timeline/events?limit=5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"cat /etc/rancher/k3s/k3s.yaml\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.188 \"echo ''SSH OK'' && pwd\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"echo ''SSH OK'' && pwd && ls -la ~/awoooi-api/ 2>/dev/null || echo ''Directory not found''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"sshpass -p ''0936223270'' scp -o StrictHostKeyChecking=no wooo@192.168.0.120:/etc/rancher/k3s/k3s.yaml ~/awoooi-api/k3s-prod.yaml && sed -i ''s/127.0.0.1/192.168.0.120/g'' ~/awoooi-api/k3s-prod.yaml && echo ''Kubeconfig deployed!'' && head -10 ~/awoooi-api/k3s-prod.yaml\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd ~/awoooi-api && pkill -f ''uvicorn'' 2>/dev/null; sleep 1; nohup .venv/bin/uvicorn src.main:app --host 0.0.0.0 --port 8000 --reload > nohup.out 2>&1 & sleep 3; echo ''=== API Restarted ==='' && tail -20 nohup.out\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd ~/awoooi-api && pkill -f ''uvicorn src.main'' || true\")",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/health\" --connect-timeout 5)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 ollama@192.168.0.188 \"cd ~/awoooi-api && source .venv/bin/activate && nohup uvicorn src.main:app --host 0.0.0.0 --port 8000 > nohup.out 2>&1 &\")",
|
||||
"Bash(sshpass -p:*)",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/health\" --connect-timeout 10)",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/timeline/events?limit=8\")",
|
||||
"Bash(curl -s http://localhost:3000/zh-TW -o /dev/null -w \"Frontend: HTTP %{http_code}\\\\n\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'curl -s http://localhost:8000/api/v1/approvals/pending | jq -r \"\".approvals[] | \\\\\"\"ID: \\\\\\(.id\\) | Action: \\\\\\(.action\\)\\\\\"\"\"\"')",
|
||||
"Bash(curl -s --connect-timeout 5 https://awoooi.wooo.tw/api/v1/health)",
|
||||
"Bash(curl -s --connect-timeout 5 https://awoooi.wooo.tw/api/v1/approvals/pending)",
|
||||
"Bash(ssh ollama@192.168.70.188 \"ps aux | grep uvicorn | grep -v grep | head -3\")",
|
||||
"Bash(ssh -o ConnectTimeout=10 ollama@192.168.70.188 \"echo ''SSH Connected''\")",
|
||||
"Bash(ping -c 2 -t 5 192.168.70.188)",
|
||||
"Bash(curl -s --connect-timeout 10 https://awoooi.wooo.tw/api/v1/health)",
|
||||
"Bash(ssh -o ConnectTimeout=10 ollama@192.168.0.188 \"echo ''SSH Connected to 188 Base''\")",
|
||||
"Bash(grep -B 5 -A 30 \"async def add_signature\" /Users/ogt/awoooi/apps/api/src/services/*.py)",
|
||||
"Bash(ssh ogt@192.168.0.188 \"cd /home/ogt/awoooi && docker compose ps\")",
|
||||
"Bash(ls -la .env*)",
|
||||
"Bash(.env:*)",
|
||||
"Bash(timeout 15 python -m uvicorn src.main:app --host 0.0.0.0 --port 8001)",
|
||||
"Bash(timeout 20 python -m uvicorn src.main:app --host 0.0.0.0 --port 8001)",
|
||||
"Bash(timeout 25 python -m uvicorn src.main:app --host 0.0.0.0 --port 8001)",
|
||||
"Bash(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no ogt@192.168.0.188 \"cd /home/ogt/wooo-aiops && docker compose ps clawbot 2>/dev/null || docker ps | grep -i claw\")",
|
||||
"Bash(ls -la ~/.ssh/*.pub)",
|
||||
"Bash(ssh -i ~/.ssh/id_rsa -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o PasswordAuthentication=no ogt@192.168.0.188 \"echo connected\")",
|
||||
"Bash(curl -s \"https://api.telegram.org/bot8569720657:AAHdvKf_P2ms-QKFTyqTLtLiqEggz8cpjMk/logOut\")",
|
||||
"Bash(curl -s \"https://api.telegram.org/bot8569720657:AAHdvKf_P2ms-QKFTyqTLtLiqEggz8cpjMk/close\")",
|
||||
"Bash(curl -s \"https://api.telegram.org/bot8569720657:AAHdvKf_P2ms-QKFTyqTLtLiqEggz8cpjMk/getUpdates?timeout=3&limit=1\")",
|
||||
"Bash(ping -c 1 192.168.0.188)",
|
||||
"Bash(python -m tests.test_redis_multisig)",
|
||||
"Bash(curl -v -X POST http://localhost:8000/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(python3 -c \":*)",
|
||||
"Bash(echo ' 無法連線' __NEW_LINE_8fc87454f9798a7d__ echo echo [結論]: echo ' /signals 端點尚未部署到 .188' echo ' 程式碼已完成,需要執行:' echo \" cd apps/api && docker build -t awoooi-api . && docker-compose up -d\")",
|
||||
"Bash(__NEW_LINE_dc88f37970737861__ cd:*)",
|
||||
"Bash(__NEW_LINE_dc88f37970737861__ echo:*)",
|
||||
"Read(//Users/**)",
|
||||
"Bash(tail -20 __NEW_LINE_8b049957a9782734__ echo \"\" echo \"[Step 2] 等待容器啟動 \\(10 秒\\)...\" sleep 10 __NEW_LINE_8b049957a9782734__ echo \"\" echo \"[Step 3] 檢查容器狀態...\" docker compose ps)",
|
||||
"Bash(tail -5 __NEW_LINE_275e0094e9dcb44a__ echo \"\" echo \"[1.2] 重建 API 容器 \\(含 Signal Worker\\)...\" docker compose build api)",
|
||||
"Bash(1 __NEW_LINE_275e0094e9dcb44a__ echo \"\" echo \"[1.4] 等待服務就緒 \\(15 秒\\)...\" sleep 15 __NEW_LINE_275e0094e9dcb44a__ echo \"\" echo \"[1.5] 檢查容器狀態...\" docker compose ps)",
|
||||
"Bash(__NEW_LINE_f4c8301ec5249760__ echo:*)",
|
||||
"Bash(__NEW_LINE_21ba3cf3700d942d__ cd:*)",
|
||||
"Bash(1 __NEW_LINE_9a14b79fc58c11ba__ echo \"\" echo \"[1.3] 等待服務就緒 \\(15 秒\\)...\" sleep 15 __NEW_LINE_9a14b79fc58c11ba__ echo \"\" echo \"[1.4] 檢查容器狀態...\" docker compose ps api)",
|
||||
"Bash(1 __NEW_LINE_6b654ca5be87c137__ echo \"\" echo \"[2] 等待服務就緒 \\(15 秒\\)...\" sleep 15 __NEW_LINE_6b654ca5be87c137__ echo \"\" echo \"[3] 發送測試 Signal...\" curl -s -X POST http://localhost:8000/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(__NEW_LINE_564908ddf866c081__ echo:*)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/apps/api/scripts/test_phase63_aggregation.py)",
|
||||
"Bash(python scripts/test_phase63_aggregation.py)",
|
||||
"Bash(xargs -r docker exec -i awoooi-redis redis-cli DEL)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/apps/api/scripts/test_race_condition.py)",
|
||||
"Bash(python scripts/test_race_condition.py)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/apps/api/scripts/test_phase64_proposal.py)",
|
||||
"Bash(python scripts/test_phase64_proposal.py)",
|
||||
"Bash(python agent.py --alert FINAL_PHASE_6_TEST)",
|
||||
"Bash(AWOOOI_REDIS_URL=\"redis://localhost:6379/0\" python agent.py --alert FINAL_PHASE_6_TEST)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/incidents)",
|
||||
"Bash(curl -s -X POST http://localhost:8000/api/v1/incidents/INC-20260322-06085B/proposal)",
|
||||
"Bash(grep -r \"mock\\\\|Mock\\\\|MOCK\\\\|fake\\\\|Fake\\\\|dummy\\\\|hardcode\" /Users/ogt/awoooi/apps/web/src --include=*.tsx --include=*.ts -l)",
|
||||
"Bash(NEXT_PUBLIC_API_URL=http://localhost:8000 pnpm next build --no-lint)",
|
||||
"Bash(grep -v \"Traceback\\\\|File \"\"/usr\\\\|^\\\\s*$\")",
|
||||
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''''Signal Count: {len\\(d[\"\"signals\"\"]\\)}''''\\); [print\\(f'''' - {s[\"\"alert_name\"\"]} \\({s[\"\"signal_id\"\"]}\\)''''\\) for s in d[''''signals'''']]\")",
|
||||
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:3003/zh-TW)",
|
||||
"Bash(curl -s -X GET \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3003\" -H \"Access-Control-Request-Method: GET\" -v)",
|
||||
"Bash(grep -r TELEGRAM /Users/ogt/awoooi/apps/api/.env*)",
|
||||
"Bash(grep -r TELEGRAM_BOT_TOKEN /Users/ogt/awoooi --include=*.env* --include=*.yaml --include=*.yml)",
|
||||
"Bash(curl -s -I -X OPTIONS \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3000\" -H \"Access-Control-Request-Method: GET\")",
|
||||
"Bash(curl -s \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3000\")",
|
||||
"Bash(python /tmp/e2e_drill.py)",
|
||||
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); i=[x for x in d[''''incidents''''] if x[''''incident_id'''']==''''INC-20260322-06085B''''][0]; print\\(f\"\"Incident: {i[''''incident_id'''']}\"\"\\); print\\(f\"\"Signals: {i[''''signal_count'''']}\"\"\\); print\\(f\"\"Updated: {i[''''updated_at'''']}\"\"\\)\")",
|
||||
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/telegram/test\")",
|
||||
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/telegram/test-push\" -H \"Content-Type: application/json\" -d '{\"\"\"\"approval_id\"\"\"\": \"\"\"\"15ab6844-ca4e-4a13-aead-dc71cd342445\"\"\"\", \"\"\"\"risk_level\"\"\"\": \"\"\"\"critical\"\"\"\", \"\"\"\"resource_name\"\"\"\": \"\"\"\"api-gateway\"\"\"\", \"\"\"\"root_cause\"\"\"\": \"\"\"\"E2E DRILL - PodCrashLoopBackOff\"\"\"\", \"\"\"\"suggested_action\"\"\"\": \"\"\"\"RESTART_DEPLOYMENT\"\"\"\", \"\"\"\"estimated_downtime\"\"\"\": \"\"\"\"5-15 min\"\"\"\"}')",
|
||||
"Bash(curl -s -o /dev/null -w \"HTTP Status: %{http_code}\\\\n\" http://localhost:3000/zh-TW)",
|
||||
"Bash(curl -s -I \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3000\")",
|
||||
"Bash(curl -s -X POST http://localhost:8000/api/v1/incidents/INC-20260322-19DF60/proposal)",
|
||||
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/telegram/test-push\" -H \"Content-Type: application/json\" -d '{\"\"\"\"approval_id\"\"\"\": \"\"\"\"942e762e-fb97-480f-b21a-d3be67fa70b1\"\"\"\", \"\"\"\"risk_level\"\"\"\": \"\"\"\"critical\"\"\"\", \"\"\"\"resource_name\"\"\"\": \"\"\"\"core-system\"\"\"\", \"\"\"\"root_cause\"\"\"\": \"\"\"\"E2E DRILL TAKE 2 - 二次實彈演習\"\"\"\", \"\"\"\"suggested_action\"\"\"\": \"\"\"\"INVESTIGATE_SERVICE\"\"\"\", \"\"\"\"estimated_downtime\"\"\"\": \"\"\"\"5-15 min\"\"\"\"}')",
|
||||
"Bash(curl -s \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3000\" -H \"Accept: application/json\")",
|
||||
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''''Incidents: {d[\"\"count\"\"]}''''\\); [print\\(f'''' - {i[\"\"incident_id\"\"]} | {i[\"\"severity\"\"]} | {i[\"\"signal_count\"\"]} signals | {i[\"\"affected_services\"\"]}''''\\) for i in d[''''incidents'''']]\")",
|
||||
"Bash(curl -s \"http://localhost:8000/api/v1/approvals/pending\" -H \"Origin: http://localhost:3000\")",
|
||||
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''''Pending: {d[\"\"count\"\"]} approvals''''\\); [print\\(f'''' - {a[\"\"id\"\"][:8]}... | {a[\"\"risk_level\"\"]} | {a[\"\"action\"\"][:30]}...''''\\) for a in d[''''approvals''''][:3]]\")",
|
||||
"Bash(mkdir -p /Users/ogt/awoooi/apps/web/public/fonts)",
|
||||
"Bash(curl -sL -o DSEG7Classic-Bold.woff2 \"https://cdn.jsdelivr.net/npm/dseg@0.46.0/fonts/DSEG7-Classic/DSEG7Classic-Bold.woff2\")",
|
||||
"Bash(curl -sL -o DSEG7Classic-Bold.woff \"https://cdn.jsdelivr.net/npm/dseg@0.46.0/fonts/DSEG7-Classic/DSEG7Classic-Bold.woff\")",
|
||||
"Bash(curl -sL -o DSEG7Classic-Regular.woff2 \"https://cdn.jsdelivr.net/npm/dseg@0.46.0/fonts/DSEG7-Classic/DSEG7Classic-Regular.woff2\")",
|
||||
"Bash(curl -sL -o DSEG7Classic-Regular.woff \"https://cdn.jsdelivr.net/npm/dseg@0.46.0/fonts/DSEG7-Classic/DSEG7Classic-Regular.woff\")",
|
||||
"Bash(pnpm next:*)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/scripts/bootstrap_prod.sh)",
|
||||
"Bash(/Users/ogt/awoooi/.env:*)",
|
||||
"Bash(grep -E \"^\\\\.env$|03-secrets\\\\.yaml\" .gitignore)",
|
||||
"Bash(echo 'Adding to .gitignore...' if ! grep -q ^.env$ .gitignore)",
|
||||
"Bash(then echo:*)",
|
||||
"Bash(git add:*)",
|
||||
"Bash(git commit:*)",
|
||||
"Bash(git push:*)",
|
||||
"Bash(git remote:*)",
|
||||
"Bash(gh repo:*)",
|
||||
"Bash(gh api:*)",
|
||||
"Bash(gh run:*)",
|
||||
"Bash(ls -la pnpm-*.yaml package.json turbo.json)",
|
||||
"Bash(git status:*)",
|
||||
"Bash(gh workflow:*)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod -o wide\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-77545758fc-xnncc -n awoooi-prod --tail=50\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-77545758fc-xnncc -n awoooi-prod 2>&1 | grep -i ''cors'' -A 5 -B 5\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-79948cbbbf-b8cgj -n awoooi-prod --tail=100\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod -l app=awoooi-api --sort-by=.metadata.creationTimestamp -o name | tail -1 | xargs kubectl logs -n awoooi-prod --tail=50\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''{.data.OPENCLAW_TG_USER_WHITELIST}'' | base64 -d\")",
|
||||
"Bash(ssh wooo@192.168.0.120 'kubectl patch secret awoooi-secrets -n awoooi-prod --type='\"''\"'json'\"''\"' -p='\"''\"'[:*)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout restart deployment/awoooi-api -n awoooi-prod && kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout restart deployment/awoooi-worker -n awoooi-prod && kubectl rollout status deployment/awoooi-worker -n awoooi-prod --timeout=120s\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-747967b787-fcx2r -n awoooi-prod --tail=30\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"ps aux | grep -E ''actions-runner|Runner'' | grep -v grep\")",
|
||||
"Bash(curl -sf http://192.168.0.120:32334/api/v1/health)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-fd795cd87-rdpgn -n awoooi-prod --tail=30\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"curl -sf http://192.168.0.120:32334/api/v1/health | jq .status\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"curl -sf http://192.168.0.120:32334/api/v1/health\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -sf http://localhost:32334/api/v1/health\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get svc -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -sf http://10.43.125.201:8000/api/v1/health\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -sf http://10.43.105.105:3000/ -o /dev/null && echo ''Web OK''\")",
|
||||
"Bash(ssh ogt@192.168.0.188 \"ls -la /etc/nginx/sites-available/\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=50\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-795c95ff76-wch2p -n awoooi-prod --tail=30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod && ss -tlnp | grep 32334\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -sf http://127.0.0.1:32334/api/v1/health | head -c 200\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"sudo ufw status 2>/dev/null || sudo iptables -L INPUT -n | head -20\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"curl -sf --connect-timeout 5 http://192.168.0.120:32334/api/v1/health | head -c 100\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"curl -v --connect-timeout 5 http://192.168.0.120:32334/api/v1/health 2>&1 | head -30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"cat /etc/systemd/system/k3s.service 2>/dev/null | grep -i exec || ps aux | grep k3s | head -3\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"cat /etc/systemd/system/k3s.service\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"netstat -tlnp 2>/dev/null | grep 32334 || ss -tlnp | grep 32334\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"curl -sf --connect-timeout 5 http://192.168.0.120:31234/health 2>&1 | head -c 100\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy allow-nginx-ingress -n awoooi-prod -o yaml\")",
|
||||
"Bash(curl -sk https://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(curl -sk -I -X OPTIONS https://awoooi.wooo.work/api/v1/health -H \"Origin: https://awoooi.wooo.work\" -H \"Access-Control-Request-Method: GET\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -sI --connect-timeout 3 http://127.0.0.1:32334/api/v1/health 2>&1 | head -5\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -sI --connect-timeout 3 http://127.0.0.1:32335/ 2>&1 | head -5\")",
|
||||
"Bash(ssh wooo@192.168.0.121 \"curl -sI --connect-timeout 3 http://127.0.0.1:32334/api/v1/health 2>&1 | head -5\")",
|
||||
"Bash(ssh wooo@192.168.0.121 \"curl -sI --connect-timeout 3 http://127.0.0.1:32335/ 2>&1 | head -5\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"sudo iptables -t nat -L KUBE-NODEPORTS -n 2>/dev/null | head -20\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"sudo netstat -tlnp | grep -E ''32334|32335''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"ss -tlnp 2>/dev/null | grep -E ''32334|32335'' || netstat -tln | grep -E ''32334|32335''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"ss -tln | grep -E ''32334|32335|:323''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"ss -tln\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"export KUBECONFIG=/home/wooo/.kube/config-120; /home/wooo/bin/kubectl get svc -n awoooi-prod -o wide\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"which kubectl || find /usr -name kubectl 2>/dev/null | head -1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get svc -n awoooi-prod && kubectl get pods -n awoooi-prod -o wide\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"export KUBECONFIG=/home/wooo/.kube/config-120 && kubectl logs awoooi-api-546b88465d-lb8zm -n awoooi-prod --tail 80\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"KUBECONFIG=/home/wooo/.kube/config-120 kubectl logs awoooi-api-546b88465d-lb8zm -n awoooi-prod --tail 80 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"ls -la /home/wooo/.kube/ && cat /home/wooo/.kube/config-120 2>/dev/null | head -20 || cat /etc/rancher/k3s/k3s.yaml 2>/dev/null | head -20\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"sudo cat /etc/rancher/k3s/k3s.yaml | head -20\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && kubectl logs awoooi-api-546b88465d-lb8zm -n awoooi-prod --tail 100 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"which kubectl 2>/dev/null || find /home/wooo -name kubectl 2>/dev/null | head -1 || ls -la /home/wooo/bin/\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs awoooi-api-546b88465d-lb8zm -n awoooi-prod --tail 100 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl describe pod awoooi-api-546b88465d-lb8zm -n awoooi-prod | tail -40\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get svc -n awoooi-prod -o wide\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl exec -n awoooi-prod deploy/awoooi-api -- curl -sf http://localhost:8000/api/v1/health 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl exec -n awoooi-prod deploy/awoooi-api -- wget -qO- http://localhost:8000/api/v1/health 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 20 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"curl -sf http://192.168.0.120:32334/api/v1/health 2>&1 || echo ''FAILED to connect to 120:32334''\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"curl -sf http://192.168.0.121:32334/api/v1/health 2>&1 || echo ''FAILED to connect to 121:32334''\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"ssh wooo@192.168.0.120 ''cat /etc/rancher/k3s/k3s.yaml 2>/dev/null || echo No k3s.yaml''\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get pods -n awoooi-prod -o wide | grep Running\")",
|
||||
"Bash(ssh -o ConnectTimeout=5 wooo@192.168.0.120 \"ufw status 2>/dev/null || firewall-cmd --state 2>/dev/null || echo ''No firewall command found''\")",
|
||||
"Bash(ssh -o ConnectTimeout=5 wooo@192.168.0.121 \"ufw status 2>/dev/null || firewall-cmd --state 2>/dev/null || echo ''No firewall command found''\")",
|
||||
"Bash(pip3 show:*)",
|
||||
"Bash(docker build:*)",
|
||||
"Bash(docker version:*)",
|
||||
"Bash(docker run:*)",
|
||||
"Bash(curl -vI -H \"Origin: https://awoooi.wooo.work\" http://localhost:8889/api/v1/health)",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get endpoints awoooi-api-svc -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get pods -n awoooi-prod -o wide\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"sudo -n ufw status 2>/dev/null || sudo -n iptables -L INPUT -n 2>/dev/null | head -20 || echo ''Need sudo for firewall check''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"ss -tln | grep -E ''32334|32335|:323'' || echo ''No NodePort listeners found''\")",
|
||||
"Bash(ssh wooo@192.168.0.121 \"ss -tln | grep -E ''32334|32335|:323'' || echo ''No NodePort listeners found''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"ps aux | grep -E ''kube-proxy|k3s'' | grep -v grep | head -5\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"cat /proc/sys/net/ipv4/ip_forward\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"systemctl status k3s 2>/dev/null | head -15 || ps aux | grep ''k3s server'' | grep -v grep\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -sf --connect-timeout 5 http://127.0.0.1:32334/api/v1/health 2>&1 || echo ''LOCALHOST NodePort FAILED''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -sf --connect-timeout 5 http://192.168.0.120:32334/api/v1/health 2>&1 || echo ''EXTERNAL IP NodePort FAILED''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"cat /etc/iptables/rules.v4 2>/dev/null || iptables-save 2>/dev/null | grep -E ''DROP|REJECT|32334|32335'' | head -10 || echo ''Cannot read iptables without sudo''\")",
|
||||
"Bash(ssh wooo@192.168.0.121 \"curl -sf --connect-timeout 5 http://192.168.0.120:32334/api/v1/health 2>&1 || echo ''Worker->Master NodePort FAILED''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"cat /etc/rancher/k3s/config.yaml 2>/dev/null || ls -la /etc/rancher/k3s/ 2>/dev/null || echo ''No K3s config found''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"netstat -an 2>/dev/null | grep 32334 || ss -an | grep 32334 || echo ''No socket found for 32334''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S iptables -L INPUT -n 2>&1 | head -20\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S iptables -t nat -L KUBE-NODEPORTS -n 2>&1 | head -20\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S iptables -L KUBE-ROUTER-INPUT -n 2>&1 | head -30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S iptables -t nat -L KUBE-NODEPORTS -n 2>&1 | grep -i awoooi || echo ''NO AWOOOI RULES FOUND''\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get svc awoooi-api-svc -n awoooi-prod -o yaml | grep -A5 ''spec:''\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get networkpolicy -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl apply -f - 2>&1\")",
|
||||
"Bash(curl -sf --connect-timeout 10 https://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(curl -skf --connect-timeout 10 https://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(curl -sI https://awoooi.wooo.work/)",
|
||||
"Bash(curl -skI https://awoooi.wooo.work/)",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 50 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl rollout restart deployment/awoooi-api -n awoooi-prod && /home/wooo/kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s\")",
|
||||
"Bash(curl -sf https://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(curl -skf https://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 40 2>&1\")",
|
||||
"Bash(for i:*)",
|
||||
"Bash(do curl:*)",
|
||||
"Bash(echo \"Request $i sent\")",
|
||||
"Bash(done)",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 100 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 30 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get configmap awoooi-config -n awoooi-prod -o yaml | grep OTEL\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl exec deployment/awoooi-api -n awoooi-prod -- env | grep OTEL\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl exec deployment/awoooi-api -n awoooi-prod -- python -c \"\"import socket; s=socket.socket\\(\\); s.settimeout\\(5\\); s.connect\\(\\(''192.168.0.188'', 24317\\)\\); print\\(''✅ Connection to 24317 OK''\\); s.close\\(\\)\"\" 2>&1\")",
|
||||
"Bash(curl -vI https://awoooi.wooo.work)",
|
||||
"Bash(curl -vI https://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(curl -sf -X POST https://awoooi.wooo.work/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(curl -s -X POST https://awoooi.wooo.work/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{\"\"source\"\": \"\"prometheus\"\", \"\"severity\"\": \"\"P1\"\", \"\"message\"\": \"\"Test alert from CLI\"\"}')",
|
||||
"Bash(curl -s -X POST https://awoooi.wooo.work/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''''{.data.WEBHOOK_HMAC_SECRET}'''' 2>/dev/null\")",
|
||||
"Bash(timeout 15 curl -N -s https://awoooi.wooo.work/api/v1/dashboard/stream)",
|
||||
"Bash(bash:*)",
|
||||
"Bash(curl -s https://awoooi.wooo.work/api/v1/metrics/gold)",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT DISTINCT metric_name FROM signoz_metrics.distributed_samples_v4 WHERE unix_milli > \\(toUnixTimestamp\\(now\\(\\)\\) - 1800\\) * 1000 LIMIT 20 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) as trace_count FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 30 MINUTE FORMAT TabSeparated\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"KUBECONFIG=/home/wooo/.kube/config-120 /home/wooo/bin/kubectl get configmap awoooi-config -n awoooi-prod -o jsonpath=''{.data}'' | python3 -m json.tool 2>/dev/null | head -30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"KUBECONFIG=/home/wooo/.kube/config-120 /home/wooo/bin/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 50 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"which kubectl || ls -la ~/bin/kubectl 2>/dev/null || ls -la /usr/local/bin/kubectl 2>/dev/null || echo ''kubectl not found''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"export KUBECONFIG=/home/wooo/.kube/config-120 && kubectl get configmap awoooi-config -n awoooi-prod -o jsonpath=''{.data}'' 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"ls -la ~/.kube/ 2>/dev/null; cat ~/.kube/config 2>/dev/null | head -20 || echo ''checking k3s default...''; sudo cat /etc/rancher/k3s/k3s.yaml 2>/dev/null | head -5 || echo ''no k3s config''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"sudo k3s kubectl get configmap awoooi-config -n awoooi-prod -o yaml 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"sudo k3s kubectl logs deployment/awoooi-api -n awoooi-prod --tail 100 2>&1\")",
|
||||
"Bash(nc -zv 192.168.0.188 24317)",
|
||||
"Bash(curl -s http://192.168.0.188:24318/v1/traces -X POST -H \"Content-Type: application/json\" -d '{}')",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT DISTINCT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 24 HOUR GROUP BY serviceName ORDER BY cnt DESC LIMIT 20 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"DESCRIBE TABLE signoz_traces.distributed_signoz_index_v2 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 5 MINUTE GROUP BY serviceName ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s https://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 10 MINUTE GROUP BY serviceName ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT service_name, count\\(\\) as cnt FROM signoz_logs.distributed_logs WHERE timestamp > now\\(\\) - INTERVAL 30 MINUTE GROUP BY service_name ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SHOW TABLES FROM signoz_logs FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) as total FROM signoz_logs.distributed_logs_v2 WHERE timestamp > now\\(\\) - INTERVAL 30 MINUTE FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT JSONExtractString\\(resources_string, ''service.name''\\) as svc, count\\(\\) as cnt FROM signoz_logs.distributed_logs_v2 WHERE timestamp > now\\(\\) - INTERVAL 5 MINUTE GROUP BY svc ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"DESCRIBE TABLE signoz_logs.distributed_logs_v2 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT resources_string[''service.name''] as svc, count\\(\\) as cnt FROM signoz_logs.distributed_logs_v2 WHERE timestamp > \\(toUnixTimestamp64Nano\\(now64\\(\\)\\) - 300000000000\\) GROUP BY svc ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT body, resources_string FROM signoz_logs.distributed_logs_v2 WHERE timestamp > \\(toUnixTimestamp64Nano\\(now64\\(\\)\\) - 60000000000\\) LIMIT 1 FORMAT JSONEachRow\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 2 MINUTE GROUP BY serviceName ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, name, timestamp FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 5 MINUTE ORDER BY timestamp DESC LIMIT 5 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, name, formatDateTime\\(timestamp, ''%Y-%m-%d %H:%M:%S''\\) as ts FROM signoz_traces.distributed_signoz_index_v2 ORDER BY timestamp DESC LIMIT 10 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) FROM signoz_traces.distributed_signoz_index_v2 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) FROM signoz_traces.distributed_signoz_spans FORMAT TabSeparated\")",
|
||||
"Bash(ssh wooo@192.168.0.188 \"docker ps | grep -E ''otel|signoz''\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT metric_name, sum\\(value\\) as total FROM signoz_metrics.distributed_samples_v4 WHERE metric_name LIKE ''otelcol%span%'' AND unix_milli > \\(toUnixTimestamp\\(now\\(\\)\\) - 300\\) * 1000 GROUP BY metric_name FORMAT TabSeparated\")",
|
||||
"Bash(for t:*)",
|
||||
"Bash(do)",
|
||||
"Bash(echo -n \"$t: \")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) FROM signoz_traces.$t FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp > now\\(\\) - INTERVAL 10 MINUTE GROUP BY serviceName ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \":*)",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"DESCRIBE TABLE signoz_traces.distributed_signoz_index_v3 FORMAT TabSeparated\")",
|
||||
"Bash(AWOOOI_API_URL=https://awoooi.wooo.work WEBHOOK_HMAC_SECRET=\"CHANGE_ME_TO_RANDOM_64_CHARS\" python scripts/fire_live_alert.py oomkilled)",
|
||||
"Bash(timeout 10 curl -sN https://awoooi.wooo.work/api/v1/dashboard/stream)",
|
||||
"Bash(curl -s https://awoooi.wooo.work/api/v1/dashboard)",
|
||||
"Bash(npm list:*)",
|
||||
"Bash(node scripts/verify-frontend.js)",
|
||||
"Bash(node /Users/ogt/awoooi/scripts/verify-frontend.js)",
|
||||
"Bash(python -c \"from src.services.proposal_service import ProposalService; print\\(''''✅ ProposalService OK''''\\)\")",
|
||||
"Bash(python -c \"from src.services.openclaw import OpenClawService; print\\(''''✅ OpenClawService OK''''\\)\")",
|
||||
"Bash(curl -s http://192.168.0.120:32334/api/v1/incidents)",
|
||||
"Bash(jq -r \".incidents[:2] | .[] | \"\"\\\\\\(.incident_id\\) - \\\\\\(.status\\) - \\\\\\(.severity\\)\"\"\")",
|
||||
"Bash(curl -s -X POST \"http://192.168.0.120:32334/api/v1/incidents/INC-20260322-4B3152/propose\" -H \"Content-Type: application/json\")",
|
||||
"Bash(kubectl logs:*)",
|
||||
"Bash(ssh ogt@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail 30\")",
|
||||
"Bash(curl -sv -X POST \"http://192.168.0.120:32334/api/v1/incidents/INC-20260322-4B3152/propose\" -H \"Content-Type: application/json\")",
|
||||
"Bash(curl -s http://192.168.0.120:32334/api/v1/health)",
|
||||
"Bash(curl -s \"http://192.168.0.120:32334/api/v1/incidents/INC-20260322-4B3152\")",
|
||||
"Bash(curl -sv \"http://192.168.0.120:32334/api/v1/incidents\")",
|
||||
"Bash(curl -s --retry 3 --retry-delay 2 \"http://192.168.0.120:32334/api/v1/health\")",
|
||||
"Bash(curl -s --retry 3 --retry-delay 2 http://192.168.0.120:32334/api/v1/health)",
|
||||
"Bash(do echo:*)",
|
||||
"Bash(curl -s -X POST \"https://awoooi.wooo.work/api/v1/incidents/INC-20260322-4B3152/propose\" -H \"Content-Type: application/json\")",
|
||||
"Bash(curl -s -X POST \"https://awoooi.wooo.work/api/v1/incidents/INC-20260322-4B3152/proposal\" -H \"Content-Type: application/json\")",
|
||||
"Bash(curl -s -X POST \"https://awoooi.wooo.work/api/v1/incidents/INC-20260322-D6C6A0/proposal\" -H \"Content-Type: application/json\")",
|
||||
"Bash(curl -s http://192.168.0.120:32334/api/v1/approvals/pending)",
|
||||
"Bash(kubectl get:*)",
|
||||
"Bash(curl -s -w \"\\\\nHTTP_CODE: %{http_code}\\\\n\" http://192.168.0.120:32334/api/v1/health)",
|
||||
"Bash(curl -s http://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(curl -s http://awoooi.wooo.work/api/v1/approvals/pending)",
|
||||
"Bash(curl -sL https://awoooi.wooo.work/api/v1/approvals/pending -k)",
|
||||
"Bash(ssh root@192.168.0.120 \"kubectl get pods -n awoooi-prod -o wide\")",
|
||||
"Bash(ssh root@192.168.0.120 \"kubectl logs -n awoooi-prod -l app=awoooi-api --tail=30\")",
|
||||
"Bash(curl -sL https://awoooi.wooo.work/api/v1/timeline -k)",
|
||||
"Bash(curl -sL https://awoooi.wooo.work/api/v1/incidents -k)",
|
||||
"Bash(curl -sL \"https://awoooi.wooo.work/api/v1/approvals?include_history=true\" -k)",
|
||||
"Bash(curl -sL \"https://awoooi.wooo.work/api/v1/incidents/INC-20260322-4B3152\" -k)",
|
||||
"Bash(curl -sL \"https://awoooi.wooo.work/api/v1/audit-logs?limit=10\" -k)",
|
||||
"Bash(curl -sL https://awoooi.wooo.work/api/v1/audit-logs?limit=10 -k)",
|
||||
"Bash(ssh ogt@192.168.0.120 \"kubectl logs -n awoooi-prod -l app=awoooi-api --tail=100\")",
|
||||
"Bash(ssh ogt@192.168.0.120 \"kubectl logs -n awoooi-prod -l app=awoooi-web --tail=50\")",
|
||||
"Bash(ssh ogt@192.168.0.188 \"kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml logs -n awoooi-prod -l app=awoooi-api --tail=100 2>/dev/null || docker logs awoooi-api --tail=100 2>/dev/null\")",
|
||||
"Bash(curl -sL \"https://awoooi.wooo.work/api/v1/approvals/pending\" -k -w \"\\\\n\\\\nHTTP: %{http_code}\\\\nTime: %{time_total}s\\\\n\")",
|
||||
"Bash(curl -sL -X POST https://awoooi.wooo.work/api/v1/approvals/182e07c1-118a-49d7-b71c-7d33c5484d9b/sign -H 'Content-Type: application/json' -d '{\"\"\"\"signer_id\"\"\"\": \"\"\"\"test-debug\"\"\"\", \"\"\"\"signer_name\"\"\"\": \"\"\"\"Debug Test\"\"\"\", \"\"\"\"comment\"\"\"\": \"\"\"\"Testing\"\"\"\"}' -k)",
|
||||
"Bash(curl -s https://wwooo.aiops.tw/api/v1/health)",
|
||||
"Bash(curl -s https://wwooo.aiops.tw/api/v1/incidents?limit=5)",
|
||||
"Bash(curl -s https://wwooo.aiops.tw/api/v1/approvals/pending)",
|
||||
"Bash(curl -v -s \"https://wwooo.aiops.tw/api/v1/health\")",
|
||||
"Bash(curl -s \"https://wwooo.aiops.tw/\")",
|
||||
"Bash(curl -s --connect-timeout 5 \"http://192.168.0.120:32334/api/v1/health\")",
|
||||
"Bash(curl -s --connect-timeout 5 \"http://192.168.0.120:32334/api/v1/incidents?limit=5\")",
|
||||
"Bash(ssh -o ConnectTimeout=5 wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-867f67f55d-kvdl2 -n awoooi-prod --tail=50\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod | grep -E ''NAME|worker''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod | grep worker\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-5bdc5699bb-kcv9q -n awoooi-prod --tail=30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy -n awoooi-prod -o wide\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod --show-labels | grep worker\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy allow-required-egress -n awoooi-prod -o yaml\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl patch networkpolicy allow-required-egress -n awoooi-prod --type=''json'' -p=''[{\"\"op\"\": \"\"replace\"\", \"\"path\"\": \"\"/spec/podSelector/matchLabels\"\", \"\"value\"\": {\"\"system\"\": \"\"awoooi\"\"}}]''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout restart deployment/awoooi-worker -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-5bdc5699bb-kcv9q -n awoooi-prod --tail=15\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=40\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod 2>&1 | grep -E ''signal_worker|redis_pool|INFO'' | tail -10\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/health\")",
|
||||
"Bash(ssh wooo@192.168.0.120 'curl -s -X POST \"\"http://localhost:32334/api/v1/webhooks/signals\"\" -H \"\"Content-Type: application/json\"\" -d \"\"{:*)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod | grep -E ''NAME|worker|api''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod && echo ''==='' && kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/incidents?limit=5\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/approvals/pending\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod 2>&1 | head -50\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/health | jq ''.components''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret -n awoooi-prod -o name\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''{.data.WEBHOOK_HMAC_SECRET}'' | base64 -d\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=20 2>&1 | grep -E ''signal|incident|telegram|INFO''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s ''http://localhost:32334/api/v1/incidents?limit=5''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod 2>&1 | grep -iE ''telegram|notification|send'' | tail -10\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s ''http://localhost:32334/api/v1/approvals/pending''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s ''http://localhost:32334/api/v1/incidents?limit=2'' && echo ''---'' && curl -s ''http://localhost:32334/api/v1/approvals/pending''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod | grep worker && echo ''---'' && kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-6b8cc94d9c-xjdwr -n awoooi-prod --tail=40\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy allow-required-egress -n awoooi-prod -o jsonpath=''{.spec.podSelector}''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl patch networkpolicy allow-required-egress -n awoooi-prod --type=''json'' -p=''[{\"\"op\"\": \"\"replace\"\", \"\"path\"\": \"\"/spec/podSelector\"\", \"\"value\"\": {\"\"matchLabels\"\": {\"\"system\"\": \"\"awoooi\"\"}}}]''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl delete pod awoooi-worker-6b8cc94d9c-xjdwr -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-6b8cc94d9c-pmzj7 -n awoooi-prod --tail=30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-6b8cc94d9c-pmzj7 -n awoooi-prod --tail=20\")",
|
||||
"Bash(ls -la /Users/ogt/awoooi/apps/api/scripts/fire*.py)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=50\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s ''http://localhost:32334/api/v1/incidents?limit=3''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod 2>&1 | grep -iE ''proposal|approval|llm|ai|ollama|generate'' | tail -20\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get deployment awoooi-worker -n awoooi-prod -o jsonpath=''{.spec.template.spec.containers[0].envFrom}''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get deployment awoooi-api -n awoooi-prod -o jsonpath=''{.spec.template.spec.containers[0].envFrom}''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get configmap awoooi-config -n awoooi-prod -o jsonpath=''''{.data}''''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''{.data}'' | tr '','' ''\\\\n''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl exec deployment/awoooi-api -n awoooi-prod -- python -c ''import os; print\\(os.getenv\\(\"\"DATABASE_URL\"\", \"\"NOT SET\"\"\\)[:50]\\)''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-75ffbfb88b-2htfh -n awoooi-prod --tail=50\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl exec awoooi-api-6687db5564-rv755 -n awoooi-prod -- env | grep DATABASE\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"PGPASSWORD=''CHANGE_ME'' psql -h 192.168.0.188 -U awoooi -d awoooi_prod -c ''SELECT 1'' 2>&1 || echo ''Connection failed''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod\")",
|
||||
"Bash(curl -sv http://192.168.0.120:32334/api/v1/health)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-75ffbfb88b-2htfh -n awoooi-prod --tail=20 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-7fb7d5b55f-n48gk -n awoooi-prod --tail=20 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get rs -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl scale rs awoooi-api-75ffbfb88b -n awoooi-prod --replicas=0\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl scale rs awoooi-worker-7fb7d5b55f -n awoooi-prod --replicas=0\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=10\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get deploy -n awoooi-prod -o wide\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get deploy awoooi-api -n awoooi-prod -o jsonpath=''{.spec.replicas}''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get deploy awoooi-worker -n awoooi-prod -o jsonpath=''{.spec.replicas}''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=5s\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout history deployment/awoooi-api -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout undo deployment/awoooi-api -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout undo deployment/awoooi-worker -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=30s\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get rs awoooi-api-6687db5564 -n awoooi-prod -o jsonpath=''{.metadata.annotations.deployment\\\\.kubernetes\\\\.io/revision}''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl delete pod awoooi-api-7f487f7cbb-5f88g -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout undo deployment/awoooi-api -n awoooi-prod --to-revision=46\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=15\")",
|
||||
"Bash(curl -s http://192.168.0.120:32334/api/v1/incidents?limit=3)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --since=2m\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --since=2m | grep -i webhook\")",
|
||||
"Bash(curl -sv -X POST http://192.168.0.120:32334/api/v1/webhooks/alertmanager -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get endpoints -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/health | jq ''{status}''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --since=30s\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-fc4744758-7wfv5 -n awoooi-prod --tail=30 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-6fc548887b-b9mtf -n awoooi-prod --tail=30 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get configmap awoooi-config -n awoooi-prod -o yaml\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''''{.data}''''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pod awoooi-worker-6fc548887b-b9mtf -n awoooi-prod -o jsonpath=''{.metadata.labels}''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy -n awoooi-prod -o yaml\")",
|
||||
"Bash(ssh wooo@192.168.0.120 'kubectl patch networkpolicy allow-required-egress -n awoooi-prod --type=json -p=\"\"[{\\\\\"\"op\\\\\"\": \\\\\"\"replace\\\\\"\", \\\\\"\"path\\\\\"\": \\\\\"\"/spec/podSelector/matchLabels\\\\\"\", \\\\\"\"value\\\\\"\": {\\\\\"\"system\\\\\"\": \\\\\"\"awoooi\\\\\"\"}}]\"\"')",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout restart deployment/awoooi-api deployment/awoooi-worker -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-6c69b77894-d6jqq -n awoooi-prod --tail=20\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl run nc-test --rm -it --restart=Never --image=busybox -- nc -zv 192.168.0.188 5432\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod -o=custom-columns=''NAME:.metadata.name,IMAGE:.spec.containers[0].image''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl exec awoooi-api-6687db5564-rv755 -n awoooi-prod -- ls -la *.db 2>/dev/null || echo ''No SQLite files''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl exec awoooi-api-6687db5564-rv755 -n awoooi-prod -- env | grep -E ''MOCK|DATABASE|SQLITE''\")",
|
||||
"Bash(curl -s \"http://192.168.0.120:32334/api/v1/approvals\")",
|
||||
"Bash(python -m py_compile src/lewooogo_brain/engines/incident_engine.py src/lewooogo_brain/engines/proposal_engine.py src/lewooogo_brain/skills/loader.py)",
|
||||
"Bash(python packages/lewooogo-brain/tests/test_skill_loader.py)",
|
||||
"Bash(python packages/lewooogo-brain/tests/test_incident_engine.py)",
|
||||
"Bash(python packages/lewooogo-brain/tests/test_guardrails.py)",
|
||||
"Bash(python -m py_compile src/lewooogo_brain/engines/proposal_engine.py src/lewooogo_brain/engines/incident_engine.py src/lewooogo_brain/skills/loader.py)",
|
||||
"Bash(PYTHONPATH=/Users/ogt/awoooi/packages/lewooogo-brain/src python -c \":*)",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8000/api/v1/health)",
|
||||
"Bash(curl -s \"https://awoooi.wooo.work/api/v1/approvals/pending\")",
|
||||
"Bash(curl -s \"https://awoooi.wooo.work/api/v1/approvals?status=pending\")",
|
||||
"Bash(curl -s \"https://awoooi.wooo.work/api/v1/incidents\")",
|
||||
"Bash(uv sync:*)",
|
||||
"Bash(python -c \"from src.routers.proposals import router; print\\(''✅ Router 語法驗證通過''\\)\")",
|
||||
"Bash(curl -s -X GET \"https://awoooi.wooo.work/api/v1/health\" --connect-timeout 10)",
|
||||
"Bash(curl -s -X GET \"https://awoooi.wooo.work/api/v1/incidents\" --connect-timeout 10)",
|
||||
"Bash(curl -s -o /dev/null -w \"%{http_code}\" \"https://awoooi.wooo.work\" --connect-timeout 10)",
|
||||
"Bash(curl -s -o /dev/null -w \"%{http_code}\" -L \"https://awoooi.wooo.work\" --connect-timeout 10)",
|
||||
"Bash(curl -s -X POST \"https://awoooi.wooo.work/api/v1/incidents/test-123/propose\" -H \"Content-Type: application/json\" -d '{\"\"require_dry_run\"\": true}' --connect-timeout 10)",
|
||||
"Bash(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no ollama@192.168.0.120 \"kubectl get pods -n awoooi-prod -o wide\")",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs awoooi-api-64c8659cff-grslz -n awoooi-prod --tail=50)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data.DATABASE_URL}')",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl rollout restart deployment/awoooi-api -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n awoooi-prod -l app=awoooi-api)",
|
||||
"Bash(curl -s \"https://awoooi.wooo.work/api/v1/health\" --connect-timeout 10)",
|
||||
"Bash(curl -s -o /dev/null -w \"%{http_code}\" -L \"https://awoooi.wooo.work/zh-TW\" --connect-timeout 10)",
|
||||
"Bash(python -c \"from src.routers.proposals import router; print\\(''✅ Router import successful''\\)\")",
|
||||
"Bash(PGPASSWORD=postgres psql -h 192.168.0.188 -U awoooi -d awoooi_dev -c \"SELECT incident_id, status, severity FROM incidents LIMIT 5;\")",
|
||||
"Bash(PGPASSWORD=AwoooiProd2026 psql -h 192.168.0.188 -U awoooi -d awoooi_prod -c \"SELECT incident_id, status, severity FROM incidents LIMIT 5;\")",
|
||||
"Bash(curl -sf http://192.168.0.120:32334/api/v1/incidents)",
|
||||
"Bash(curl -v \"http://192.168.0.120:32334/api/v1/incidents\")",
|
||||
"Bash(export KUBECONFIG=/Users/ogt/.kube/config-120)",
|
||||
"Bash(curl -sI \"http://awoooi.wooo.work/\")",
|
||||
"Bash(openssl s_client -servername awoooi.wooo.work -connect awoooi.wooo.work:443)",
|
||||
"Bash(openssl x509:*)",
|
||||
"Bash(curl -s -X POST \"http://192.168.0.120:32334/api/v1/incidents/INC-20260323-7DE10B/propose\" -H \"Content-Type: application/json\" -d '{\"\"\"\"require_dry_run\"\"\"\": true}')",
|
||||
"Bash(python -c \"from src.services.executor import execute_approved_proposal, get_executor, ActionExecutor; print\\(''✅ Import successful''\\)\")",
|
||||
"Bash(curl -s https://awoooi.woooo.cc/api/v1/incidents)",
|
||||
"Bash(curl -s https://awoooi.woooo.cc/api/v1/health)",
|
||||
"Bash(curl -s --connect-timeout 10 https://awoooi.woooo.cc/api/v1/health)",
|
||||
"Bash(ssh ogt@192.168.70.202 \"sudo kubectl get pods -n awoooi 2>/dev/null\")",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.70.200:8000/api/v1/health)",
|
||||
"Bash(ssh ogt@192.168.70.202 \"sudo kubectl get pods -n awoooi-prod\")",
|
||||
"Bash(ssh -o StrictHostKeyChecking=no ogt@192.168.70.202 \"sudo kubectl get pods -n awoooi-prod\")",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -A)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-worker-7479556d76-jbbps --tail 30)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod -l app=awoooi-api --tail 20)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod deployment/awoooi-api -- curl -s http://localhost:8000/api/v1/incidents)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod deployment/awoooi-api -- python -c \"import httpx; r = httpx.get\\(''http://localhost:8000/api/v1/incidents''\\); print\\(r.text\\)\")",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get ingress -n awoooi-prod -o wide)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get svc -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get deployment awoooi-worker -n awoooi-prod -o jsonpath='{.spec.template.spec.containers[0].env}')",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.70.202:32334/api/v1/health)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl describe deployment awoooi-worker -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get configmap -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl describe deployment awoooi-api -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get configmap awoooi-config -n awoooi-prod -o yaml)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get secrets -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data}')",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data.REDIS_URL}')",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl rollout restart deployment/awoooi-worker -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n awoooi-prod -l app=awoooi-worker)",
|
||||
"Bash(curl -s --connect-timeout 5 https://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(curl -s https://awoooi.wooo.work/api/v1/incidents)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod -l app=awoooi-worker --tail 10)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get svc -n wooo-aiops-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get svc -A)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-worker-76bdf9786d-rvtmz --tail 15)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod deployment/awoooi-api -- python -c \"import os; print\\(os.getenv\\(''REDIS_URL'', ''NOT_SET''\\)\\)\")",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get deployment awoooi-api -n awoooi-prod -o yaml)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl rollout restart deployment/awoooi-api deployment/awoooi-worker -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-api-865cdc97db-6mpzz --tail 20)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n wooo-aiops-prod -l app=redis)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n wooo-aiops-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n wooo-aiops-prod redis-6c6fcd64b8-8wznx -- redis-cli ping)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod awoooi-api-6445c76797-mrl7p -- python -c \"import redis; r=redis.Redis\\(host=''10.43.239.47'', port=6379, db=10\\); print\\(r.ping\\(\\)\\)\")",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get networkpolicy -A)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get networkpolicy allow-required-egress -n awoooi-prod -o yaml)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl patch networkpolicy allow-required-egress -n awoooi-prod --type='json' -p='[{\"\"op\"\": \"\"add\"\", \"\"path\"\": \"\"/spec/egress/0/ports/-\"\", \"\"value\"\": {\"\"port\"\": 6379, \"\"protocol\"\": \"\"TCP\"\"}}]')",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-api-5fcc484b85-qpwt6 --tail 15)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod awoooi-api-6445c76797-mrl7p -- python -c \"import os; print\\(''REDIS_URL:'', os.getenv\\(''REDIS_URL''\\)\\); import redis; r=redis.Redis.from_url\\(os.getenv\\(''REDIS_URL''\\)\\); print\\(''PING:'', r.ping\\(\\)\\)\")",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-worker-59d7588d75-p5tht --tail 20)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod -l app=awoooi-worker --tail 30)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get deployment awoooi-worker -n awoooi-prod -o yaml)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get networkpolicy -n awoooi-prod -o wide)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl apply -f -)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-worker-6cd7dcbc9-5mtfq --tail 15)",
|
||||
"Bash(jq .incidents[0])",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get configmap awoooi-config -n awoooi-prod -o jsonpath='{.data.OPENCLAW_URL}')",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8088/health)",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8088/)",
|
||||
"Bash(nc -zv 192.168.0.188 8088 -w 5)",
|
||||
"Bash(ping -c 2 192.168.0.188)",
|
||||
"Bash(ping -c 2 192.168.70.202)",
|
||||
"Bash(grep -n \"mapToDualState\" /Users/ogt/awoooi/apps/web/src/app/[locale]/page.tsx -A 30)",
|
||||
"Bash(head -40 /Users/ogt/awoooi/apps/web/src/app/[locale]/page.tsx)",
|
||||
"Bash(ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps -a | grep -i claw; docker start openclaw 2>/dev/null || docker start clawbot 2>/dev/null || echo ''Container not found, listing all:'' && docker ps -a --format ''table {{.Names}}\\\\t{{.Status}}'' | head -10\")",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8089/health)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl rollout status deployment/awoooi-web -n awoooi-prod --timeout=60s)",
|
||||
"Bash(grep -rn \"clawbot\\\\|ClawBot\" /Users/ogt/awoooi/ --include=*.yaml --include=*.yml --include=*.json)",
|
||||
"Bash(grep -rn \"ClawBot\\\\|clawbot\" /Users/ogt/awoooi/apps/ --include=*.py --include=*.ts --include=*.tsx)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs deployment/awoooi-api -n awoooi-prod --tail=100)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200)",
|
||||
"Bash(export KUBECONFIG=/Users/ogt/awoooi/k3s-prod.yaml)",
|
||||
"Bash(ssh root@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200 2>&1 | grep -iE ''error|fail|exception|execute|background|parse'' | tail -40\")",
|
||||
"Bash(curl -s https://awoooi.wooo.work/api/v1/approvals)",
|
||||
"Bash(ssh k3s@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200 2>&1 | grep -iE ''error|fail|execute|background|parse'' | tail -40\")",
|
||||
"Bash(ssh ubuntu@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200 2>&1 | grep -iE ''error|fail|execute|background|parse'' | tail -40\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200 2>&1 | grep -iE ''error|fail|execute|background|parse|skip'' | tail -50\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=500 2>&1 | grep -iE ''background_execution|approve_action|reject|k8s_executor'' | tail -30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get deploy,sts -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=50 2>&1 | grep -iE ''background_execution|k8s_executor|parse'' | tail -10\")"
|
||||
],
|
||||
"additionalDirectories": [
|
||||
"/Users/ogt/awoooi/docs",
|
||||
"/Users/ogt/.claude/projects/-Users-ogt-awoooi/memory",
|
||||
"/Users/ogt/awoooi/apps/web/src/app",
|
||||
"/Users/ogt/awoooi/apps/api",
|
||||
"/Users/ogt/awoooi/apps/api/http:/localhost:8000/api/v1",
|
||||
"/Users/ogt/awoooi/apps/web/public",
|
||||
"/Users/ogt/Downloads",
|
||||
"/Users/ogt/awoooi/apps/web/test-results",
|
||||
"/Users/ogt/awoooi",
|
||||
"/Users/ogt/awoooi/apps/web/src/app/[locale]",
|
||||
"/tmp"
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -19,10 +19,18 @@
|
||||
|
||||
# 文件與腳本(不需要進 image)
|
||||
# 注意: docs/runbooks/, docs/adr/, .agents/skills/ 供 RAG 索引 (ADR-067 Phase 33)
|
||||
# scripts/ 大部分不需要進 image,但 CronJob 腳本需要
|
||||
# scripts/ 大部分不需要進 image,僅白名單 production runtime/ops 種子腳本
|
||||
# 2026-04-12 ogt (ADR-073 P2-1): 白名單允許 cron_km_vectorize.py
|
||||
scripts
|
||||
# 2026-05-13 codex: 白名單 T16 auto-repair canary PlayBook seed script
|
||||
# 2026-05-31 codex: MOMO backup Ansible playbook copies the backup script from
|
||||
# the controller image; keep only this backup script in the runtime context.
|
||||
scripts/**
|
||||
!scripts/
|
||||
!scripts/cron_km_vectorize.py
|
||||
!scripts/backup/
|
||||
!scripts/backup/backup-momo-188-pg.sh
|
||||
!scripts/ops/
|
||||
!scripts/ops/awooop-seed-auto-repair-canary-playbook.py
|
||||
|
||||
# Node 快取(monorepo 根目錄)
|
||||
node_modules
|
||||
|
||||
@@ -10,7 +10,7 @@ on:
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: self-hosted
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
|
||||
@@ -43,10 +43,19 @@ jobs:
|
||||
├ 📝 ${{ steps.commit.outputs.message }}
|
||||
├ 🔖 <code>${{ steps.commit.outputs.short_sha }}</code>
|
||||
└ 🌿 dev branch"
|
||||
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text@-"
|
||||
if AWOOI_CICD_STATUS=running \
|
||||
AWOOI_CICD_STAGE=dev-deploy \
|
||||
AWOOI_CICD_JOB_NAME="[DEV] 部署開始" \
|
||||
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
|
||||
AWOOI_CICD_SUMMARY="${{ steps.commit.outputs.message }}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "Dev deploy start notification mirrored through AWOOI API"
|
||||
else
|
||||
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text@-"
|
||||
fi
|
||||
|
||||
# API 測試 (同 prod CI,確保 dev 也通過)
|
||||
- name: Run API Tests
|
||||
@@ -78,11 +87,18 @@ jobs:
|
||||
echo "✅ API 測試通過"
|
||||
|
||||
- name: Login to Harbor
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.HARBOR }}
|
||||
username: ${{ secrets.HARBOR_USERNAME }}
|
||||
password: ${{ secrets.HARBOR_PASSWORD }}
|
||||
run: |
|
||||
HARBOR_USERNAME="$(cat <<'AWOOOI_SECRET_HARBOR_USERNAME'
|
||||
${{ secrets.HARBOR_USERNAME }}
|
||||
AWOOOI_SECRET_HARBOR_USERNAME
|
||||
)"
|
||||
HARBOR_PASSWORD="$(cat <<'AWOOOI_SECRET_HARBOR_PASSWORD'
|
||||
${{ secrets.HARBOR_PASSWORD }}
|
||||
AWOOOI_SECRET_HARBOR_PASSWORD
|
||||
)"
|
||||
printf '%s' "$HARBOR_PASSWORD" | docker login "${{ env.HARBOR }}" \
|
||||
-u "$HARBOR_USERNAME" \
|
||||
--password-stdin
|
||||
|
||||
# Dev API 鏡像:強制重建,不用 cache(確保 models.json 等配置文件更新)
|
||||
- name: Build and Push API (Dev)
|
||||
@@ -98,34 +114,57 @@ jobs:
|
||||
|
||||
# 注入 Dev K8s Secrets
|
||||
- name: Inject Dev K8s Secrets
|
||||
env:
|
||||
SSH_PRIVATE_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
|
||||
TG_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
TG_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
|
||||
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
|
||||
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
||||
run: |
|
||||
secret_b64() {
|
||||
python3 -c 'import base64, sys; data=sys.stdin.buffer.read(); data=data[:-1] if data.endswith(b"\n") else data; sys.stdout.write(base64.b64encode(data).decode())'
|
||||
}
|
||||
write_deploy_key() {
|
||||
mkdir -p ~/.ssh
|
||||
umask 077
|
||||
cat > ~/.ssh/deploy_key <<'AWOOOI_DEPLOY_KEY'
|
||||
${{ secrets.DEPLOY_SSH_KEY }}
|
||||
AWOOOI_DEPLOY_KEY
|
||||
chmod 600 ~/.ssh/deploy_key
|
||||
}
|
||||
TG_BOT_TOKEN_B64="$(secret_b64 <<'AWOOOI_SECRET_TG_BOT_TOKEN'
|
||||
${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
AWOOOI_SECRET_TG_BOT_TOKEN
|
||||
)"
|
||||
TG_CHAT_ID_B64="$(secret_b64 <<'AWOOOI_SECRET_TG_CHAT_ID'
|
||||
${{ secrets.TELEGRAM_CHAT_ID }}
|
||||
AWOOOI_SECRET_TG_CHAT_ID
|
||||
)"
|
||||
NVIDIA_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_NVIDIA_API_KEY'
|
||||
${{ secrets.NVIDIA_API_KEY }}
|
||||
AWOOOI_SECRET_NVIDIA_API_KEY
|
||||
)"
|
||||
GEMINI_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_GEMINI_API_KEY'
|
||||
${{ secrets.GEMINI_API_KEY }}
|
||||
AWOOOI_SECRET_GEMINI_API_KEY
|
||||
)"
|
||||
|
||||
mkdir -p ~/.ssh
|
||||
echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
|
||||
chmod 600 ~/.ssh/deploy_key
|
||||
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.121 << SECRETS
|
||||
write_deploy_key
|
||||
# 2026-05-05 Codex: kubectl runs on 120 control-plane. 121 is a
|
||||
# worker and its local kubeconfig points at 127.0.0.1:6443.
|
||||
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.120 << SECRETS
|
||||
set -e
|
||||
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
|
||||
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-dev --type='json' -p='[
|
||||
{"op":"replace","path":"/data/OPENCLAW_TG_BOT_TOKEN","value":"'"$(echo -n "${TG_BOT_TOKEN}" | base64 -w 0)"'"},
|
||||
{"op":"replace","path":"/data/OPENCLAW_TG_CHAT_ID","value":"'"$(echo -n "${TG_CHAT_ID}" | base64 -w 0)"'"}
|
||||
{"op":"replace","path":"/data/OPENCLAW_TG_BOT_TOKEN","value":"${TG_BOT_TOKEN_B64}"},
|
||||
{"op":"replace","path":"/data/OPENCLAW_TG_CHAT_ID","value":"${TG_CHAT_ID_B64}"}
|
||||
]' || echo "⚠️ Telegram Secrets patch 跳過"
|
||||
|
||||
if [ -n "${NVIDIA_API_KEY}" ]; then
|
||||
if [ -n "${NVIDIA_API_KEY_B64}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-dev --type='json' -p='[
|
||||
{"op":"replace","path":"/data/NVIDIA_API_KEY","value":"'"$(echo -n "${NVIDIA_API_KEY}" | base64 -w 0)"'"}
|
||||
{"op":"replace","path":"/data/NVIDIA_API_KEY","value":"${NVIDIA_API_KEY_B64}"}
|
||||
]' && echo "✅ NVIDIA_API_KEY 已注入 dev"
|
||||
fi
|
||||
|
||||
if [ -n "${GEMINI_API_KEY}" ]; then
|
||||
if [ -n "${GEMINI_API_KEY_B64}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-dev --type='json' -p='[
|
||||
{"op":"replace","path":"/data/GEMINI_API_KEY","value":"'"$(echo -n "${GEMINI_API_KEY}" | base64 -w 0)"'"}
|
||||
{"op":"replace","path":"/data/GEMINI_API_KEY","value":"${GEMINI_API_KEY_B64}"}
|
||||
]' && echo "✅ GEMINI_API_KEY 已注入 dev"
|
||||
fi
|
||||
|
||||
@@ -134,14 +173,12 @@ jobs:
|
||||
|
||||
# 部署到 awoooi-dev
|
||||
- name: Deploy to Dev K8s
|
||||
env:
|
||||
SSH_PRIVATE_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
|
||||
run: |
|
||||
cat k8s/awoooi-dev/02-configmap.yaml | \
|
||||
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.121 \
|
||||
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.120 \
|
||||
"export KUBECONFIG=/etc/rancher/k3s/k3s.yaml && sudo kubectl apply -f -"
|
||||
|
||||
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.121 << 'DEPLOY'
|
||||
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.120 << 'DEPLOY'
|
||||
set -e
|
||||
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
|
||||
|
||||
@@ -182,10 +219,20 @@ jobs:
|
||||
├ 🔖 <code>${{ steps.commit.outputs.short_sha }}</code>
|
||||
├ ⏱️ 耗時: ${MINUTES}m ${SECONDS}s
|
||||
└ 🩺 http://192.168.0.125:32344/api/v1/health"
|
||||
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text@-"
|
||||
if AWOOI_CICD_STATUS=success \
|
||||
AWOOI_CICD_STAGE=dev-deploy \
|
||||
AWOOI_CICD_JOB_NAME="[DEV] 部署完成" \
|
||||
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
|
||||
AWOOI_CICD_DURATION_SECONDS="${DURATION}" \
|
||||
AWOOI_CICD_SUMMARY="${{ steps.commit.outputs.message }}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "Dev deploy success notification mirrored through AWOOI API"
|
||||
else
|
||||
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text@-"
|
||||
fi
|
||||
|
||||
- name: Notify Dev Deploy Failure
|
||||
if: failure()
|
||||
@@ -194,7 +241,16 @@ jobs:
|
||||
├ 📝 ${{ steps.commit.outputs.message }}
|
||||
├ 🔖 <code>${{ steps.commit.outputs.short_sha }}</code>
|
||||
└ 🔗 <a href=\"http://192.168.0.110:3001/wooo/awoooi/actions\">查看日誌</a>"
|
||||
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text@-"
|
||||
if AWOOI_CICD_STATUS=failed \
|
||||
AWOOI_CICD_STAGE=dev-deploy \
|
||||
AWOOI_CICD_JOB_NAME="[DEV] 部署失敗" \
|
||||
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
|
||||
AWOOI_CICD_SUMMARY="${{ steps.commit.outputs.message }}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "Dev deploy failure notification mirrored through AWOOI API"
|
||||
else
|
||||
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text@-"
|
||||
fi
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -30,6 +30,9 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 50
|
||||
|
||||
- name: Guard Workflow Secret Surfaces
|
||||
run: node scripts/ci/check-gitea-step-env-secrets.js
|
||||
|
||||
- name: Skip Stale Main Push
|
||||
id: stale
|
||||
run: |
|
||||
@@ -102,7 +105,6 @@ jobs:
|
||||
- name: Notify Code Review Start
|
||||
if: steps.stale.outputs.skip != 'true'
|
||||
env:
|
||||
TG_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
TG_CHAT_ID: ${{ env.TELEGRAM_ALERT_CHAT_ID }}
|
||||
SHORT_SHA: ${{ steps.ctx.outputs.short_sha }}
|
||||
BRANCH: ${{ steps.ctx.outputs.branch }}
|
||||
@@ -110,18 +112,33 @@ jobs:
|
||||
FILES_DISPLAY: ${{ steps.ctx.outputs.files_display }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${TG_CHAT_ID:-}" ]; then
|
||||
echo "Telegram secret missing; skip start notification"
|
||||
exit 0
|
||||
fi
|
||||
TG_BOT_TOKEN="$(cat <<'AWOOOI_SECRET_TG_BOT_TOKEN'
|
||||
${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
AWOOOI_SECRET_TG_BOT_TOKEN
|
||||
)"
|
||||
html_escape() { sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g'; }
|
||||
COMMIT_ESC="$(printf '%s' "$COMMIT_MSG" | html_escape)"
|
||||
FILES_ESC="$(printf '%s\n' "$FILES_DISPLAY" | html_escape)"
|
||||
MSG="$(printf '🔍 <b>Code Review 啟動</b>\n──────────────────────\n📦 Commit <code>%s</code> 🌿 <code>%s</code>\n📝 <code>%s</code>\n📁 <b>變更檔案:</b>\n%s\n──────────────────────\n🤖 <b>Hermes → OpenClaw → Elephant Alpha → NemoTron</b>\n📊 即時進度:<a href=\"%s\">%s</a>' "$SHORT_SHA" "$BRANCH" "$COMMIT_ESC" "$FILES_ESC" "$REPORT_URL" "$REPORT_URL")"
|
||||
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$(jq -n --arg c "$TG_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
|
||||
>/dev/null
|
||||
if AWOOI_CICD_STATUS=running \
|
||||
AWOOI_CICD_STAGE=code-review \
|
||||
AWOOI_CICD_JOB_NAME="Code Review 啟動" \
|
||||
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
|
||||
AWOOI_CICD_TRIGGERED_BY="${GITHUB_ACTOR:-CI}" \
|
||||
AWOOI_CICD_SUMMARY="${COMMIT_MSG}" \
|
||||
AWOOI_CICD_WORKFLOW_URL="${REPORT_URL}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "Code review start notification mirrored through AWOOI API"
|
||||
else
|
||||
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${TG_CHAT_ID:-}" ]; then
|
||||
echo "Telegram secret missing and AWOOI API notify failed; skip start notification"
|
||||
exit 0
|
||||
fi
|
||||
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$(jq -n --arg c "$TG_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
|
||||
>/dev/null
|
||||
fi
|
||||
|
||||
- name: Run Deterministic Review
|
||||
if: steps.stale.outputs.skip != 'true'
|
||||
@@ -139,15 +156,14 @@ jobs:
|
||||
- name: Notify Code Review Completion
|
||||
if: always() && steps.stale.outputs.skip != 'true'
|
||||
env:
|
||||
TG_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
TG_CHAT_ID: ${{ env.TELEGRAM_ALERT_CHAT_ID }}
|
||||
SHORT_SHA: ${{ steps.ctx.outputs.short_sha }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${TG_CHAT_ID:-}" ]; then
|
||||
echo "Telegram secret missing; skip completion notification"
|
||||
exit 0
|
||||
fi
|
||||
TG_BOT_TOKEN="$(cat <<'AWOOOI_SECRET_TG_BOT_TOKEN'
|
||||
${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
AWOOOI_SECRET_TG_BOT_TOKEN
|
||||
)"
|
||||
REPORT=/tmp/code-review-report.json
|
||||
if [ ! -s "$REPORT" ]; then
|
||||
cat > "$REPORT" <<'JSON'
|
||||
@@ -180,7 +196,25 @@ jobs:
|
||||
TOP_ESC="$(printf '%s' "$TOP_ISSUE" | html_escape)"
|
||||
|
||||
MSG="$(printf '%s <b>Code Review 完成・%s</b>\n──────────────────────\n🔴 CRITICAL <code>%s</code> 🟠 HIGH <code>%s</code> 🟡 MEDIUM <code>%s</code> 🟢 LOW <code>%s</code>\n──────────────────────\n⚠️ <b>主要問題</b>\n%s\n\n🔍 <b>整體風險等級</b>\n%s:%s\n\n⚠️ <b>最高關注問題</b>\n1. %s\n──────────────────────\n🤖 Elephant Alpha:<b>%s</b> ✅ %s\n📊 完整報告:<a href=\"%s\">%s</a>' "$STATUS" "$SHORT_SHA" "$CRITICAL" "$HIGH" "$MEDIUM" "$LOW" "$ISSUE_LINE" "$RISK" "$SUMMARY_ESC" "$TOP_ESC" "$RISK" "$ACTION_ESC" "$REPORT_URL" "$REPORT_URL")"
|
||||
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$(jq -n --arg c "$TG_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
|
||||
>/dev/null
|
||||
CICD_STATUS=success
|
||||
if [ "$RISK" = "MEDIUM" ]; then CICD_STATUS=pending; fi
|
||||
if [ "$RISK" = "HIGH" ] || [ "$RISK" = "CRITICAL" ]; then CICD_STATUS=failed; fi
|
||||
if AWOOI_CICD_STATUS="${CICD_STATUS}" \
|
||||
AWOOI_CICD_STAGE=code-review \
|
||||
AWOOI_CICD_JOB_NAME="Code Review 完成・${RISK}" \
|
||||
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
|
||||
AWOOI_CICD_TRIGGERED_BY="${GITHUB_ACTOR:-CI}" \
|
||||
AWOOI_CICD_SUMMARY="CRITICAL=${CRITICAL}; HIGH=${HIGH}; MEDIUM=${MEDIUM}; LOW=${LOW}; ${SUMMARY}" \
|
||||
AWOOI_CICD_WORKFLOW_URL="${REPORT_URL}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "Code review completion notification mirrored through AWOOI API"
|
||||
else
|
||||
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${TG_CHAT_ID:-}" ]; then
|
||||
echo "Telegram secret missing and AWOOI API notify failed; skip completion notification"
|
||||
exit 0
|
||||
fi
|
||||
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$(jq -n --arg c "$TG_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
|
||||
>/dev/null
|
||||
fi
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# =============================================================================
|
||||
# Deploy Prometheus Alert Rules (獨立 workflow)
|
||||
# 2026-04-05 Claude Code (ADR-039 I3): 從 cd.yaml 分離
|
||||
# 觸發條件: ops/monitoring/alerts-unified.yml 有變更 或 workflow_dispatch
|
||||
# 觸發條件: ops/monitoring/alerts-unified.yml / slo-rules.yml 有變更 或 workflow_dispatch
|
||||
# 說明: 告警規則部署不依賴應用構建,獨立觸發以加快響應速度
|
||||
# =============================================================================
|
||||
|
||||
@@ -12,6 +12,8 @@ on:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'ops/monitoring/alerts-unified.yml'
|
||||
- 'ops/monitoring/slo-rules.yml'
|
||||
- 'scripts/ops/deploy-alerts.sh'
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
@@ -30,11 +32,15 @@ jobs:
|
||||
run: |
|
||||
pip3 install -q pyyaml 2>/dev/null || pip install -q pyyaml
|
||||
python3 -c "import yaml; yaml.safe_load(open('ops/monitoring/alerts-unified.yml')); print('YAML OK')"
|
||||
python3 -c "import yaml; yaml.safe_load(open('ops/monitoring/slo-rules.yml')); print('SLO YAML OK')"
|
||||
|
||||
- name: Setup SSH key
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
echo "${{ secrets.DEPLOY_SSH_KEY }}" > ~/.ssh/id_ed25519
|
||||
umask 077
|
||||
cat > ~/.ssh/id_ed25519 <<'AWOOOI_DEPLOY_KEY'
|
||||
${{ secrets.DEPLOY_SSH_KEY }}
|
||||
AWOOOI_DEPLOY_KEY
|
||||
chmod 600 ~/.ssh/id_ed25519
|
||||
ssh-keyscan 192.168.0.110 >> ~/.ssh/known_hosts
|
||||
|
||||
@@ -50,6 +56,17 @@ jobs:
|
||||
SHORT_SHA="${{ github.sha }}"
|
||||
SHORT_SHA="${SHORT_SHA:0:7}"
|
||||
MSG="${EMOJI} Prometheus 告警規則部署 ${STATUS} (${SHORT_SHA})"
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
--data-urlencode "text=${MSG}" || true
|
||||
CICD_STATUS="success"
|
||||
[ "$STATUS" != "success" ] && CICD_STATUS="failed"
|
||||
if AWOOI_CICD_STATUS="${CICD_STATUS}" \
|
||||
AWOOI_CICD_STAGE=deploy-alerts \
|
||||
AWOOI_CICD_JOB_NAME="Prometheus 告警規則部署" \
|
||||
AWOOI_CICD_COMMIT_SHA="${{ github.sha }}" \
|
||||
AWOOI_CICD_SUMMARY="${MSG}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "Alert rule deploy notification mirrored through AWOOI API"
|
||||
else
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
--data-urlencode "text=${MSG}" || true
|
||||
fi
|
||||
|
||||
@@ -51,10 +51,52 @@ jobs:
|
||||
echo "status=failed" >> $GITHUB_OUTPUT
|
||||
exit 1
|
||||
|
||||
- name: Source Provider Freshness Smoke
|
||||
run: |
|
||||
SOURCE_CANARY_RUN_REF="gitea-e2e-${GITHUB_RUN_ID:-manual}-${GITHUB_RUN_ATTEMPT:-1}"
|
||||
echo "SOURCE_CANARY_RUN_REF=${SOURCE_CANARY_RUN_REF}" >> "$GITHUB_ENV"
|
||||
echo "SOURCE_LINK_CANARY_WORK_ITEM_ID=source-evidence:sentry:upstream_canary:awoooi-source-link-canary-${SOURCE_CANARY_RUN_REF}" >> "$GITHUB_ENV"
|
||||
OPERATOR_KEY="$(cat <<'AWOOOI_SECRET_AWOOOP_OPERATOR_API_KEY'
|
||||
${{ secrets.AWOOOP_OPERATOR_API_KEY }}
|
||||
AWOOOI_SECRET_AWOOOP_OPERATOR_API_KEY
|
||||
)"
|
||||
AWOOOP_OPERATOR_API_KEY="${OPERATOR_KEY}" \
|
||||
AWOOOP_OPERATOR_ID=gitea-e2e-health \
|
||||
python3 scripts/alert_chain_smoke_test.py \
|
||||
--api-url https://awoooi.wooo.work \
|
||||
--metrics-api-url http://192.168.0.125:32334 \
|
||||
--source-provider-heartbeat \
|
||||
--source-provider-upstream-canary \
|
||||
--run-ref "${SOURCE_CANARY_RUN_REF}" \
|
||||
--source-link-canary-target-incident-id INC-20260505-25E744 \
|
||||
--json
|
||||
|
||||
- name: Source Correlation Applied-Link Smoke
|
||||
run: |
|
||||
python3 scripts/awooop_source_correlation_apply_smoke.py \
|
||||
--api-url https://awoooi.wooo.work \
|
||||
--target-incident-id INC-20260505-25E744 \
|
||||
--allow-existing-apply \
|
||||
--refresh-if-stale-days 6 \
|
||||
--refresh-work-item-id "${SOURCE_LINK_CANARY_WORK_ITEM_ID}" \
|
||||
--verify-refresh-candidate \
|
||||
--reviewer-id gitea_e2e_source_link_canary \
|
||||
--operator-note "T124 dedicated source-link canary refresh; append-only status-chain proof"
|
||||
|
||||
- name: Notify Telegram on Failure
|
||||
if: failure()
|
||||
run: |
|
||||
curl -s -X POST "https://api.telegram.org/bot${{ secrets.OPENCLAW_TG_BOT_TOKEN }}/sendMessage" \
|
||||
-d chat_id="${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d parse_mode="HTML" \
|
||||
-d text="🔴 <b>[E2E Health Check]</b> 失敗%0A%0A📅 $(TZ=Asia/Taipei date '+%Y-%m-%d %H:%M')%0A🔗 API 健康檢查未通過%0A%0A請檢查 K3s 叢集狀態"
|
||||
MSG="E2E Health Check 失敗;API 健康檢查未通過"
|
||||
if AWOOI_CICD_STATUS=failed \
|
||||
AWOOI_CICD_STAGE=e2e-health \
|
||||
AWOOI_CICD_JOB_NAME="E2E Health Check" \
|
||||
AWOOI_CICD_COMMIT_SHA="${{ github.sha }}" \
|
||||
AWOOI_CICD_SUMMARY="${MSG}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "E2E failure notification mirrored through AWOOI API"
|
||||
else
|
||||
curl -s -X POST "https://api.telegram.org/bot${{ secrets.OPENCLAW_TG_BOT_TOKEN }}/sendMessage" \
|
||||
-d chat_id="${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d parse_mode="HTML" \
|
||||
-d text="🔴 <b>[E2E Health Check]</b> 失敗%0A%0A📅 $(TZ=Asia/Taipei date '+%Y-%m-%d %H:%M')%0A🔗 API 健康檢查未通過%0A%0A請檢查 K3s 叢集狀態"
|
||||
fi
|
||||
|
||||
@@ -17,6 +17,7 @@ on:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'apps/api/migrations/*.sql'
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
TELEGRAM_ALERT_CHAT_ID: "-1003711974679"
|
||||
@@ -56,45 +57,101 @@ jobs:
|
||||
- name: Identify new migrations
|
||||
id: diff
|
||||
run: |
|
||||
NEW_FILES=$(git diff --name-only --diff-filter=A HEAD~1 HEAD -- 'apps/api/migrations/*.sql' || true)
|
||||
ALL_NEW_FILES=$(git diff --no-renames --name-only --diff-filter=A HEAD~1 HEAD -- 'apps/api/migrations/*.sql' || true)
|
||||
NEW_FILES=$(echo "$ALL_NEW_FILES" | grep -Ev '(_down|rollback)\.sql$' || true)
|
||||
SKIPPED_ROLLBACK_FILES=$(echo "$ALL_NEW_FILES" | grep -E '(_down|rollback)\.sql$' || true)
|
||||
echo "new_files<<EOF" >> $GITHUB_OUTPUT
|
||||
echo "$NEW_FILES" >> $GITHUB_OUTPUT
|
||||
echo "EOF" >> $GITHUB_OUTPUT
|
||||
echo "=== New migration files ==="
|
||||
echo "$NEW_FILES"
|
||||
if [ -n "$SKIPPED_ROLLBACK_FILES" ]; then
|
||||
echo "=== Rollback/down migrations skipped by design ==="
|
||||
echo "$SKIPPED_ROLLBACK_FILES"
|
||||
fi
|
||||
|
||||
- name: Apply new migrations
|
||||
if: steps.diff.outputs.new_files != ''
|
||||
env:
|
||||
# 從 Gitea secrets 取,不直接明碼
|
||||
PGURL: ${{ secrets.MIGRATION_DATABASE_URL }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# 從 Gitea secrets 取,不放 step-level env,避免 runner log 展開。
|
||||
# MIGRATION_DATABASE_URL 是限權帳號;DATABASE_URL 只在 PostgreSQL
|
||||
# 明確回報「必須是 table owner」時作為受控 fallback。
|
||||
PGURL="$(cat <<'AWOOOI_SECRET_MIGRATION_DATABASE_URL'
|
||||
${{ secrets.MIGRATION_DATABASE_URL }}
|
||||
AWOOOI_SECRET_MIGRATION_DATABASE_URL
|
||||
)"
|
||||
OWNER_PGURL="$(cat <<'AWOOOI_SECRET_DATABASE_URL'
|
||||
${{ secrets.DATABASE_URL }}
|
||||
AWOOOI_SECRET_DATABASE_URL
|
||||
)"
|
||||
if [ -z "$PGURL" ]; then
|
||||
echo "::error::MIGRATION_DATABASE_URL secret not set in Gitea"
|
||||
exit 1
|
||||
fi
|
||||
PGURL_PSQL="${PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
|
||||
OWNER_PGURL_PSQL="${OWNER_PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
|
||||
|
||||
apply_migration() {
|
||||
local url="$1"
|
||||
local file="$2"
|
||||
psql "$url" \
|
||||
-v ON_ERROR_STOP=1 \
|
||||
--single-transaction \
|
||||
-f "$file"
|
||||
}
|
||||
|
||||
# 套用每個新檔 (single transaction per file)
|
||||
echo "${{ steps.diff.outputs.new_files }}" | while IFS= read -r file; do
|
||||
[ -z "$file" ] && continue
|
||||
echo "=== Applying: $file ==="
|
||||
psql "$PGURL_PSQL" \
|
||||
-v ON_ERROR_STOP=1 \
|
||||
--single-transaction \
|
||||
-f "$file"
|
||||
migration_err="$(mktemp)"
|
||||
if ! apply_migration "$PGURL_PSQL" "$file" 2>"$migration_err"; then
|
||||
if grep -Eq "(must be owner of table|permission denied for table)" "$migration_err"; then
|
||||
if [ -z "$OWNER_PGURL_PSQL" ]; then
|
||||
cat "$migration_err" >&2
|
||||
echo "::error::migration requires table owner but DATABASE_URL secret is not set"
|
||||
exit 1
|
||||
fi
|
||||
echo "::warning::migration requires table owner; retrying with owner connection"
|
||||
apply_migration "$OWNER_PGURL_PSQL" "$file"
|
||||
else
|
||||
cat "$migration_err" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
rm -f "$migration_err"
|
||||
echo "=== OK: $file ==="
|
||||
done
|
||||
|
||||
- name: Seed asset_discovery_run (audit)
|
||||
if: steps.diff.outputs.new_files != ''
|
||||
env:
|
||||
PGURL: ${{ secrets.MIGRATION_DATABASE_URL }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
PGURL="$(cat <<'AWOOOI_SECRET_MIGRATION_DATABASE_URL'
|
||||
${{ secrets.MIGRATION_DATABASE_URL }}
|
||||
AWOOOI_SECRET_MIGRATION_DATABASE_URL
|
||||
)"
|
||||
OWNER_PGURL="$(cat <<'AWOOOI_SECRET_DATABASE_URL'
|
||||
${{ secrets.DATABASE_URL }}
|
||||
AWOOOI_SECRET_DATABASE_URL
|
||||
)"
|
||||
if [ -z "$PGURL" ]; then
|
||||
echo "::error::MIGRATION_DATABASE_URL secret not set in Gitea"
|
||||
exit 1
|
||||
fi
|
||||
PGURL_PSQL="${PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
|
||||
OWNER_PGURL_PSQL="${OWNER_PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
|
||||
FILES_JSON=$(echo "${{ steps.diff.outputs.new_files }}" | jq -Rn '[inputs | select(length > 0)]')
|
||||
psql "$PGURL_PSQL" -c "
|
||||
SUMMARY_JSON=$(jq -cn \
|
||||
--arg commit_sha "${{ github.sha }}" \
|
||||
--argjson files "$FILES_JSON" \
|
||||
'{type: "ci_migration", commit_sha: $commit_sha, files: $files}')
|
||||
SUMMARY_JSON_SQL=${SUMMARY_JSON//\'/\'\'}
|
||||
|
||||
seed_audit() {
|
||||
local url="$1"
|
||||
psql "$url" -v ON_ERROR_STOP=1 <<SQL
|
||||
INSERT INTO asset_discovery_run (
|
||||
run_id, triggered_by, scope, scan_depth, status,
|
||||
started_at, ended_at, tools_used, summary
|
||||
@@ -106,23 +163,51 @@ jobs:
|
||||
'success',
|
||||
NOW(),
|
||||
NOW(),
|
||||
'{\"psql\": 1, \"gitea_ci\": 1}'::jsonb,
|
||||
jsonb_build_object(
|
||||
'type', 'ci_migration',
|
||||
'commit_sha', '${{ github.sha }}',
|
||||
'files', $FILES_JSON
|
||||
)
|
||||
'{"psql": 1, "gitea_ci": 1}'::jsonb,
|
||||
'${SUMMARY_JSON_SQL}'::jsonb
|
||||
);
|
||||
"
|
||||
SQL
|
||||
}
|
||||
|
||||
audit_err="$(mktemp)"
|
||||
if ! seed_audit "$PGURL_PSQL" 2>"$audit_err"; then
|
||||
if grep -q "permission denied for table asset_discovery_run" "$audit_err"; then
|
||||
if [ -z "$OWNER_PGURL_PSQL" ]; then
|
||||
cat "$audit_err" >&2
|
||||
echo "::error::audit requires table insert privilege but DATABASE_URL secret is not set"
|
||||
exit 1
|
||||
fi
|
||||
echo "::warning::audit requires owner connection; retrying with owner connection"
|
||||
seed_audit "$OWNER_PGURL_PSQL"
|
||||
else
|
||||
cat "$audit_err" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
rm -f "$audit_err"
|
||||
|
||||
- name: Notify Telegram (if configured)
|
||||
if: always()
|
||||
env:
|
||||
TG_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
TG_CHAT: ${{ env.TELEGRAM_ALERT_CHAT_ID }}
|
||||
run: |
|
||||
TG_TOKEN="$(cat <<'AWOOOI_SECRET_TG_TOKEN'
|
||||
${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
AWOOOI_SECRET_TG_TOKEN
|
||||
)"
|
||||
STATUS="${{ job.status }}"
|
||||
CICD_STATUS="success"
|
||||
[ "$STATUS" != "success" ] && CICD_STATUS="failed"
|
||||
if AWOOI_CICD_STATUS="${CICD_STATUS}" \
|
||||
AWOOI_CICD_STAGE=run-migration \
|
||||
AWOOI_CICD_JOB_NAME="Migration CI" \
|
||||
AWOOI_CICD_COMMIT_SHA="${{ github.sha }}" \
|
||||
AWOOI_CICD_SUMMARY="Migration CI: ${STATUS}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "Migration notification mirrored through AWOOI API"
|
||||
exit 0
|
||||
fi
|
||||
if [ -n "$TG_TOKEN" ] && [ -n "$TG_CHAT" ]; then
|
||||
STATUS="${{ job.status }}"
|
||||
MSG="🗄️ Migration CI: \`${STATUS}\` — commit ${{ github.sha }}"
|
||||
curl -s -X POST "https://api.telegram.org/bot${TG_TOKEN}/sendMessage" \
|
||||
-d chat_id="${TG_CHAT}" \
|
||||
|
||||
25
.github/workflows/cd.yaml
vendored
25
.github/workflows/cd.yaml
vendored
@@ -13,12 +13,10 @@
|
||||
|
||||
name: CD
|
||||
|
||||
# 2026-05-12 Codex: GitHub 僅保留唯讀備份;生產 CI/CD 只能從 Gitea 執行。
|
||||
# 本 workflow 曾可 push / workflow_dispatch 後 build、patch secret、kubectl apply,
|
||||
# 會和 `.gitea/workflows/cd.yaml` 競爭 K3s production 狀態,因此硬停用。
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- 'docs/**'
|
||||
- '*.md'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
force_deploy:
|
||||
@@ -60,6 +58,7 @@ jobs:
|
||||
# ==================== Pre-flight Check (10s Fail-Fast) ====================
|
||||
pre-flight-check:
|
||||
name: "Pre-flight Check"
|
||||
if: ${{ false }}
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
timeout-minutes: 1
|
||||
steps:
|
||||
@@ -133,6 +132,7 @@ jobs:
|
||||
# 2026-03-29 Claude Code: 確保監控覆蓋率 >= 90%
|
||||
monitoring-coverage:
|
||||
name: "Monitoring Coverage"
|
||||
if: ${{ false }}
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
needs: pre-flight-check
|
||||
timeout-minutes: 2
|
||||
@@ -152,6 +152,7 @@ jobs:
|
||||
# ==================== 路徑偵測 (使用 dorny/paths-filter) ====================
|
||||
detect-changes:
|
||||
name: Detect Changes
|
||||
if: ${{ false }}
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
needs: [pre-flight-check, monitoring-coverage]
|
||||
timeout-minutes: 1
|
||||
@@ -197,11 +198,7 @@ jobs:
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
needs: [detect-changes, build-web]
|
||||
timeout-minutes: 20
|
||||
if: |
|
||||
!inputs.skip_api && (
|
||||
needs.detect-changes.outputs.api == 'true' ||
|
||||
(needs.detect-changes.outputs.api == 'false' && needs.detect-changes.outputs.web == 'false')
|
||||
)
|
||||
if: ${{ false }}
|
||||
outputs:
|
||||
image_tag: ${{ steps.tag.outputs.tag }}
|
||||
steps:
|
||||
@@ -238,11 +235,7 @@ jobs:
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
needs: detect-changes
|
||||
timeout-minutes: 20
|
||||
if: |
|
||||
!inputs.skip_web && (
|
||||
needs.detect-changes.outputs.web == 'true' ||
|
||||
(needs.detect-changes.outputs.api == 'false' && needs.detect-changes.outputs.web == 'false')
|
||||
)
|
||||
if: ${{ false }}
|
||||
outputs:
|
||||
image_tag: ${{ steps.tag.outputs.tag }}
|
||||
steps:
|
||||
@@ -293,7 +286,7 @@ jobs:
|
||||
concurrency:
|
||||
group: runner-awoooi-cd-mutex
|
||||
cancel-in-progress: false
|
||||
if: always() && (needs.build-api.result == 'success' || needs.build-api.result == 'skipped') && (needs.build-web.result == 'success' || needs.build-web.result == 'skipped')
|
||||
if: ${{ false }}
|
||||
environment: production
|
||||
steps:
|
||||
# 2026-03-29: Runner 診斷檔案清理 (防止並行衝突)
|
||||
|
||||
17
.github/workflows/deploy-prod.yml
vendored
17
.github/workflows/deploy-prod.yml
vendored
@@ -14,15 +14,10 @@
|
||||
|
||||
name: Deploy to Production
|
||||
|
||||
# 2026-05-12 Codex: GitHub 是唯讀備份,production deploy 只能從 Gitea 進入。
|
||||
# 這份歷史 workflow 仍含 Harbor build/push 與 kubectl apply/rollout,會和 Gitea CD 競爭。
|
||||
# 保留檔案供稽核,但停用所有 job。
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- 'apps/api/**'
|
||||
- 'apps/web/**'
|
||||
- 'k8s/awoooi-prod/**'
|
||||
- '.github/workflows/deploy-prod.yml'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
deploy_api:
|
||||
@@ -70,6 +65,7 @@ jobs:
|
||||
# ===========================================================================
|
||||
build:
|
||||
name: "Build Images"
|
||||
if: ${{ false }}
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
outputs:
|
||||
image_tag: ${{ steps.meta.outputs.tag }}
|
||||
@@ -138,6 +134,7 @@ jobs:
|
||||
deploy:
|
||||
name: "Deploy to K3s"
|
||||
needs: build
|
||||
if: ${{ false }}
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
|
||||
steps:
|
||||
@@ -210,7 +207,7 @@ jobs:
|
||||
smoke-test:
|
||||
name: "Smoke Tests"
|
||||
needs: deploy
|
||||
if: ${{ !inputs.skip_tests }}
|
||||
if: ${{ false }}
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
|
||||
steps:
|
||||
@@ -248,7 +245,7 @@ jobs:
|
||||
notify:
|
||||
name: "Send Notification"
|
||||
needs: [build, deploy, smoke-test]
|
||||
if: always()
|
||||
if: ${{ false }}
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
|
||||
steps:
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -93,3 +93,4 @@ tsconfig.tsbuildinfo
|
||||
!.aiderignore
|
||||
.claude/settings.local.json
|
||||
.claude/settings.json
|
||||
.claude/settings.json.bak*
|
||||
|
||||
@@ -31,6 +31,9 @@
|
||||
|
||||
## 🔴 絕對禁止 → [HARD_RULES.md](docs/HARD_RULES.md)
|
||||
|
||||
## 🔴 文件語言鐵律 → [文件語言規範](docs/HARD_RULES.md#文件語言規範)
|
||||
Markdown、ADR、LOGBOOK、Runbook、交接文件與計畫文件一律使用繁體中文;程式符號、API、指令、錯誤碼、服務名稱與原始 log 可保留英文。
|
||||
|
||||
## 🔴 紅區治理 → [RED_ZONES.md](docs/RED_ZONES.md)
|
||||
Tier 3 核心檔案 (decision_manager, trust_engine, config 等) 修改需首席架構師授權
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
# 2026-04-05 warm-up deploy triggered
|
||||
# 2026-05-20 source-provider-heartbeat deploy trigger
|
||||
|
||||
@@ -44,28 +44,6 @@ FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy installed packages from builder
|
||||
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
|
||||
COPY --from=builder /usr/local/bin /usr/local/bin
|
||||
|
||||
# 2026-04-01 ogt: CACHE_BUST 強制失效 src/ 和 models.json 層
|
||||
# deps 層 (pip install) 仍可 cache;代碼/配置變更必須重建
|
||||
ARG CACHE_BUST=none
|
||||
COPY apps/api/src/ ./src/
|
||||
COPY apps/api/models.json ./models.json
|
||||
# 2026-04-09 ogt: 規則引擎配置 — alert_rule_engine.py 從此檔載入規則
|
||||
COPY apps/api/alert_rules.yaml ./alert_rules.yaml
|
||||
# 2026-04-10 Claude Sonnet 4.6: drift_detector 需要 k8s/ YAML 做 Git state 比對
|
||||
COPY k8s/ ./k8s/
|
||||
# 2026-04-10 Claude Sonnet 4.6: RAG 知識庫索引來源 (ADR-067 Phase 33)
|
||||
COPY docs/ ./docs/
|
||||
COPY .agents/skills/ ./.agents/skills/
|
||||
# 2026-05-04 Claude Sonnet 4.6 (Task 1.2): hermes agent_loader 的 system prompt 來源
|
||||
# agent_loader.py 預設讀 /app/.claude/agents/,對應 K8s AGENTS_DIR 環境變數
|
||||
COPY .claude/agents/ ./.claude/agents/
|
||||
# 2026-04-12 ogt (ADR-073 P2-1): CronJob 腳本 — 獨立腳本取代 inline Python
|
||||
COPY scripts/ ./scripts/
|
||||
|
||||
# Install openssh-client + curl — SSH_COMMAND Playbook + healthcheck
|
||||
# Install kubectl — drift_detector 需要 kubectl 讀取 K8s 實際狀態
|
||||
# (2026-04-09 Claude Sonnet 4.6 Asia/Taipei, Bug #6 修正 — python:3.11-slim 無 openssh-client)
|
||||
@@ -75,8 +53,38 @@ RUN apt-get update && apt-get install -y --no-install-recommends openssh-client
|
||||
chmod +x kubectl && mv kubectl /usr/local/bin/kubectl && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Create non-root user
|
||||
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
|
||||
# Create non-root user before copying app artifacts so COPY --chown can avoid
|
||||
# an expensive full-tree chown layer on every source-only rebuild.
|
||||
RUN useradd -m -u 1000 appuser
|
||||
|
||||
# Copy installed packages from builder
|
||||
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
|
||||
COPY --from=builder /usr/local/bin /usr/local/bin
|
||||
|
||||
# 2026-04-01 ogt: CACHE_BUST 強制失效 src/ 和 models.json 層
|
||||
# deps 層 (pip install) 仍可 cache;代碼/配置變更必須重建
|
||||
ARG CACHE_BUST=none
|
||||
COPY --chown=appuser:appuser apps/api/src/ ./src/
|
||||
# 2026-04-09 ogt: model config, then rule-engine config — alert_rule_engine.py loads its rules from alert_rules.yaml (not models.json)
|
||||
COPY --chown=appuser:appuser apps/api/models.json ./models.json
|
||||
COPY --chown=appuser:appuser apps/api/alert_rules.yaml ./alert_rules.yaml
|
||||
# 2026-04-10 Claude Sonnet 4.6: drift_detector 需要 k8s/ YAML 做 Git state 比對
|
||||
COPY --chown=appuser:appuser k8s/ ./k8s/
|
||||
# 2026-05-24 Codex: truth-chain / Ansible readiness needs the repo-known
|
||||
# playbook catalog in the API image.
|
||||
# 2026-05-31 Codex: ansible-core is now installed through pyproject.toml so
|
||||
# this catalog can graduate from visibility-only to check-mode runtime-ready
|
||||
# once repair SSH material is mounted and readable. This still does not enable
|
||||
# automatic apply; approval/execution code remains the gate.
|
||||
COPY --chown=appuser:appuser infra/ansible/ ./infra/ansible/
|
||||
# 2026-04-10 Claude Sonnet 4.6: RAG 知識庫索引來源 (ADR-067 Phase 33)
|
||||
COPY --chown=appuser:appuser docs/ ./docs/
|
||||
COPY --chown=appuser:appuser .agents/skills/ ./.agents/skills/
|
||||
# 2026-05-04 Claude Sonnet 4.6 (Task 1.2): hermes agent_loader 的 system prompt 來源
|
||||
# agent_loader.py 預設讀 /app/.claude/agents/,對應 K8s AGENTS_DIR 環境變數
|
||||
COPY --chown=appuser:appuser .claude/agents/ ./.claude/agents/
|
||||
# 2026-04-12 ogt (ADR-073 P2-1): CronJob 腳本 — 獨立腳本取代 inline Python
|
||||
COPY --chown=appuser:appuser scripts/ ./scripts/
|
||||
USER appuser
|
||||
|
||||
# Expose port
|
||||
|
||||
@@ -219,7 +219,7 @@ rules:
|
||||
optimization:
|
||||
- type: SYSTEMD_GUARDRAIL
|
||||
description: "人工批准後停用錯誤 watchdog drop-in,並為 runner 加 CPUQuota=200%、MemoryMax=2G"
|
||||
command: "bash scripts/ops/apply-runner-systemd-guardrails.sh --apply"
|
||||
command: "sudo /home/wooo/scripts/apply-runner-systemd-guardrails.sh --apply"
|
||||
- type: CI_CAPACITY
|
||||
description: "若 110 同時承載 Sentry/ClickHouse/Gitea,不應讓多個 runner 無限制並行"
|
||||
command: "檢查 active jobs、runner 數量與 Gitea Actions concurrency,必要時分流 runner"
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
-- ADR-090 capacity_violation_event metric violation types
|
||||
-- 日期:2026-05-07(台北)
|
||||
-- Purpose: let the fine-grained cpu/mem/swap violation types written by capacity_scanner_job.py pass the DB constraint (this migration also allows load_over_threshold).
|
||||
--
|
||||
-- 背景:
|
||||
-- capacity_scanner_job.py 會寫入:
|
||||
-- - cpu_over_threshold
|
||||
-- - mem_over_threshold
|
||||
-- - swap_over_threshold
|
||||
-- 但原始 ADR-090 DDL 只允許較粗的 host_saturation,導致 production 出現
|
||||
-- capacity_violation_event_type_valid check violation,容量治理事件漏記。
|
||||
|
||||
BEGIN;
|
||||
|
||||
ALTER TABLE capacity_violation_event
|
||||
DROP CONSTRAINT IF EXISTS capacity_violation_event_type_valid;
|
||||
|
||||
ALTER TABLE capacity_violation_event
|
||||
ADD CONSTRAINT capacity_violation_event_type_valid
|
||||
CHECK (violation_type IN (
|
||||
'no_limit_set',
|
||||
'over_request',
|
||||
'over_limit',
|
||||
'host_saturation',
|
||||
'over_sla_budget',
|
||||
'unauthorized_new_deploy',
|
||||
'cpu_over_threshold',
|
||||
'mem_over_threshold',
|
||||
'swap_over_threshold',
|
||||
'load_over_threshold'
|
||||
));
|
||||
|
||||
COMMIT;
|
||||
|
||||
-- Rollback(需人工確認後執行):
|
||||
-- BEGIN;
|
||||
-- ALTER TABLE capacity_violation_event
|
||||
-- DROP CONSTRAINT IF EXISTS capacity_violation_event_type_valid;
|
||||
-- ALTER TABLE capacity_violation_event
|
||||
-- ADD CONSTRAINT capacity_violation_event_type_valid
|
||||
-- CHECK (violation_type IN (
|
||||
-- 'no_limit_set',
|
||||
-- 'over_request',
|
||||
-- 'over_limit',
|
||||
-- 'host_saturation',
|
||||
-- 'over_sla_budget',
|
||||
-- 'unauthorized_new_deploy'
|
||||
-- ));
|
||||
-- COMMIT;
|
||||
36
apps/api/migrations/adr090d_ansible_operation_types.sql
Normal file
36
apps/api/migrations/adr090d_ansible_operation_types.sql
Normal file
@@ -0,0 +1,36 @@
|
||||
-- ADR-090-D: automation_operation_log.operation_type adds Ansible executor audit states
|
||||
-- Created: 2026-05-12 Taipei
|
||||
--
|
||||
-- Purpose:
|
||||
-- T3 Ansible declarative executor visibility. These operation types allow
|
||||
-- the AI automation truth chain to record that Ansible was matched,
|
||||
-- check-mode executed, applied, rolled back, or explicitly skipped.
|
||||
--
|
||||
-- Safety:
|
||||
-- This migration only expands the CHECK allowlist. It does not execute
|
||||
-- Ansible, change approval behavior, or create auto-remediation rows.
|
||||
|
||||
-- Swap the CHECK allowlist atomically. DROP + ADD must succeed or fail together:
-- if ADD CONSTRAINT is rejected (e.g. an existing row holds a value outside the
-- list), the transaction rolls back and the previous constraint stays in place
-- instead of leaving automation_operation_log unconstrained.
BEGIN;

ALTER TABLE automation_operation_log
    DROP CONSTRAINT IF EXISTS automation_operation_log_type_valid;

ALTER TABLE automation_operation_log
    ADD CONSTRAINT automation_operation_log_type_valid CHECK (operation_type IN (
        'monitor_configured','monitor_removed',
        'alert_fired','alert_suppressed','alert_routed',
        'rule_created','rule_updated','rule_matched','rule_rejected','rule_deprecated',
        'playbook_generated','playbook_updated','playbook_executed',
        'remediation_executed','remediation_verified','remediation_rolled_back',
        'self_correction_attempted',
        'km_created','km_updated','km_linked',
        'asset_discovered','coverage_recalculated',
        'capacity_recommendation','quota_enforced',
        'notification_formatted',
        -- ADR-090-D additions: Ansible executor audit states.
        'ansible_candidate_matched',
        'ansible_check_mode_executed',
        'ansible_apply_executed',
        'ansible_rollback_executed',
        'ansible_execution_skipped'
    ));

COMMENT ON CONSTRAINT automation_operation_log_type_valid ON automation_operation_log IS
    'ADR-090-D: allow first-class Ansible executor audit states for AwoooP truth-chain visibility.';

COMMIT;
|
||||
19
apps/api/migrations/adr090d_ansible_operation_types_down.sql
Normal file
19
apps/api/migrations/adr090d_ansible_operation_types_down.sql
Normal file
@@ -0,0 +1,19 @@
|
||||
-- ADR-090-D rollback: remove Ansible executor audit states from operation_type allowlist.
|
||||
-- Only apply after confirming no automation_operation_log rows use ansible_* operation types.
|
||||
|
||||
-- Restore the pre-ADR-090-D allowlist atomically. If any row still uses an
-- ansible_* operation type, ADD CONSTRAINT fails; running inside a transaction
-- makes that failure roll back the DROP as well, so the wider constraint is
-- kept rather than leaving the table with no CHECK at all.
BEGIN;

ALTER TABLE automation_operation_log
    DROP CONSTRAINT IF EXISTS automation_operation_log_type_valid;

ALTER TABLE automation_operation_log
    ADD CONSTRAINT automation_operation_log_type_valid CHECK (operation_type IN (
        'monitor_configured','monitor_removed',
        'alert_fired','alert_suppressed','alert_routed',
        'rule_created','rule_updated','rule_matched','rule_rejected','rule_deprecated',
        'playbook_generated','playbook_updated','playbook_executed',
        'remediation_executed','remediation_verified','remediation_rolled_back',
        'self_correction_attempted',
        'km_created','km_updated','km_linked',
        'asset_discovered','coverage_recalculated',
        'capacity_recommendation','quota_enforced',
        'notification_formatted'
    ));

COMMIT;
|
||||
@@ -0,0 +1,164 @@
|
||||
-- T9: approved SSH execution MCP Gateway seed
|
||||
-- 目的:讓 Telegram/Approval 已批准的 SSH 修復動作通過 AwoooP Gateway 五閘門。
|
||||
-- 邊界:只授權 approval_executor;write/admin 仍需 Gate 5 短效 approval key。
|
||||
|
||||
SELECT set_config('app.project_id', 'awoooi', FALSE);
|
||||
|
||||
WITH agent_body AS (
|
||||
SELECT jsonb_build_object(
|
||||
'schema_version', 'awooop_agent_contract_v1',
|
||||
'agent_id', 'approval_executor',
|
||||
'display_name', 'Approval Executor',
|
||||
'project_id', 'awoooi',
|
||||
'purpose', 'Approved SSH execution through AwoooP MCP Gateway',
|
||||
'allowed_scopes', jsonb_build_array('read', 'write', 'admin'),
|
||||
'requires_gate5_for_scopes', jsonb_build_array('write', 'admin'),
|
||||
'stage', 't9_ssh_approval_gateway'
|
||||
) AS body_json
|
||||
),
|
||||
inserted_revision AS (
|
||||
INSERT INTO awooop_contract_revisions (
|
||||
project_id,
|
||||
contract_family,
|
||||
contract_id,
|
||||
version_major,
|
||||
version_minor,
|
||||
lifecycle_status,
|
||||
body_json,
|
||||
body_hash,
|
||||
body_schema_version,
|
||||
publisher_id,
|
||||
published_at
|
||||
)
|
||||
SELECT
|
||||
'awoooi',
|
||||
'agent',
|
||||
'approval_executor',
|
||||
1,
|
||||
0,
|
||||
'active',
|
||||
body_json,
|
||||
encode(digest(body_json::text, 'sha256'), 'hex'),
|
||||
'v1.0',
|
||||
'migration:t9_ssh_approval_gateway',
|
||||
NOW()
|
||||
FROM agent_body
|
||||
ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
|
||||
DO NOTHING
|
||||
RETURNING revision_id, project_id, contract_family, contract_id
|
||||
),
|
||||
chosen_revision AS (
|
||||
SELECT revision_id, project_id, contract_family, contract_id
|
||||
FROM inserted_revision
|
||||
UNION ALL
|
||||
SELECT revision_id, project_id, contract_family, contract_id
|
||||
FROM awooop_contract_revisions
|
||||
WHERE project_id = 'awoooi'
|
||||
AND contract_family = 'agent'
|
||||
AND contract_id = 'approval_executor'
|
||||
AND version_major = 1
|
||||
AND version_minor = 0
|
||||
AND lifecycle_status = 'active'
|
||||
),
|
||||
upsert_pointer AS (
|
||||
INSERT INTO awooop_active_revisions (
|
||||
project_id,
|
||||
contract_family,
|
||||
contract_id,
|
||||
active_revision_id,
|
||||
updated_at
|
||||
)
|
||||
SELECT DISTINCT ON (project_id, contract_family, contract_id)
|
||||
project_id,
|
||||
contract_family,
|
||||
contract_id,
|
||||
revision_id,
|
||||
NOW()
|
||||
FROM chosen_revision
|
||||
ORDER BY project_id, contract_family, contract_id, revision_id
|
||||
ON CONFLICT (project_id, contract_family, contract_id)
|
||||
DO UPDATE SET
|
||||
active_revision_id = EXCLUDED.active_revision_id,
|
||||
updated_at = NOW()
|
||||
RETURNING contract_id
|
||||
)
|
||||
SELECT 'approval_executor_active_contracts', count(*) FROM upsert_pointer;
|
||||
|
||||
WITH gateway_tools(tool_name, description, required_scope) AS (
|
||||
VALUES
|
||||
('ssh_diagnose', 'SSH host diagnosis read', 'read'),
|
||||
('ssh_docker_restart', 'Approved Docker container restart over SSH', 'write'),
|
||||
('ssh_docker_compose_restart', 'Approved Docker Compose service restart over SSH', 'write'),
|
||||
('ssh_systemctl_restart', 'Approved systemd service restart over SSH', 'write'),
|
||||
('ssh_clear_docker_logs', 'Approved Docker log truncation over SSH', 'write'),
|
||||
('ssh_renew_ssl', 'Approved certbot renewal over SSH', 'write'),
|
||||
('ssh_reload_nginx', 'Approved nginx config test and reload over SSH', 'write'),
|
||||
('ssh_docker_prune', 'Approved Docker prune over SSH with provider disk guard', 'admin')
|
||||
),
|
||||
upsert_tools AS (
|
||||
INSERT INTO awooop_mcp_tool_registry (
|
||||
project_id,
|
||||
tool_name,
|
||||
tool_type,
|
||||
description,
|
||||
allowed_scopes,
|
||||
environment_tags,
|
||||
is_active,
|
||||
updated_at
|
||||
)
|
||||
SELECT
|
||||
'awoooi',
|
||||
tool_name,
|
||||
'mcp_server',
|
||||
description,
|
||||
jsonb_build_array(required_scope),
|
||||
'{"env": "prod"}'::jsonb,
|
||||
TRUE,
|
||||
NOW()
|
||||
FROM gateway_tools
|
||||
ON CONFLICT (project_id, tool_name)
|
||||
DO UPDATE SET
|
||||
description = EXCLUDED.description,
|
||||
allowed_scopes = EXCLUDED.allowed_scopes,
|
||||
environment_tags = EXCLUDED.environment_tags,
|
||||
is_active = TRUE,
|
||||
updated_at = NOW()
|
||||
RETURNING tool_id, tool_name, allowed_scopes
|
||||
),
|
||||
upsert_grants AS (
|
||||
INSERT INTO awooop_mcp_grants (
|
||||
project_id,
|
||||
agent_id,
|
||||
tool_id,
|
||||
granted_by,
|
||||
granted_scopes,
|
||||
expires_at,
|
||||
is_revoked,
|
||||
revoked_at,
|
||||
revoked_by
|
||||
)
|
||||
SELECT
|
||||
'awoooi',
|
||||
'approval_executor',
|
||||
tool_id,
|
||||
'migration:t9_ssh_approval_gateway',
|
||||
allowed_scopes,
|
||||
NULL,
|
||||
FALSE,
|
||||
NULL,
|
||||
NULL
|
||||
FROM upsert_tools
|
||||
ON CONFLICT (project_id, agent_id, tool_id)
|
||||
DO UPDATE SET
|
||||
granted_by = EXCLUDED.granted_by,
|
||||
granted_scopes = EXCLUDED.granted_scopes,
|
||||
expires_at = NULL,
|
||||
is_revoked = FALSE,
|
||||
revoked_at = NULL,
|
||||
revoked_by = NULL
|
||||
RETURNING grant_id
|
||||
)
|
||||
SELECT
|
||||
'approval_executor_ssh_gateway',
|
||||
(SELECT count(*) FROM upsert_tools) AS tool_rows,
|
||||
(SELECT count(*) FROM upsert_grants) AS grant_rows;
|
||||
@@ -0,0 +1,43 @@
|
||||
-- Rollback for T9 approved SSH execution MCP Gateway seed.
|
||||
-- Contract revisions are append-only; rollback revokes approval_executor grants
|
||||
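-- and deactivates only the write/admin tools introduced here (ssh_diagnose stays active).
-- It also removes the approval_executor active-revision pointer and marks the seeded
-- revision 'revoked'; revision rows are updated in place, never deleted.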
|
||||
|
||||
SELECT set_config('app.project_id', 'awoooi', FALSE);
|
||||
|
||||
UPDATE awooop_mcp_grants
|
||||
SET
|
||||
is_revoked = TRUE,
|
||||
revoked_at = NOW(),
|
||||
revoked_by = 'rollback:t9_ssh_approval_gateway'
|
||||
WHERE project_id = 'awoooi'
|
||||
AND agent_id = 'approval_executor'
|
||||
AND granted_by = 'migration:t9_ssh_approval_gateway'
|
||||
AND is_revoked = FALSE;
|
||||
|
||||
UPDATE awooop_mcp_tool_registry
|
||||
SET
|
||||
is_active = FALSE,
|
||||
updated_at = NOW()
|
||||
WHERE project_id = 'awoooi'
|
||||
AND tool_name IN (
|
||||
'ssh_docker_restart',
|
||||
'ssh_docker_compose_restart',
|
||||
'ssh_systemctl_restart',
|
||||
'ssh_clear_docker_logs',
|
||||
'ssh_renew_ssl',
|
||||
'ssh_reload_nginx',
|
||||
'ssh_docker_prune'
|
||||
);
|
||||
|
||||
DELETE FROM awooop_active_revisions
|
||||
WHERE project_id = 'awoooi'
|
||||
AND contract_family = 'agent'
|
||||
AND contract_id = 'approval_executor';
|
||||
|
||||
UPDATE awooop_contract_revisions
|
||||
SET lifecycle_status = 'revoked'
|
||||
WHERE project_id = 'awoooi'
|
||||
AND contract_family = 'agent'
|
||||
AND contract_id = 'approval_executor'
|
||||
AND publisher_id = 'migration:t9_ssh_approval_gateway'
|
||||
AND lifecycle_status = 'active';
|
||||
@@ -0,0 +1,166 @@
|
||||
-- T23: auto-repair executor read-only MCP Gateway seed
|
||||
-- 目的:讓 YAML_RULE/PlayBook 的只讀 SSH 診斷步驟經過 AwoooP MCP Gateway。
|
||||
-- 邊界:只授權 read scope;write/admin SSH 工具仍必須走 approval_executor + Gate 5。
|
||||
|
||||
SELECT set_config('app.project_id', 'awoooi', FALSE);
|
||||
|
||||
WITH agent_body AS (
|
||||
SELECT jsonb_build_object(
|
||||
'schema_version', 'awooop_agent_contract_v1',
|
||||
'agent_id', 'auto_repair_executor',
|
||||
'display_name', 'Auto Repair Executor',
|
||||
'project_id', 'awoooi',
|
||||
'purpose', 'Read-only auto-repair diagnostics through AwoooP MCP Gateway',
|
||||
'allowed_scopes', jsonb_build_array('read'),
|
||||
'forbidden_scopes', jsonb_build_array('write', 'admin'),
|
||||
'stage', 't23_auto_repair_diagnostic_gateway'
|
||||
) AS body_json
|
||||
),
|
||||
inserted_revision AS (
|
||||
INSERT INTO awooop_contract_revisions (
|
||||
project_id,
|
||||
contract_family,
|
||||
contract_id,
|
||||
version_major,
|
||||
version_minor,
|
||||
lifecycle_status,
|
||||
body_json,
|
||||
body_hash,
|
||||
body_schema_version,
|
||||
publisher_id,
|
||||
published_at
|
||||
)
|
||||
SELECT
|
||||
'awoooi',
|
||||
'agent',
|
||||
'auto_repair_executor',
|
||||
1,
|
||||
0,
|
||||
'active',
|
||||
body_json,
|
||||
encode(digest(body_json::text, 'sha256'), 'hex'),
|
||||
'v1.0',
|
||||
'migration:t23_auto_repair_executor_read_gateway',
|
||||
NOW()
|
||||
FROM agent_body
|
||||
ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
|
||||
DO NOTHING
|
||||
RETURNING revision_id, project_id, contract_family, contract_id
|
||||
),
|
||||
chosen_revision AS (
|
||||
SELECT revision_id, project_id, contract_family, contract_id
|
||||
FROM inserted_revision
|
||||
UNION ALL
|
||||
SELECT revision_id, project_id, contract_family, contract_id
|
||||
FROM awooop_contract_revisions
|
||||
WHERE project_id = 'awoooi'
|
||||
AND contract_family = 'agent'
|
||||
AND contract_id = 'auto_repair_executor'
|
||||
AND version_major = 1
|
||||
AND version_minor = 0
|
||||
AND lifecycle_status = 'active'
|
||||
),
|
||||
upsert_pointer AS (
|
||||
INSERT INTO awooop_active_revisions (
|
||||
project_id,
|
||||
contract_family,
|
||||
contract_id,
|
||||
active_revision_id,
|
||||
updated_at
|
||||
)
|
||||
SELECT DISTINCT ON (project_id, contract_family, contract_id)
|
||||
project_id,
|
||||
contract_family,
|
||||
contract_id,
|
||||
revision_id,
|
||||
NOW()
|
||||
FROM chosen_revision
|
||||
ORDER BY project_id, contract_family, contract_id, revision_id
|
||||
ON CONFLICT (project_id, contract_family, contract_id)
|
||||
DO UPDATE SET
|
||||
active_revision_id = EXCLUDED.active_revision_id,
|
||||
updated_at = NOW()
|
||||
RETURNING contract_id
|
||||
)
|
||||
SELECT 'auto_repair_executor_active_contracts', count(*) FROM upsert_pointer;
|
||||
|
||||
WITH read_tools(tool_name, description) AS (
|
||||
VALUES
|
||||
('ssh_diagnose', 'SSH host/container diagnosis read'),
|
||||
('ssh_get_top_processes', 'SSH top processes read'),
|
||||
('ssh_get_disk_usage', 'SSH disk usage read'),
|
||||
('ssh_get_memory_info', 'SSH memory info read'),
|
||||
('ssh_get_container_logs', 'SSH container logs read'),
|
||||
('ssh_get_container_status', 'SSH container status read'),
|
||||
('ssh_get_service_status', 'SSH service status read'),
|
||||
('ssh_check_port', 'SSH port check read'),
|
||||
('ssh_get_nginx_error_log', 'SSH nginx error log read'),
|
||||
('ssh_get_swap_info', 'SSH swap info read')
|
||||
),
|
||||
upsert_tools AS (
|
||||
INSERT INTO awooop_mcp_tool_registry (
|
||||
project_id,
|
||||
tool_name,
|
||||
tool_type,
|
||||
description,
|
||||
allowed_scopes,
|
||||
environment_tags,
|
||||
is_active,
|
||||
updated_at
|
||||
)
|
||||
SELECT
|
||||
'awoooi',
|
||||
tool_name,
|
||||
'mcp_server',
|
||||
description,
|
||||
'["read"]'::jsonb,
|
||||
'{"env": "prod"}'::jsonb,
|
||||
TRUE,
|
||||
NOW()
|
||||
FROM read_tools
|
||||
ON CONFLICT (project_id, tool_name)
|
||||
DO UPDATE SET
|
||||
description = EXCLUDED.description,
|
||||
allowed_scopes = EXCLUDED.allowed_scopes,
|
||||
environment_tags = EXCLUDED.environment_tags,
|
||||
is_active = TRUE,
|
||||
updated_at = NOW()
|
||||
RETURNING tool_id, tool_name, allowed_scopes
|
||||
),
|
||||
upsert_grants AS (
|
||||
INSERT INTO awooop_mcp_grants (
|
||||
project_id,
|
||||
agent_id,
|
||||
tool_id,
|
||||
granted_by,
|
||||
granted_scopes,
|
||||
expires_at,
|
||||
is_revoked,
|
||||
revoked_at,
|
||||
revoked_by
|
||||
)
|
||||
SELECT
|
||||
'awoooi',
|
||||
'auto_repair_executor',
|
||||
tool_id,
|
||||
'migration:t23_auto_repair_executor_read_gateway',
|
||||
allowed_scopes,
|
||||
NULL,
|
||||
FALSE,
|
||||
NULL,
|
||||
NULL
|
||||
FROM upsert_tools
|
||||
ON CONFLICT (project_id, agent_id, tool_id)
|
||||
DO UPDATE SET
|
||||
granted_by = EXCLUDED.granted_by,
|
||||
granted_scopes = EXCLUDED.granted_scopes,
|
||||
expires_at = NULL,
|
||||
is_revoked = FALSE,
|
||||
revoked_at = NULL,
|
||||
revoked_by = NULL
|
||||
RETURNING grant_id
|
||||
)
|
||||
SELECT
|
||||
'auto_repair_executor_read_gateway',
|
||||
(SELECT count(*) FROM upsert_tools) AS tool_rows,
|
||||
(SELECT count(*) FROM upsert_grants) AS grant_rows;
|
||||
@@ -0,0 +1,24 @@
|
||||
-- Rollback T23 auto-repair executor read-only MCP Gateway grant.
|
||||
|
||||
SELECT set_config('app.project_id', 'awoooi', FALSE);
|
||||
|
||||
-- Revoke the grants this migration seeded for auto_repair_executor.
-- Only rows that are still live are touched, so re-running the rollback (or
-- running it after a manual revoke) does not overwrite an earlier
-- revoked_at / revoked_by audit stamp.
UPDATE awooop_mcp_grants
SET is_revoked = TRUE,
    revoked_at = NOW(),
    revoked_by = 'rollback:t23_auto_repair_executor_read_gateway'
WHERE project_id = 'awoooi'
  AND agent_id = 'auto_repair_executor'
  AND granted_by = 'migration:t23_auto_repair_executor_read_gateway'
  AND is_revoked = FALSE;
|
||||
|
||||
DELETE FROM awooop_active_revisions
|
||||
WHERE project_id = 'awoooi'
|
||||
AND contract_family = 'agent'
|
||||
AND contract_id = 'auto_repair_executor';
|
||||
|
||||
UPDATE awooop_contract_revisions
|
||||
SET lifecycle_status = 'retired'
|
||||
WHERE project_id = 'awoooi'
|
||||
AND contract_family = 'agent'
|
||||
AND contract_id = 'auto_repair_executor'
|
||||
AND publisher_id = 'migration:t23_auto_repair_executor_read_gateway'
|
||||
AND lifecycle_status = 'active';
|
||||
@@ -0,0 +1,25 @@
|
||||
-- =============================================================================
|
||||
-- AwoooP / AWOOOI MCP Gateway Shadow Onboarding
|
||||
-- 2026-05-13 Codex + ogt
|
||||
--
|
||||
-- 背景:
|
||||
-- AWOOOI 已完成 read-only MCP tool registry / grants seed,但 project 本身仍停在
|
||||
-- legacy_awoooi_default,會被 MCP Gateway Gate 1 正確攔截。
|
||||
--
|
||||
-- 邊界:
|
||||
-- 只把 AWOOOI 租戶升到 shadow,讓既有 Gate 1 生效。
|
||||
-- write/admin tool 仍未授權;自動修復/破壞性動作不因本 migration 開放。
|
||||
-- =============================================================================
|
||||
|
||||
BEGIN;
|
||||
|
||||
SELECT set_config('app.project_id', 'awoooi', FALSE);
|
||||
|
||||
UPDATE awooop_projects
|
||||
SET
|
||||
migration_mode = 'shadow',
|
||||
updated_at = NOW()
|
||||
WHERE project_id = 'awoooi'
|
||||
AND migration_mode = 'legacy_awoooi_default';
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,20 @@
|
||||
-- =============================================================================
|
||||
-- Rollback: AwoooP / AWOOOI MCP Gateway Shadow Onboarding
|
||||
-- 2026-05-13 Codex + ogt
|
||||
--
|
||||
-- 只回退仍停在 shadow 的 AWOOOI;若已由人工/後續 migration 推進到 canary/active,
|
||||
-- 不自動降級。
|
||||
-- =============================================================================
|
||||
|
||||
BEGIN;
|
||||
|
||||
SELECT set_config('app.project_id', 'awoooi', FALSE);
|
||||
|
||||
UPDATE awooop_projects
|
||||
SET
|
||||
migration_mode = 'legacy_awoooi_default',
|
||||
updated_at = NOW()
|
||||
WHERE project_id = 'awoooi'
|
||||
AND migration_mode = 'shadow';
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,211 @@
|
||||
-- T7: awoooi read-only MCP Gateway seed
-- Purpose:  let the pre-decision sensing MCP calls pass AwoooP Gateway Gate 2/3
--           and produce first-class audit records.
-- Boundary: grants the read scope only; write/admin tools such as
--           restart/delete/scale/apply/rollback are NOT authorized here.

-- Third argument FALSE = session-level setting (not transaction-local).
-- NOTE(review): app.project_id is presumably read by row-level security policies -- confirm.
SELECT set_config('app.project_id', 'awoooi', FALSE);

-- Statement 1: publish agent contract v1.0 for the two read-only agents and
-- point awooop_active_revisions at it. Returns one row: a label and the number
-- of pointer rows written.
WITH agent_seed(agent_id, display_name) AS (
    VALUES
        ('pre_decision_investigator', 'Pre-decision Investigator'),
        ('post_execution_verifier', 'Post-execution Verifier')
),
agent_body AS (
    SELECT
        agent_id,
        jsonb_build_object(
            'schema_version', 'awooop_agent_contract_v1',
            'agent_id', agent_id,
            'display_name', display_name,
            'project_id', 'awoooi',
            'purpose', 'Read-only MCP sensing through AwoooP Gateway',
            'allowed_scopes', jsonb_build_array('read'),
            'forbidden_scopes', jsonb_build_array('write', 'admin'),
            'stage', 't7_mcp_gateway_read_sense'
        ) AS body_json
    FROM agent_seed
),
inserted_revision AS (
    INSERT INTO awooop_contract_revisions (
        project_id,
        contract_family,
        contract_id,
        version_major,
        version_minor,
        lifecycle_status,
        body_json,
        body_hash,
        body_schema_version,
        publisher_id,
        published_at
    )
    SELECT
        'awoooi',
        'agent',
        agent_id,
        1,
        0,
        'active',
        body_json,
        -- digest() is provided by the pgcrypto extension.
        encode(digest(body_json::text, 'sha256'), 'hex'),
        'v1.0',
        'migration:t7_mcp_gateway_read_seed',
        NOW()
    FROM agent_body
    -- An existing v1.0 revision is kept as-is (revisions are never rewritten).
    ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
    DO NOTHING
    RETURNING revision_id, project_id, contract_family, contract_id
),
chosen_revision AS (
    -- Rows written by inserted_revision are visible only through RETURNING
    -- (all parts of one statement share a snapshot), so the second branch can
    -- only match v1.0 revisions that already existed before this statement.
    SELECT revision_id, project_id, contract_family, contract_id
    FROM inserted_revision
    UNION ALL
    SELECT revision_id, project_id, contract_family, contract_id
    FROM awooop_contract_revisions
    WHERE project_id = 'awoooi'
      AND contract_family = 'agent'
      AND contract_id IN (SELECT agent_id FROM agent_seed)
      AND version_major = 1
      AND version_minor = 0
      -- NOTE(review): a pre-existing v1.0 revision that is not 'active'
      -- yields no pointer for that agent -- confirm this is intended.
      AND lifecycle_status = 'active'
),
upsert_pointer AS (
    INSERT INTO awooop_active_revisions (
        project_id,
        contract_family,
        contract_id,
        active_revision_id,
        updated_at
    )
    -- DISTINCT ON keeps exactly one candidate per contract so ON CONFLICT
    -- never sees the same key twice within this INSERT.
    SELECT DISTINCT ON (project_id, contract_family, contract_id)
        project_id,
        contract_family,
        contract_id,
        revision_id,
        NOW()
    FROM chosen_revision
    ORDER BY project_id, contract_family, contract_id, revision_id
    ON CONFLICT (project_id, contract_family, contract_id)
    DO UPDATE SET
        active_revision_id = EXCLUDED.active_revision_id,
        updated_at = NOW()
    RETURNING contract_id
)
SELECT 'active_agent_contracts', count(*) FROM upsert_pointer;

-- Statement 2: register (or re-activate) the read-only tools and grant the
-- read scope on each of them to both agents. Returns one row: a label, the
-- number of tool rows and the number of grant rows written.
WITH read_tools(tool_name, description) AS (
    VALUES
        ('k8s_get_pod_logs', 'Kubernetes pod logs read'),
        ('k8s_get_events', 'Kubernetes events read'),
        ('k8s_describe_pod', 'Kubernetes pod describe read'),
        ('k8s_get_hpa_status', 'Kubernetes HPA status read'),
        ('k8s_get_node_conditions', 'Kubernetes node conditions read'),
        ('ssh_diagnose', 'SSH host diagnosis read'),
        ('ssh_get_top_processes', 'SSH top processes read'),
        ('ssh_get_disk_usage', 'SSH disk usage read'),
        ('ssh_get_memory_info', 'SSH memory info read'),
        ('ssh_get_container_logs', 'SSH container logs read'),
        ('ssh_get_container_status', 'SSH container status read'),
        ('ssh_get_service_status', 'SSH service status read'),
        ('ssh_check_port', 'SSH port check read'),
        ('ssh_get_nginx_error_log', 'SSH nginx error log read'),
        ('ssh_get_swap_info', 'SSH swap info read'),
        ('prometheus_query', 'Prometheus instant query read'),
        ('prometheus_query_range', 'Prometheus range query read'),
        ('prometheus_get_alert_history', 'Prometheus alert history read'),
        ('gold_metrics', 'SigNoz gold metrics read'),
        ('trace_url', 'SigNoz trace URL read'),
        ('system_metrics', 'SigNoz system metrics read'),
        ('query_logs', 'SigNoz logs read'),
        ('error_logs_summary', 'SigNoz error logs summary read'),
        ('list_approvals', 'Approval records read'),
        ('get_approval', 'Approval detail read'),
        ('list_incidents', 'Incident records read'),
        ('list_timeline', 'Timeline records read'),
        ('read_file', 'Filesystem allowlisted file read'),
        ('list_directory', 'Filesystem allowlisted directory read'),
        ('search_in_file', 'Filesystem allowlisted file search'),
        ('list_dashboards', 'Grafana dashboards read'),
        ('get_dashboard', 'Grafana dashboard read'),
        ('get_panel_data', 'Grafana panel data read'),
        ('generate_dashboard_url', 'Grafana dashboard URL read'),
        ('search_runbook', 'Runbook semantic search read'),
        ('get_index_stats', 'Runbook index stats read'),
        ('argocd_list_apps', 'ArgoCD apps read'),
        ('argocd_get_app_status', 'ArgoCD app status read'),
        ('argocd_get_sync_history', 'ArgoCD sync history read'),
        ('sentry_list_issues', 'Sentry issues read'),
        ('sentry_get_issue', 'Sentry issue detail read'),
        ('sentry_search_issues', 'Sentry issue search read')
),
upsert_tools AS (
    INSERT INTO awooop_mcp_tool_registry (
        project_id,
        tool_name,
        tool_type,
        description,
        allowed_scopes,
        environment_tags,
        is_active,
        updated_at
    )
    SELECT
        'awoooi',
        tool_name,
        'mcp_server',
        description,
        '["read"]'::jsonb,
        '{"env": "prod"}'::jsonb,
        TRUE,
        NOW()
    FROM read_tools
    -- Existing registry rows are forced back to the read-only definition.
    ON CONFLICT (project_id, tool_name)
    DO UPDATE SET
        description = EXCLUDED.description,
        allowed_scopes = EXCLUDED.allowed_scopes,
        environment_tags = EXCLUDED.environment_tags,
        is_active = TRUE,
        updated_at = NOW()
    RETURNING tool_id
),
grant_agents(agent_id) AS (
    VALUES
        ('pre_decision_investigator'),
        ('post_execution_verifier')
),
upsert_grants AS (
    INSERT INTO awooop_mcp_grants (
        project_id,
        agent_id,
        tool_id,
        granted_by,
        granted_scopes,
        expires_at,
        is_revoked,
        revoked_at,
        revoked_by
    )
    SELECT
        'awoooi',
        grant_agents.agent_id,
        upsert_tools.tool_id,
        'migration:t7_mcp_gateway_read_seed',
        '["read"]'::jsonb,
        NULL,
        FALSE,
        NULL,
        NULL
    FROM upsert_tools
    CROSS JOIN grant_agents
    ON CONFLICT (project_id, agent_id, tool_id)
    DO UPDATE SET
        -- Re-stamp provenance: this statement rewrites the scopes and clears
        -- any revocation, so the grant is now this migration's. The T7
        -- rollback revokes by granted_by and would otherwise miss a
        -- pre-existing grant that was re-activated here.
        granted_by = EXCLUDED.granted_by,
        granted_scopes = EXCLUDED.granted_scopes,
        expires_at = NULL,
        is_revoked = FALSE,
        revoked_at = NULL,
        revoked_by = NULL
    RETURNING grant_id
)
SELECT
    'awoooi_read_tools',
    (SELECT count(*) FROM upsert_tools) AS tool_rows,
    (SELECT count(*) FROM upsert_grants) AS grant_rows;
|
||||
@@ -0,0 +1,77 @@
|
||||
-- Rollback for T7 awoooi read-only MCP Gateway seed.
-- Contract revision rows are append-only and are never deleted. This script:
--   1. revokes the grants issued by the seed,
--   2. deactivates the seeded read tools,
--   3. removes the active-revision pointers that reference the seeded contracts,
--   4. marks the seeded contract revisions as 'revoked'.

-- Third argument FALSE = session-level setting (not transaction-local).
-- NOTE(review): app.project_id is presumably read by row-level security policies -- confirm.
SELECT set_config('app.project_id', 'awoooi', FALSE);

-- 1. Revoke only grants that this migration issued and that are still live,
--    so earlier revocations keep their original revoked_at / revoked_by.
UPDATE awooop_mcp_grants
SET
    is_revoked = TRUE,
    revoked_at = NOW(),
    revoked_by = 'rollback:t7_mcp_gateway_read_seed'
WHERE project_id = 'awoooi'
  AND agent_id IN ('pre_decision_investigator', 'post_execution_verifier')
  AND granted_by = 'migration:t7_mcp_gateway_read_seed'
  AND is_revoked = FALSE;

-- 2. Deactivate (not delete) every tool the seed registered.
UPDATE awooop_mcp_tool_registry
SET
    is_active = FALSE,
    updated_at = NOW()
WHERE project_id = 'awoooi'
  AND tool_name IN (
      -- Kubernetes
      'k8s_get_pod_logs', 'k8s_get_events', 'k8s_describe_pod',
      'k8s_get_hpa_status', 'k8s_get_node_conditions',
      -- SSH
      'ssh_diagnose', 'ssh_get_top_processes', 'ssh_get_disk_usage',
      'ssh_get_memory_info', 'ssh_get_container_logs', 'ssh_get_container_status',
      'ssh_get_service_status', 'ssh_check_port', 'ssh_get_nginx_error_log',
      'ssh_get_swap_info',
      -- Prometheus
      'prometheus_query', 'prometheus_query_range', 'prometheus_get_alert_history',
      -- SigNoz
      'gold_metrics', 'trace_url', 'system_metrics', 'query_logs',
      'error_logs_summary',
      -- Approvals / incidents / timeline
      'list_approvals', 'get_approval', 'list_incidents', 'list_timeline',
      -- Filesystem
      'read_file', 'list_directory', 'search_in_file',
      -- Grafana
      'list_dashboards', 'get_dashboard', 'get_panel_data', 'generate_dashboard_url',
      -- Runbook
      'search_runbook', 'get_index_stats',
      -- ArgoCD
      'argocd_list_apps', 'argocd_get_app_status', 'argocd_get_sync_history',
      -- Sentry
      'sentry_list_issues', 'sentry_get_issue', 'sentry_search_issues'
  );

-- 3. Drop the active pointer only while it still references a revision
--    published by the T7 seed. A pointer that was moved to a revision from
--    another publisher is left alone, matching the publisher_id scoping of
--    step 4 (otherwise an untouched active revision would lose its pointer).
DELETE FROM awooop_active_revisions AS ptr
USING awooop_contract_revisions AS rev
WHERE ptr.project_id = 'awoooi'
  AND ptr.contract_family = 'agent'
  AND ptr.contract_id IN ('pre_decision_investigator', 'post_execution_verifier')
  AND rev.revision_id = ptr.active_revision_id
  AND rev.publisher_id = 'migration:t7_mcp_gateway_read_seed';

-- 4. Mark the seeded revisions as revoked; the rows themselves are kept.
UPDATE awooop_contract_revisions
SET lifecycle_status = 'revoked'
WHERE project_id = 'awoooi'
  AND contract_family = 'agent'
  AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier')
  AND publisher_id = 'migration:t7_mcp_gateway_read_seed'
  AND lifecycle_status = 'active';
|
||||
@@ -0,0 +1,213 @@
|
||||
-- T7: awoooi read-only MCP Gateway seed
-- Purpose:  let the pre-decision sensing MCP calls pass AwoooP Gateway Gate 2/3
--           and produce first-class audit records.
-- Boundary: grants the read scope only; write/admin tools such as
--           restart/delete/scale/apply/rollback are NOT authorized here.

-- Third argument FALSE = session-level setting (not transaction-local).
-- NOTE(review): app.project_id is presumably read by row-level security policies -- confirm.
SELECT set_config('app.project_id', 'awoooi', FALSE);

-- Statement 1: publish agent contract v1.0 for the two read-only agents and
-- point awooop_active_revisions at it. Returns one row: a label and the number
-- of pointer rows written.
WITH agent_seed(agent_id, display_name) AS (
    VALUES
        ('pre_decision_investigator', 'Pre-decision Investigator'),
        ('post_execution_verifier', 'Post-execution Verifier')
),
agent_body AS (
    SELECT
        agent_id,
        jsonb_build_object(
            'schema_version', 'awooop_agent_contract_v1',
            'agent_id', agent_id,
            'display_name', display_name,
            'project_id', 'awoooi',
            'purpose', 'Read-only MCP sensing through AwoooP Gateway',
            'allowed_scopes', jsonb_build_array('read'),
            'forbidden_scopes', jsonb_build_array('write', 'admin'),
            'stage', 't7_mcp_gateway_read_sense'
        ) AS body_json
    FROM agent_seed
),
inserted_revision AS (
    INSERT INTO awooop_contract_revisions (
        project_id,
        contract_family,
        contract_id,
        version_major,
        version_minor,
        lifecycle_status,
        body_json,
        body_hash,
        body_schema_version,
        publisher_id,
        published_at
    )
    SELECT
        'awoooi',
        'agent',
        agent_id,
        1,
        0,
        'active',
        body_json,
        -- digest() is provided by the pgcrypto extension.
        encode(digest(body_json::text, 'sha256'), 'hex'),
        'v1.0',
        'migration:t7_mcp_gateway_read_seed',
        NOW()
    FROM agent_body
    -- An existing v1.0 revision is kept as-is (revisions are never rewritten).
    ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
    DO NOTHING
    RETURNING revision_id, project_id, contract_family, contract_id
),
chosen_revision AS (
    -- Rows written by inserted_revision are visible only through RETURNING
    -- (all parts of one statement share a snapshot), so the second branch can
    -- only match v1.0 revisions that already existed before this statement.
    SELECT revision_id, project_id, contract_family, contract_id
    FROM inserted_revision
    UNION ALL
    SELECT revision_id, project_id, contract_family, contract_id
    FROM awooop_contract_revisions
    WHERE project_id = 'awoooi'
      AND contract_family = 'agent'
      AND contract_id IN (SELECT agent_id FROM agent_seed)
      AND version_major = 1
      AND version_minor = 0
      -- NOTE(review): a pre-existing v1.0 revision that is not 'active'
      -- yields no pointer for that agent -- confirm this is intended.
      AND lifecycle_status = 'active'
),
upsert_pointer AS (
    INSERT INTO awooop_active_revisions (
        project_id,
        contract_family,
        contract_id,
        active_revision_id,
        updated_at
    )
    -- DISTINCT ON keeps exactly one candidate per contract so ON CONFLICT
    -- never sees the same key twice within this INSERT.
    SELECT DISTINCT ON (project_id, contract_family, contract_id)
        project_id,
        contract_family,
        contract_id,
        revision_id,
        NOW()
    FROM chosen_revision
    ORDER BY project_id, contract_family, contract_id, revision_id
    ON CONFLICT (project_id, contract_family, contract_id)
    DO UPDATE SET
        active_revision_id = EXCLUDED.active_revision_id,
        updated_at = NOW()
    RETURNING contract_id
)
SELECT 'active_agent_contracts', count(*) FROM upsert_pointer;

-- Statement 2: register (or re-activate) the read-only tools and grant the
-- read scope on each of them to both agents. Returns one row: a label, the
-- number of tool rows and the number of grant rows written.
WITH read_tools(tool_name, description) AS (
    VALUES
        ('k8s_get_pod_logs', 'Kubernetes pod logs read'),
        ('k8s_get_events', 'Kubernetes events read'),
        ('k8s_describe_pod', 'Kubernetes pod describe read'),
        ('k8s_get_hpa_status', 'Kubernetes HPA status read'),
        ('k8s_get_node_conditions', 'Kubernetes node conditions read'),
        ('ssh_diagnose', 'SSH host diagnosis read'),
        ('ssh_get_top_processes', 'SSH top processes read'),
        ('ssh_get_disk_usage', 'SSH disk usage read'),
        ('ssh_get_memory_info', 'SSH memory info read'),
        ('ssh_get_container_logs', 'SSH container logs read'),
        ('ssh_get_container_status', 'SSH container status read'),
        ('ssh_get_service_status', 'SSH service status read'),
        ('ssh_check_port', 'SSH port check read'),
        ('ssh_get_nginx_error_log', 'SSH nginx error log read'),
        ('ssh_get_swap_info', 'SSH swap info read'),
        ('prometheus_query', 'Prometheus instant query read'),
        ('prometheus_query_range', 'Prometheus range query read'),
        ('prometheus_get_alert_history', 'Prometheus alert history read'),
        ('gold_metrics', 'SigNoz gold metrics read'),
        ('trace_url', 'SigNoz trace URL read'),
        ('system_metrics', 'SigNoz system metrics read'),
        ('query_logs', 'SigNoz logs read'),
        ('error_logs_summary', 'SigNoz error logs summary read'),
        ('list_approvals', 'Approval records read'),
        ('get_approval', 'Approval detail read'),
        ('list_incidents', 'Incident records read'),
        ('list_timeline', 'Timeline records read'),
        ('read_file', 'Filesystem allowlisted file read'),
        ('list_directory', 'Filesystem allowlisted directory read'),
        ('search_in_file', 'Filesystem allowlisted file search'),
        ('list_dashboards', 'Grafana dashboards read'),
        ('get_dashboard', 'Grafana dashboard read'),
        ('get_panel_data', 'Grafana panel data read'),
        ('generate_dashboard_url', 'Grafana dashboard URL read'),
        ('search_runbook', 'Runbook semantic search read'),
        ('get_index_stats', 'Runbook index stats read'),
        ('argocd_list_apps', 'ArgoCD apps read'),
        ('argocd_get_app_status', 'ArgoCD app status read'),
        ('argocd_get_sync_history', 'ArgoCD sync history read'),
        ('sentry_list_issues', 'Sentry issues read'),
        ('sentry_get_issue', 'Sentry issue detail read'),
        ('sentry_search_issues', 'Sentry issue search read')
),
upsert_tools AS (
    INSERT INTO awooop_mcp_tool_registry (
        project_id,
        tool_name,
        tool_type,
        description,
        allowed_scopes,
        environment_tags,
        is_active,
        updated_at
    )
    SELECT
        'awoooi',
        tool_name,
        'mcp_server',
        description,
        '["read"]'::jsonb,
        '{"env": "prod"}'::jsonb,
        TRUE,
        NOW()
    FROM read_tools
    -- Existing registry rows are forced back to the read-only definition.
    ON CONFLICT (project_id, tool_name)
    DO UPDATE SET
        description = EXCLUDED.description,
        allowed_scopes = EXCLUDED.allowed_scopes,
        environment_tags = EXCLUDED.environment_tags,
        is_active = TRUE,
        updated_at = NOW()
    RETURNING tool_id
),
grant_agents(agent_id) AS (
    VALUES
        ('pre_decision_investigator'),
        ('post_execution_verifier')
),
upsert_grants AS (
    INSERT INTO awooop_mcp_grants (
        project_id,
        agent_id,
        tool_id,
        granted_by,
        granted_scopes,
        expires_at,
        is_revoked,
        revoked_at,
        revoked_by
    )
    SELECT
        'awoooi',
        grant_agents.agent_id,
        upsert_tools.tool_id,
        'migration:t7_mcp_gateway_read_seed',
        '["read"]'::jsonb,
        NULL,
        FALSE,
        NULL,
        NULL
    FROM upsert_tools
    CROSS JOIN grant_agents
    ON CONFLICT (project_id, agent_id, tool_id)
    DO UPDATE SET
        -- Re-stamp provenance: this statement rewrites the scopes and clears
        -- any revocation, so the grant is now this migration's. The T7
        -- rollback revokes by granted_by and would otherwise miss a
        -- pre-existing grant that was re-activated here.
        granted_by = EXCLUDED.granted_by,
        granted_scopes = EXCLUDED.granted_scopes,
        expires_at = NULL,
        is_revoked = FALSE,
        revoked_at = NULL,
        revoked_by = NULL
    RETURNING grant_id
)
SELECT
    'awoooi_read_tools',
    (SELECT count(*) FROM upsert_tools) AS tool_rows,
    (SELECT count(*) FROM upsert_grants) AS grant_rows;

-- v4 exists only to retrigger run-migration after Gitea skipped the v2->v3 rename-only push.
|
||||
@@ -0,0 +1,79 @@
|
||||
-- Rollback for T7 awoooi read-only MCP Gateway seed.
-- Contract revision rows are append-only and are never deleted. This script:
--   1. revokes the grants issued by the seed,
--   2. deactivates the seeded read tools,
--   3. removes the active-revision pointers that reference the seeded contracts,
--   4. marks the seeded contract revisions as 'revoked'.

-- Third argument FALSE = session-level setting (not transaction-local).
-- NOTE(review): app.project_id is presumably read by row-level security policies -- confirm.
SELECT set_config('app.project_id', 'awoooi', FALSE);

-- 1. Revoke only grants that this migration issued and that are still live,
--    so earlier revocations keep their original revoked_at / revoked_by.
UPDATE awooop_mcp_grants
SET
    is_revoked = TRUE,
    revoked_at = NOW(),
    revoked_by = 'rollback:t7_mcp_gateway_read_seed'
WHERE project_id = 'awoooi'
  AND agent_id IN ('pre_decision_investigator', 'post_execution_verifier')
  AND granted_by = 'migration:t7_mcp_gateway_read_seed'
  AND is_revoked = FALSE;

-- 2. Deactivate (not delete) every tool the seed registered.
UPDATE awooop_mcp_tool_registry
SET
    is_active = FALSE,
    updated_at = NOW()
WHERE project_id = 'awoooi'
  AND tool_name IN (
      -- Kubernetes
      'k8s_get_pod_logs', 'k8s_get_events', 'k8s_describe_pod',
      'k8s_get_hpa_status', 'k8s_get_node_conditions',
      -- SSH
      'ssh_diagnose', 'ssh_get_top_processes', 'ssh_get_disk_usage',
      'ssh_get_memory_info', 'ssh_get_container_logs', 'ssh_get_container_status',
      'ssh_get_service_status', 'ssh_check_port', 'ssh_get_nginx_error_log',
      'ssh_get_swap_info',
      -- Prometheus
      'prometheus_query', 'prometheus_query_range', 'prometheus_get_alert_history',
      -- SigNoz
      'gold_metrics', 'trace_url', 'system_metrics', 'query_logs',
      'error_logs_summary',
      -- Approvals / incidents / timeline
      'list_approvals', 'get_approval', 'list_incidents', 'list_timeline',
      -- Filesystem
      'read_file', 'list_directory', 'search_in_file',
      -- Grafana
      'list_dashboards', 'get_dashboard', 'get_panel_data', 'generate_dashboard_url',
      -- Runbook
      'search_runbook', 'get_index_stats',
      -- ArgoCD
      'argocd_list_apps', 'argocd_get_app_status', 'argocd_get_sync_history',
      -- Sentry
      'sentry_list_issues', 'sentry_get_issue', 'sentry_search_issues'
  );

-- 3. Drop the active pointer only while it still references a revision
--    published by the T7 seed. A pointer that was moved to a revision from
--    another publisher is left alone, matching the publisher_id scoping of
--    step 4 (otherwise an untouched active revision would lose its pointer).
DELETE FROM awooop_active_revisions AS ptr
USING awooop_contract_revisions AS rev
WHERE ptr.project_id = 'awoooi'
  AND ptr.contract_family = 'agent'
  AND ptr.contract_id IN ('pre_decision_investigator', 'post_execution_verifier')
  AND rev.revision_id = ptr.active_revision_id
  AND rev.publisher_id = 'migration:t7_mcp_gateway_read_seed';

-- 4. Mark the seeded revisions as revoked; the rows themselves are kept.
UPDATE awooop_contract_revisions
SET lifecycle_status = 'revoked'
WHERE project_id = 'awoooi'
  AND contract_family = 'agent'
  AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier')
  AND publisher_id = 'migration:t7_mcp_gateway_read_seed'
  AND lifecycle_status = 'active';

-- v4 rollback companion for the retrigger migration.
|
||||
@@ -0,0 +1,77 @@
|
||||
-- T16 verifier gap: allow rollout status evidence through AwoooP MCP Gateway.
-- Boundary: read-only scope only; no restart/delete/scale grant is added here.

-- Third argument FALSE = session-level setting (not transaction-local).
-- NOTE(review): app.project_id is presumably read by row-level security policies -- confirm.
SELECT set_config('app.project_id', 'awoooi', FALSE);

-- Registers (or re-activates) the k8s_watch_rollout tool and grants the read
-- scope on it to both read-only agents. Returns one row: a label, the number
-- of tool rows and the number of grant rows written.
WITH upsert_tool AS (
    INSERT INTO awooop_mcp_tool_registry (
        project_id,
        tool_name,
        tool_type,
        description,
        allowed_scopes,
        environment_tags,
        is_active,
        updated_at
    )
    VALUES (
        'awoooi',
        'k8s_watch_rollout',
        'mcp_server',
        'Kubernetes deployment rollout status read',
        '["read"]'::jsonb,
        '{"env": "prod"}'::jsonb,
        TRUE,
        NOW()
    )
    -- An existing registry row is forced back to the read-only definition.
    ON CONFLICT (project_id, tool_name)
    DO UPDATE SET
        description = EXCLUDED.description,
        allowed_scopes = EXCLUDED.allowed_scopes,
        environment_tags = EXCLUDED.environment_tags,
        is_active = TRUE,
        updated_at = NOW()
    RETURNING tool_id
),
grant_agents(agent_id) AS (
    VALUES
        ('pre_decision_investigator'),
        ('post_execution_verifier')
),
upsert_grants AS (
    INSERT INTO awooop_mcp_grants (
        project_id,
        agent_id,
        tool_id,
        granted_by,
        granted_scopes,
        expires_at,
        is_revoked,
        revoked_at,
        revoked_by
    )
    SELECT
        'awoooi',
        grant_agents.agent_id,
        upsert_tool.tool_id,
        'migration:t16_rollout_verifier_seed',
        '["read"]'::jsonb,
        NULL,
        FALSE,
        NULL,
        NULL
    FROM upsert_tool
    CROSS JOIN grant_agents
    ON CONFLICT (project_id, agent_id, tool_id)
    DO UPDATE SET
        -- Re-stamp provenance: this statement rewrites the scopes and clears
        -- any revocation on a pre-existing grant, so the audit trail should
        -- attribute the resulting grant to this migration.
        granted_by = EXCLUDED.granted_by,
        granted_scopes = EXCLUDED.granted_scopes,
        expires_at = NULL,
        is_revoked = FALSE,
        revoked_at = NULL,
        revoked_by = NULL
    RETURNING grant_id
)
SELECT
    'k8s_watch_rollout_read_grants' AS seed,
    (SELECT count(*) FROM upsert_tool) AS tool_rows,
    (SELECT count(*) FROM upsert_grants) AS grant_rows;
|
||||
@@ -0,0 +1,24 @@
|
||||
-- Roll back T16 rollout verifier read grant seed.
-- Revokes the two agents' grants on k8s_watch_rollout and deactivates the tool;
-- no rows are deleted.

-- Third argument FALSE = session-level setting (not transaction-local).
-- NOTE(review): app.project_id is presumably read by row-level security policies -- confirm.
SELECT set_config('app.project_id', 'awoooi', FALSE);

UPDATE awooop_mcp_grants
SET
    is_revoked = TRUE,
    revoked_at = NOW(),
    revoked_by = 'migration:t16_rollout_verifier_seed_down'
WHERE project_id = 'awoooi'
  AND agent_id IN ('pre_decision_investigator', 'post_execution_verifier')
  -- Touch only live grants: an already-revoked grant keeps its original
  -- revoked_at / revoked_by, and re-running this rollback changes nothing.
  AND is_revoked = FALSE
  AND tool_id IN (
      SELECT tool_id
      FROM awooop_mcp_tool_registry
      WHERE project_id = 'awoooi'
        AND tool_name = 'k8s_watch_rollout'
  );

UPDATE awooop_mcp_tool_registry
SET
    is_active = FALSE,
    updated_at = NOW()
WHERE project_id = 'awoooi'
  AND tool_name = 'k8s_watch_rollout';
|
||||
@@ -0,0 +1,14 @@
|
||||
-- AwoooP Phase 5b: audit coverage for blocked MCP Gateway calls
|
||||
-- Date: 2026-05-06
|
||||
-- Maintainer: Codex
|
||||
--
|
||||
-- Blocked calls from Gate 1 / Gate 2 / unknown tools can happen before the tool registry row
|
||||
-- has been fetched. These security decisions must still be written to the audit log, so tool_id may be NULL,
|
||||
-- while tool_name stays required as the trace for unknown tools and early gate blocks.
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- Only the NOT NULL constraint is dropped; the column itself is unchanged.
ALTER TABLE awooop_mcp_gateway_audit
|
||||
ALTER COLUMN tool_id DROP NOT NULL;
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,21 @@
|
||||
-- AwoooP Phase 7 T15b: inbound event truth-chain columns
|
||||
--
|
||||
-- Purpose:
|
||||
-- Telegram cards are only the notification surface. Operators need a
|
||||
-- redacted replay envelope for inbound alerts so Alertmanager, Sentry, and
|
||||
-- SigNoz events can be correlated with incidents, approvals, logs, and
|
||||
-- automation decisions without storing raw secrets or PII.
|
||||
|
||||
-- IF NOT EXISTS makes the statement safe to re-run. The two NOT NULL columns
-- carry defaults, so rows that already exist receive a value.
ALTER TABLE awooop_conversation_event
|
||||
ADD COLUMN IF NOT EXISTS content_redacted TEXT,
|
||||
ADD COLUMN IF NOT EXISTS redaction_version VARCHAR(32) NOT NULL DEFAULT 'audit_sink_v1',
|
||||
ADD COLUMN IF NOT EXISTS source_envelope JSONB NOT NULL DEFAULT '{}'::jsonb;
|
||||
|
||||
COMMENT ON COLUMN awooop_conversation_event.content_redacted IS
|
||||
'Full inbound event content after audit_sink redaction; raw unredacted payload text is not stored.';
|
||||
|
||||
COMMENT ON COLUMN awooop_conversation_event.redaction_version IS
|
||||
'Redaction algorithm/version used for content_redacted and source_envelope.';
|
||||
|
||||
COMMENT ON COLUMN awooop_conversation_event.source_envelope IS
|
||||
'Redacted source metadata for inbound replay/audit, including payload hash, provider, source refs, and log correlation hints.';
|
||||
@@ -0,0 +1,6 @@
|
||||
-- Rollback for AwoooP Phase 7 T15b inbound truth-chain columns.
-- Safe only if no consumers depend on the redacted replay fields.
-- All three columns are dropped in a single ALTER TABLE so the rollback is
-- atomic even when this file is executed outside an explicit transaction.

ALTER TABLE awooop_conversation_event
    DROP COLUMN IF EXISTS source_envelope,
    DROP COLUMN IF EXISTS redaction_version,
    DROP COLUMN IF EXISTS content_redacted;
|
||||
@@ -0,0 +1,21 @@
|
||||
-- AwoooP Phase 7 T1: outbound message truth-chain columns
|
||||
--
|
||||
-- Purpose:
|
||||
-- Telegram must remain a summary channel, but the operator console needs a
|
||||
-- complete redacted replay of the rendered card and the source envelope that
|
||||
-- produced it. Store redacted content only; raw unredacted Telegram text stays
|
||||
-- out of PostgreSQL.
|
||||
|
||||
-- IF NOT EXISTS makes the statement safe to re-run. The two NOT NULL columns
-- carry defaults, so rows that already exist receive a value.
ALTER TABLE awooop_outbound_message
|
||||
ADD COLUMN IF NOT EXISTS content_redacted TEXT,
|
||||
ADD COLUMN IF NOT EXISTS redaction_version VARCHAR(32) NOT NULL DEFAULT 'audit_sink_v1',
|
||||
ADD COLUMN IF NOT EXISTS source_envelope JSONB NOT NULL DEFAULT '{}'::jsonb;
|
||||
|
||||
COMMENT ON COLUMN awooop_outbound_message.content_redacted IS
|
||||
'Full rendered outbound content after audit_sink redaction; raw unredacted text is not stored.';
|
||||
|
||||
COMMENT ON COLUMN awooop_outbound_message.redaction_version IS
|
||||
'Redaction algorithm/version used for content_redacted and source_envelope.';
|
||||
|
||||
COMMENT ON COLUMN awooop_outbound_message.source_envelope IS
|
||||
'Redacted source metadata for replay/audit, including payload hash and adapter context.';
|
||||
@@ -0,0 +1,6 @@
|
||||
-- Rollback for AwoooP Phase 7 T1 outbound truth-chain columns.
-- Safe only if no consumers depend on the redacted replay fields.
-- All three columns are dropped in a single ALTER TABLE so the rollback is
-- atomic even when this file is executed outside an explicit transaction.

ALTER TABLE awooop_outbound_message
    DROP COLUMN IF EXISTS source_envelope,
    DROP COLUMN IF EXISTS redaction_version,
    DROP COLUMN IF EXISTS content_redacted;
|
||||
@@ -6,10 +6,12 @@
|
||||
-- bge-m3 產生 1024 維向量,現有 schema vector(768) 不相容,INSERT 會直接失敗
|
||||
--
|
||||
-- 影響範圍:
|
||||
-- 1. rag_chunks.embedding vector(768) → vector(1024)
|
||||
-- 2. playbook_embeddings.embedding vector(768) → vector(1024)
|
||||
-- 1. knowledge_entries.embedding vector(768) → vector(1024)
|
||||
-- 2. rag_chunks.embedding vector(768) → vector(1024)
|
||||
-- 3. playbook_embeddings.embedding vector(768) → vector(1024)
|
||||
--
|
||||
-- 遷移策略:清空現有向量資料,切換維度後由 re-embed script 重新嵌入
|
||||
-- 遷移策略:僅在欄位不是 vector(1024) 時清空現有向量資料,切換維度後由 re-embed script 重新嵌入
|
||||
-- 已經是 vector(1024) 的環境重跑本 migration 時,必須保留既有向量資料。
|
||||
-- 現有向量資料若要保留,需先 dump 用 nomic 格式備份(舊維度無法轉換)
|
||||
--
|
||||
-- 執行前置條件:
|
||||
@@ -21,13 +23,69 @@
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- 1. rag_chunks:清空向量資料,變更欄位維度
|
||||
-- ivfflat index 必須先 DROP 才能 ALTER COLUMN
|
||||
DROP INDEX IF EXISTS idx_rag_chunks_embedding;
|
||||
-- 1. knowledge_entries:備份舊向量並清空,變更欄位維度
|
||||
DO $$
|
||||
DECLARE
|
||||
v_dim integer;
|
||||
BEGIN
|
||||
SELECT a.atttypmod INTO v_dim
|
||||
FROM pg_attribute a
|
||||
JOIN pg_class c ON a.attrelid = c.oid
|
||||
WHERE c.relname = 'knowledge_entries'
|
||||
AND a.attname = 'embedding';
|
||||
|
||||
ALTER TABLE rag_chunks
|
||||
ALTER COLUMN embedding TYPE vector(1024)
|
||||
USING NULL; -- 清空現有 768 維向量(維度不可轉換)
|
||||
IF v_dim IS DISTINCT FROM 1024 THEN
|
||||
EXECUTE $sql$
|
||||
CREATE TABLE IF NOT EXISTS knowledge_entries_embedding_backup_20260505 AS
|
||||
SELECT
|
||||
id,
|
||||
embedding::text AS embedding_768,
|
||||
NOW() AS backed_up_at
|
||||
FROM knowledge_entries
|
||||
WHERE embedding IS NOT NULL
|
||||
$sql$;
|
||||
|
||||
EXECUTE $sql$
|
||||
ALTER TABLE knowledge_entries
|
||||
ALTER COLUMN embedding TYPE vector(1024)
|
||||
USING NULL
|
||||
$sql$;
|
||||
|
||||
RAISE NOTICE 'knowledge_entries.embedding migrated from vector(%) to vector(1024); old embeddings were backed up and cleared', v_dim;
|
||||
ELSE
|
||||
RAISE NOTICE 'knowledge_entries.embedding already vector(1024); existing embeddings preserved';
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
COMMENT ON COLUMN knowledge_entries.embedding IS
|
||||
'bge-m3:latest 1024 維向量 — 遷移自 nomic-embed-text 768 維 (2026-05-05 ADR-110 follow-up)';
|
||||
|
||||
|
||||
-- 2. rag_chunks:清空向量資料,變更欄位維度
|
||||
-- ivfflat index 必須先 DROP 才能 ALTER COLUMN
|
||||
DO $$
|
||||
DECLARE
|
||||
v_dim integer;
|
||||
BEGIN
|
||||
SELECT a.atttypmod INTO v_dim
|
||||
FROM pg_attribute a
|
||||
JOIN pg_class c ON a.attrelid = c.oid
|
||||
WHERE c.relname = 'rag_chunks'
|
||||
AND a.attname = 'embedding';
|
||||
|
||||
IF v_dim IS DISTINCT FROM 1024 THEN
|
||||
EXECUTE 'DROP INDEX IF EXISTS idx_rag_chunks_embedding';
|
||||
EXECUTE $sql$
|
||||
ALTER TABLE rag_chunks
|
||||
ALTER COLUMN embedding TYPE vector(1024)
|
||||
USING NULL
|
||||
$sql$;
|
||||
|
||||
RAISE NOTICE 'rag_chunks.embedding migrated from vector(%) to vector(1024); old embeddings were cleared', v_dim;
|
||||
ELSE
|
||||
RAISE NOTICE 'rag_chunks.embedding already vector(1024); existing embeddings preserved';
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
-- 重建 ivfflat index(lists=100 適合 ~10k 筆以下資料)
|
||||
CREATE INDEX IF NOT EXISTS idx_rag_chunks_embedding
|
||||
@@ -39,12 +97,30 @@ COMMENT ON COLUMN rag_chunks.embedding IS
|
||||
'bge-m3:latest 1024 維向量 — 遷移自 nomic-embed-text 768 維 (2026-05-04 ADR-110)';
|
||||
|
||||
|
||||
-- 2. playbook_embeddings:清空向量資料,變更欄位維度
|
||||
DROP INDEX IF EXISTS ix_playbook_embeddings_vec;
|
||||
-- 3. playbook_embeddings:清空向量資料,變更欄位維度
|
||||
DO $$
|
||||
DECLARE
|
||||
v_dim integer;
|
||||
BEGIN
|
||||
SELECT a.atttypmod INTO v_dim
|
||||
FROM pg_attribute a
|
||||
JOIN pg_class c ON a.attrelid = c.oid
|
||||
WHERE c.relname = 'playbook_embeddings'
|
||||
AND a.attname = 'embedding';
|
||||
|
||||
ALTER TABLE playbook_embeddings
|
||||
ALTER COLUMN embedding TYPE vector(1024)
|
||||
USING NULL; -- 清空現有 768 維向量
|
||||
IF v_dim IS DISTINCT FROM 1024 THEN
|
||||
EXECUTE 'DROP INDEX IF EXISTS ix_playbook_embeddings_vec';
|
||||
EXECUTE $sql$
|
||||
ALTER TABLE playbook_embeddings
|
||||
ALTER COLUMN embedding TYPE vector(1024)
|
||||
USING NULL
|
||||
$sql$;
|
||||
|
||||
RAISE NOTICE 'playbook_embeddings.embedding migrated from vector(%) to vector(1024); old embeddings were cleared', v_dim;
|
||||
ELSE
|
||||
RAISE NOTICE 'playbook_embeddings.embedding already vector(1024); existing embeddings preserved';
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS ix_playbook_embeddings_vec
|
||||
ON playbook_embeddings
|
||||
@@ -61,9 +137,15 @@ COMMENT ON TABLE playbook_embeddings IS
|
||||
-- 4. 驗證遷移結果
|
||||
DO $$
|
||||
DECLARE
|
||||
v_km_dim integer;
|
||||
v_rag_dim integer;
|
||||
v_pb_dim integer;
|
||||
BEGIN
|
||||
SELECT atttypmod INTO v_km_dim
|
||||
FROM pg_attribute
|
||||
JOIN pg_class ON attrelid = pg_class.oid
|
||||
WHERE relname = 'knowledge_entries' AND attname = 'embedding';
|
||||
|
||||
SELECT atttypmod INTO v_rag_dim
|
||||
FROM pg_attribute
|
||||
JOIN pg_class ON attrelid = pg_class.oid
|
||||
@@ -74,15 +156,18 @@ BEGIN
|
||||
JOIN pg_class ON attrelid = pg_class.oid
|
||||
WHERE relname = 'playbook_embeddings' AND attname = 'embedding';
|
||||
|
||||
-- atttypmod for vector(1024) = 1024 + 1 = 1025
|
||||
IF v_rag_dim != 1025 THEN
|
||||
RAISE EXCEPTION 'rag_chunks.embedding 維度驗證失敗:expected 1025, got %', v_rag_dim;
|
||||
-- pgvector atttypmod stores the configured dimension.
|
||||
IF v_km_dim != 1024 THEN
|
||||
RAISE EXCEPTION 'knowledge_entries.embedding 維度驗證失敗:expected 1024, got %', v_km_dim;
|
||||
END IF;
|
||||
IF v_pb_dim != 1025 THEN
|
||||
RAISE EXCEPTION 'playbook_embeddings.embedding 維度驗證失敗:expected 1025, got %', v_pb_dim;
|
||||
IF v_rag_dim != 1024 THEN
|
||||
RAISE EXCEPTION 'rag_chunks.embedding 維度驗證失敗:expected 1024, got %', v_rag_dim;
|
||||
END IF;
|
||||
IF v_pb_dim != 1024 THEN
|
||||
RAISE EXCEPTION 'playbook_embeddings.embedding 維度驗證失敗:expected 1024, got %', v_pb_dim;
|
||||
END IF;
|
||||
|
||||
RAISE NOTICE '✅ embedding 遷移驗證通過:rag_chunks 和 playbook_embeddings 均為 vector(1024)';
|
||||
RAISE NOTICE '✅ embedding 遷移驗證通過:knowledge_entries、rag_chunks、playbook_embeddings 均為 vector(1024)';
|
||||
END $$;
|
||||
|
||||
COMMIT;
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
"log_anomaly": "deepseek-r1:14b",
|
||||
"nemoclaw": "deepseek-r1:14b",
|
||||
"playbook_draft": "qwen3:14b",
|
||||
"code_review": "qwen2.5-coder:32b",
|
||||
"code_review": "qwen2.5-coder:7b",
|
||||
"embedding": "bge-m3:latest",
|
||||
"rag_generate": "qwen3:14b",
|
||||
"image_analysis": "minicpm-v:latest",
|
||||
@@ -175,7 +175,7 @@
|
||||
},
|
||||
"pr_code_review": {
|
||||
"phase": 32,
|
||||
"model": "qwen2.5-coder:32b",
|
||||
"model": "qwen2.5-coder:7b",
|
||||
"timeout_seconds": 120,
|
||||
"purpose": "Gitea PR 自動審查"
|
||||
},
|
||||
|
||||
@@ -46,6 +46,10 @@ dependencies = [
|
||||
# 2026-04-16 ogt + Claude Sonnet 4.6: SSH MCP sensor 修復 — asyncssh 缺失導致 sensors_succeeded=0
|
||||
# 根因: ssh_provider.py 中 import asyncssh 在 try/except 外,所有 15 個 SSH tool 直接 ImportError
|
||||
"asyncssh>=2.14.0",
|
||||
# 2026-05-31 Codex: AwoooP truth-chain Ansible runtime gate 需要
|
||||
# production API image 內真的存在 ansible-playbook,否則只能顯示
|
||||
# candidate audit,無法進入 check-mode executor readiness。
|
||||
"ansible-core>=2.16.0,<2.18.0",
|
||||
]
|
||||
|
||||
# [tool.uv.sources]
|
||||
|
||||
@@ -58,3 +58,8 @@ pytest>=7.4.0
|
||||
pytest-asyncio>=0.23.0
|
||||
ruff>=0.1.0
|
||||
sentry-sdk[fastapi]>=2.0.0
|
||||
|
||||
# AwoooP Ansible runtime readiness
|
||||
# 2026-05-31 Codex: production API image must include ansible-playbook before
|
||||
# truth-chain can honestly mark check-mode executor readiness as available.
|
||||
ansible-core>=2.16.0,<2.18.0
|
||||
|
||||
@@ -9,7 +9,7 @@ AwoooP Phase 1 Batch 1 回填腳本
|
||||
awooop_phase1_batch1_rls_2026-05-04.sql Step A(ADD COLUMN nullable)已執行
|
||||
|
||||
執行方式:
|
||||
export DATABASE_URL="postgresql+asyncpg://awoooi:<password>@192.168.0.188:5432/awoooi_prod"
|
||||
從 secret manager / operator vault 設定 DATABASE_URL,禁止在指令或檔案中寫入 URL。
|
||||
cd apps/api && python scripts/awooop_phase1_batch1_backfill.py
|
||||
|
||||
2026-05-04 ogt + Claude Sonnet 4.6(ADR-118 Batch 1 C-3 修正)
|
||||
|
||||
@@ -37,6 +37,7 @@ logging = structlog.get_logger(__name__)
|
||||
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://34.143.170.20:11434")
|
||||
EMBEDDING_MODEL = "bge-m3:latest"
|
||||
EXPECTED_DIM = 1024
|
||||
PROJECT_ID = os.getenv("AWOOOP_PROJECT_ID", "awoooi")
|
||||
|
||||
|
||||
async def embed_text(client: httpx.AsyncClient, text: str) -> list[float]:
|
||||
@@ -162,6 +163,7 @@ async def main(dry_run: bool, batch_size: int) -> None:
|
||||
|
||||
conn = await asyncpg.connect(database_url)
|
||||
try:
|
||||
await conn.execute("SELECT set_config('app.project_id', $1, FALSE)", PROJECT_ID)
|
||||
# 統計待嵌入筆數
|
||||
rag_null = await conn.fetchval("SELECT COUNT(*) FROM rag_chunks WHERE embedding IS NULL")
|
||||
pb_null = await conn.fetchval("SELECT COUNT(*) FROM playbook_embeddings WHERE embedding IS NULL")
|
||||
|
||||
@@ -15,7 +15,7 @@ from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
|
||||
# 2026-04-22 ogt: 移除硬碼 changeme,改為讀取環境變數(強制要求設定)。
|
||||
# 執行前: export DATABASE_URL="postgresql+asyncpg://awoooi:<password>@192.168.0.188:5432/awoooi_prod"
|
||||
# 執行前: 從 secret manager / operator vault 設定 DATABASE_URL,禁止在指令或檔案中寫入 URL。
|
||||
DATABASE_URL = os.environ["DATABASE_URL"]
|
||||
|
||||
MIGRATION_SQLS = [
|
||||
|
||||
@@ -28,7 +28,7 @@ except ImportError:
|
||||
# ============================================================================
|
||||
|
||||
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
|
||||
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://192.168.0.188:11434")
|
||||
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://192.168.0.110:11435")
|
||||
|
||||
if not NVIDIA_API_KEY:
|
||||
print("❌ 請設定 NVIDIA_API_KEY 環境變數")
|
||||
|
||||
@@ -22,17 +22,48 @@ from datetime import datetime
|
||||
from typing import Annotated
|
||||
|
||||
import structlog
|
||||
from fastapi import APIRouter, Query
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from src.models.governance import (
|
||||
GovernanceEventsResponse,
|
||||
GovernanceQueueResponse,
|
||||
GovernanceSummaryResponse,
|
||||
KnowledgeReviewDraftArchiveRequest,
|
||||
KnowledgeReviewDraftArchiveResponse,
|
||||
KnowledgeReviewDraftDedupeResponse,
|
||||
KnowledgeStaleCandidatesResponse,
|
||||
KnowledgeStaleOwnerReviewBatchQueueRequest,
|
||||
KnowledgeStaleOwnerReviewBatchQueueResponse,
|
||||
KnowledgeStaleOwnerReviewBurnDownResponse,
|
||||
KnowledgeStaleOwnerReviewCompleteRequest,
|
||||
KnowledgeStaleOwnerReviewCompleteResponse,
|
||||
KnowledgeStaleOwnerReviewCompletionBatchPreviewRequest,
|
||||
KnowledgeStaleOwnerReviewCompletionBatchPreviewResponse,
|
||||
KnowledgeStaleOwnerReviewCompletionQueueResponse,
|
||||
KnowledgeStaleOwnerReviewInboxResponse,
|
||||
KnowledgeStaleOwnerReviewRequest,
|
||||
KnowledgeStaleOwnerReviewResponse,
|
||||
)
|
||||
from src.services.governance_km_review_service import (
|
||||
KmReviewDraftArchiveError,
|
||||
archive_km_review_draft_duplicates,
|
||||
)
|
||||
from src.services.governance_km_stale_review_service import (
|
||||
KmStaleOwnerReviewError,
|
||||
batch_queue_km_stale_owner_reviews,
|
||||
complete_km_stale_owner_review,
|
||||
preview_km_stale_owner_review_completion_batch,
|
||||
query_km_stale_owner_review_burndown,
|
||||
query_km_stale_owner_review_completion_queue,
|
||||
query_km_stale_owner_review_inbox,
|
||||
queue_km_stale_owner_review,
|
||||
)
|
||||
from src.services.governance_query_service import (
|
||||
query_governance_events,
|
||||
query_governance_queue,
|
||||
query_governance_summary,
|
||||
query_km_review_draft_dedupe,
|
||||
query_km_stale_candidates,
|
||||
)
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
@@ -46,6 +77,7 @@ router = APIRouter()
|
||||
|
||||
@router.get("/ai/governance/events", response_model=GovernanceEventsResponse)
|
||||
async def get_governance_events(
|
||||
event_id: Annotated[list[str] | None, Query(alias="event_id")] = None,
|
||||
event_type: Annotated[list[str] | None, Query(alias="event_type")] = None,
|
||||
from_: Annotated[datetime | None, Query(alias="from")] = None,
|
||||
to: Annotated[datetime | None, Query(alias="to")] = None,
|
||||
@@ -58,6 +90,7 @@ async def get_governance_events(
|
||||
查詢 AI 治理事件列表(分頁)。
|
||||
|
||||
- event_type: 多值過濾(可重複傳)
|
||||
- event_id: 多值精準過濾(可重複傳),供 Telegram 詳情 / 歷史與 Work Items 錨點回看
|
||||
- from / to: ISO 8601 時間範圍(URL 傳 from 參數)
|
||||
- status: resolved / unresolved
|
||||
- severity: critical / warning / info(由 event_type 映射決定)
|
||||
@@ -66,6 +99,7 @@ async def get_governance_events(
|
||||
"""
|
||||
logger.debug(
|
||||
"governance_events_request",
|
||||
event_ids=event_id,
|
||||
event_types=event_type,
|
||||
from_=from_,
|
||||
to=to,
|
||||
@@ -75,6 +109,7 @@ async def get_governance_events(
|
||||
size=size,
|
||||
)
|
||||
return await query_governance_events(
|
||||
event_ids=event_id,
|
||||
event_types=event_type,
|
||||
from_dt=from_,
|
||||
to_dt=to,
|
||||
@@ -93,8 +128,9 @@ async def get_governance_events(
|
||||
async def get_governance_queue(
|
||||
dispatch_status: Annotated[
|
||||
str,
|
||||
Query(pattern="^(pending|dispatched|succeeded|failed)$"),
|
||||
Query(pattern="^(all|pending|dispatched|executing|succeeded|failed|skipped|cancelled)$"),
|
||||
] = "pending",
|
||||
event_type: Annotated[list[str] | None, Query(alias="event_type")] = None,
|
||||
page: Annotated[int, Query(ge=1)] = 1,
|
||||
size: Annotated[int, Query(ge=10, le=100)] = 20,
|
||||
) -> GovernanceQueueResponse:
|
||||
@@ -104,22 +140,360 @@ async def get_governance_queue(
|
||||
governance_remediation_dispatch 表由 Track D 建立,尚未完成時
|
||||
本 endpoint 回傳 { table_pending: true, items: [], total: 0 },不拋 500。
|
||||
|
||||
- dispatch_status: pending(default)/ dispatched / succeeded / failed
|
||||
- dispatch_status: pending(default)/ dispatched / executing / succeeded / failed / skipped / cancelled / all
|
||||
- event_type: 多值過濾(可重複傳)
|
||||
- page / size: 分頁
|
||||
"""
|
||||
logger.debug(
|
||||
"governance_queue_request",
|
||||
dispatch_status=dispatch_status,
|
||||
event_type=event_type,
|
||||
page=page,
|
||||
size=size,
|
||||
)
|
||||
return await query_governance_queue(
|
||||
dispatch_status=dispatch_status,
|
||||
event_types=event_type,
|
||||
page=page,
|
||||
size=size,
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# GET /api/v1/ai/governance/km-review-drafts/dedupe
|
||||
# =============================================================================
|
||||
|
||||
@router.get(
|
||||
"/ai/governance/km-review-drafts/dedupe",
|
||||
response_model=KnowledgeReviewDraftDedupeResponse,
|
||||
)
|
||||
async def get_km_review_draft_dedupe(
|
||||
limit: Annotated[int, Query(ge=10, le=200)] = 100,
|
||||
) -> KnowledgeReviewDraftDedupeResponse:
|
||||
"""
|
||||
查詢 Hermes KM healthcheck review drafts 的去重 read model。
|
||||
|
||||
這是 read-only owner review surface:只回傳 canonical / duplicate /
|
||||
owner_action,不自動 archive、不自動 approve/publish KM。
|
||||
"""
|
||||
logger.debug("km_review_draft_dedupe_request", limit=limit)
|
||||
return await query_km_review_draft_dedupe(limit=limit)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# POST /api/v1/ai/governance/km-review-drafts/dedupe/{event_id}/archive-duplicates
|
||||
# =============================================================================
|
||||
|
||||
@router.post(
|
||||
"/ai/governance/km-review-drafts/dedupe/{governance_event_id}/archive-duplicates",
|
||||
response_model=KnowledgeReviewDraftArchiveResponse,
|
||||
)
|
||||
async def post_km_review_draft_archive_duplicates(
|
||||
governance_event_id: str,
|
||||
request: KnowledgeReviewDraftArchiveRequest,
|
||||
) -> KnowledgeReviewDraftArchiveResponse:
|
||||
"""
|
||||
Owner 審核後封存 Hermes KM healthcheck duplicate review drafts。
|
||||
|
||||
這不是 read endpoint:必須明確傳 owner_approved=true,且後端會重新比對
|
||||
最新 dedupe plan。封存為 KnowledgeEntry.status=archived,不刪除資料。
|
||||
"""
|
||||
logger.info(
|
||||
"km_review_draft_archive_request",
|
||||
governance_event_id=governance_event_id,
|
||||
canonical_entry_id=request.canonical_entry_id,
|
||||
duplicate_count=len(request.duplicate_entry_ids),
|
||||
owner=request.owner,
|
||||
dry_run=request.dry_run,
|
||||
owner_approved=request.owner_approved,
|
||||
)
|
||||
try:
|
||||
return await archive_km_review_draft_duplicates(
|
||||
governance_event_id=governance_event_id,
|
||||
request=request,
|
||||
)
|
||||
except KmReviewDraftArchiveError as exc:
|
||||
raise HTTPException(status_code=exc.status_code, detail=exc.detail) from exc
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# GET /api/v1/ai/governance/km-stale-candidates
|
||||
# =============================================================================
|
||||
|
||||
@router.get(
|
||||
"/ai/governance/km-stale-candidates",
|
||||
response_model=KnowledgeStaleCandidatesResponse,
|
||||
)
|
||||
async def get_km_stale_candidates(
|
||||
project_id: Annotated[str, Query(min_length=1, max_length=64)] = "awoooi",
|
||||
limit: Annotated[int, Query(ge=5, le=100)] = 20,
|
||||
) -> KnowledgeStaleCandidatesResponse:
|
||||
"""
|
||||
查詢 stale KM 的 read-only 優先處理清單。
|
||||
|
||||
Hermes 可以用這個 read model 產生 KM 更新草稿;owner console 則能先看
|
||||
哪些條目有 Incident / Sentry / SigNoz / PlayBook 脈絡,避免只看到總數。
|
||||
"""
|
||||
logger.debug(
|
||||
"km_stale_candidates_request",
|
||||
project_id=project_id,
|
||||
limit=limit,
|
||||
)
|
||||
return await query_km_stale_candidates(project_id=project_id, limit=limit)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# GET /api/v1/ai/governance/km-stale-owner-reviews
|
||||
# =============================================================================
|
||||
|
||||
@router.get(
|
||||
"/ai/governance/km-stale-owner-reviews",
|
||||
response_model=KnowledgeStaleOwnerReviewInboxResponse,
|
||||
)
|
||||
async def get_km_stale_owner_reviews(
|
||||
project_id: Annotated[str, Query(min_length=1, max_length=64)] = "awoooi",
|
||||
dispatch_status: Annotated[
|
||||
str,
|
||||
Query(pattern="^(all|pending|dispatched|executing|succeeded|failed|skipped|cancelled)$"),
|
||||
] = "pending",
|
||||
limit: Annotated[int, Query(ge=5, le=100)] = 20,
|
||||
) -> KnowledgeStaleOwnerReviewInboxResponse:
|
||||
"""
|
||||
查詢 stale KM owner-review 工作台。
|
||||
|
||||
這是 read-only inbox:把 dispatch trail 與 KM priority context 合併,
|
||||
讓 operator 可以依 P0/P1、score、batch 來源與流程階段逐筆 completion。
|
||||
"""
|
||||
logger.debug(
|
||||
"km_stale_owner_reviews_request",
|
||||
project_id=project_id,
|
||||
dispatch_status=dispatch_status,
|
||||
limit=limit,
|
||||
)
|
||||
try:
|
||||
return await query_km_stale_owner_review_inbox(
|
||||
project_id=project_id,
|
||||
dispatch_status=dispatch_status,
|
||||
limit=limit,
|
||||
)
|
||||
except KmStaleOwnerReviewError as exc:
|
||||
raise HTTPException(status_code=exc.status_code, detail=exc.detail) from exc
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# GET /api/v1/ai/governance/km-stale-owner-review-burndown
|
||||
# =============================================================================
|
||||
|
||||
@router.get(
|
||||
"/ai/governance/km-stale-owner-review-burndown",
|
||||
response_model=KnowledgeStaleOwnerReviewBurnDownResponse,
|
||||
)
|
||||
async def get_km_stale_owner_review_burndown(
|
||||
project_id: Annotated[str, Query(min_length=1, max_length=64)] = "awoooi",
|
||||
limit: Annotated[int, Query(ge=1, le=100)] = 20,
|
||||
) -> KnowledgeStaleOwnerReviewBurnDownResponse:
|
||||
"""
|
||||
查詢 stale KM owner-review 完成與 stale ratio burn-down 狀態。
|
||||
|
||||
這是 read-only dashboard:把 pending review、completion audit、recheck
|
||||
snapshot 與距離治理門檻的剩餘筆數放在同一個前端面板。
|
||||
"""
|
||||
logger.debug(
|
||||
"km_stale_owner_review_burndown_request",
|
||||
project_id=project_id,
|
||||
limit=limit,
|
||||
)
|
||||
return await query_km_stale_owner_review_burndown(
|
||||
project_id=project_id,
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# GET /api/v1/ai/governance/km-stale-owner-review-completion-queue
|
||||
# =============================================================================
|
||||
|
||||
@router.get(
|
||||
"/ai/governance/km-stale-owner-review-completion-queue",
|
||||
response_model=KnowledgeStaleOwnerReviewCompletionQueueResponse,
|
||||
)
|
||||
async def get_km_stale_owner_review_completion_queue(
|
||||
project_id: Annotated[str, Query(min_length=1, max_length=64)] = "awoooi",
|
||||
status_bucket: Annotated[
|
||||
str,
|
||||
Query(pattern="^(all|ready|blocked|completed|failed|pending)$"),
|
||||
] = "all",
|
||||
priority_tier: Annotated[list[str] | None, Query(alias="priority_tier")] = None,
|
||||
recommended_completion_outcome: Annotated[
|
||||
str,
|
||||
Query(pattern="^(all|refresh_with_evidence|archive|supersede)$"),
|
||||
] = "all",
|
||||
batch_governance_event_id: Annotated[str | None, Query(max_length=120)] = None,
|
||||
can_preview: bool | None = None,
|
||||
limit: Annotated[int, Query(ge=1, le=100)] = 20,
|
||||
) -> KnowledgeStaleOwnerReviewCompletionQueueResponse:
|
||||
"""
|
||||
查詢 stale KM owner-review completion 分流。
|
||||
|
||||
這是 read-only queue:把 active / completed / failed dispatch 拆成
|
||||
ready、blocked、completed、failed,讓前端呈現下一步卡點;打開頁面不寫 KM。
|
||||
"""
|
||||
logger.debug(
|
||||
"km_stale_owner_review_completion_queue_request",
|
||||
project_id=project_id,
|
||||
status_bucket=status_bucket,
|
||||
priority_tiers=priority_tier,
|
||||
recommended_completion_outcome=recommended_completion_outcome,
|
||||
batch_governance_event_id=batch_governance_event_id,
|
||||
can_preview=can_preview,
|
||||
limit=limit,
|
||||
)
|
||||
try:
|
||||
return await query_km_stale_owner_review_completion_queue(
|
||||
project_id=project_id,
|
||||
status_bucket=status_bucket,
|
||||
priority_tiers=priority_tier,
|
||||
recommended_completion_outcome=recommended_completion_outcome,
|
||||
batch_governance_event_id=batch_governance_event_id,
|
||||
can_preview=can_preview,
|
||||
limit=limit,
|
||||
)
|
||||
except KmStaleOwnerReviewError as exc:
|
||||
raise HTTPException(status_code=exc.status_code, detail=exc.detail) from exc
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# POST /api/v1/ai/governance/km-stale-owner-review-completion-queue/batch-preview
|
||||
# =============================================================================
|
||||
|
||||
@router.post(
|
||||
"/ai/governance/km-stale-owner-review-completion-queue/batch-preview",
|
||||
response_model=KnowledgeStaleOwnerReviewCompletionBatchPreviewResponse,
|
||||
)
|
||||
async def post_km_stale_owner_review_completion_batch_preview(
|
||||
request: KnowledgeStaleOwnerReviewCompletionBatchPreviewRequest,
|
||||
) -> KnowledgeStaleOwnerReviewCompletionBatchPreviewResponse:
|
||||
"""
|
||||
Preview a bounded set of owner-review completion candidates.
|
||||
|
||||
This endpoint is intentionally dry-run only: it does not write KM, does not
|
||||
enqueue a batch executor, and does not create governance audit rows. Each
|
||||
item must still be completed through the single-item dry-run + owner confirm
|
||||
endpoint.
|
||||
"""
|
||||
logger.info(
|
||||
"km_stale_owner_review_completion_batch_preview_request",
|
||||
project_id=request.project_id,
|
||||
status_bucket=request.status_bucket,
|
||||
priority_tiers=request.priority_tiers,
|
||||
recommended_completion_outcome=request.recommended_completion_outcome,
|
||||
batch_governance_event_id=request.batch_governance_event_id,
|
||||
limit=request.limit,
|
||||
owner=request.owner,
|
||||
)
|
||||
try:
|
||||
return await preview_km_stale_owner_review_completion_batch(request=request)
|
||||
except KmStaleOwnerReviewError as exc:
|
||||
raise HTTPException(status_code=exc.status_code, detail=exc.detail) from exc
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# POST /api/v1/ai/governance/km-stale-candidates/batch-queue-review
|
||||
# =============================================================================
|
||||
|
||||
@router.post(
|
||||
"/ai/governance/km-stale-candidates/batch-queue-review",
|
||||
response_model=KnowledgeStaleOwnerReviewBatchQueueResponse,
|
||||
)
|
||||
async def post_km_stale_candidate_batch_queue_review(
|
||||
request: KnowledgeStaleOwnerReviewBatchQueueRequest,
|
||||
) -> KnowledgeStaleOwnerReviewBatchQueueResponse:
|
||||
"""
|
||||
將 P0/P1 stale KM 批次排入 owner review。
|
||||
|
||||
這個 endpoint 只建立 batch audit 與逐筆 owner-review dispatch,不改寫 KM。
|
||||
真正 refresh / archive / supersede 仍需單筆 dry-run fingerprint + owner approval。
|
||||
"""
|
||||
logger.info(
|
||||
"km_stale_candidate_batch_queue_review_request",
|
||||
project_id=request.project_id,
|
||||
priority_tiers=request.priority_tiers,
|
||||
limit=request.limit,
|
||||
owner=request.owner,
|
||||
dry_run=request.dry_run,
|
||||
)
|
||||
try:
|
||||
return await batch_queue_km_stale_owner_reviews(request=request)
|
||||
except KmStaleOwnerReviewError as exc:
|
||||
raise HTTPException(status_code=exc.status_code, detail=exc.detail) from exc
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# POST /api/v1/ai/governance/km-stale-candidates/{entry_id}/queue-review
|
||||
# =============================================================================
|
||||
|
||||
@router.post(
|
||||
"/ai/governance/km-stale-candidates/{entry_id}/queue-review",
|
||||
response_model=KnowledgeStaleOwnerReviewResponse,
|
||||
)
|
||||
async def post_km_stale_candidate_queue_review(
|
||||
entry_id: str,
|
||||
request: KnowledgeStaleOwnerReviewRequest,
|
||||
) -> KnowledgeStaleOwnerReviewResponse:
|
||||
"""
|
||||
將單筆 stale KM candidate 排入 owner review。
|
||||
|
||||
這個 endpoint 只建立治理事件與 dispatch work item,不修改 KM 內容。
|
||||
實際 refresh / archive / supersede 仍需 owner 在後續流程確認。
|
||||
"""
|
||||
logger.info(
|
||||
"km_stale_candidate_queue_review_request",
|
||||
entry_id=entry_id,
|
||||
owner=request.owner,
|
||||
dry_run=request.dry_run,
|
||||
)
|
||||
try:
|
||||
return await queue_km_stale_owner_review(entry_id=entry_id, request=request)
|
||||
except KmStaleOwnerReviewError as exc:
|
||||
raise HTTPException(status_code=exc.status_code, detail=exc.detail) from exc
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# POST /api/v1/ai/governance/km-stale-candidates/{entry_id}/complete-review
|
||||
# =============================================================================
|
||||
|
||||
@router.post(
|
||||
"/ai/governance/km-stale-candidates/{entry_id}/complete-review",
|
||||
response_model=KnowledgeStaleOwnerReviewCompleteResponse,
|
||||
)
|
||||
async def post_km_stale_candidate_complete_review(
|
||||
entry_id: str,
|
||||
request: KnowledgeStaleOwnerReviewCompleteRequest,
|
||||
) -> KnowledgeStaleOwnerReviewCompleteResponse:
|
||||
"""
|
||||
Owner 審核後完成 stale KM 的 refresh / archive / supersede 流程。
|
||||
|
||||
必須先 dry-run 取得 fingerprint;真正寫入時需 owner_approved=true。
|
||||
後端會寫 KM、terminal audit dispatch 與 stale ratio recheck dispatch。
|
||||
"""
|
||||
logger.info(
|
||||
"km_stale_candidate_complete_review_request",
|
||||
entry_id=entry_id,
|
||||
dispatch_id=request.dispatch_id,
|
||||
owner=request.owner,
|
||||
review_outcome=request.review_outcome,
|
||||
dry_run=request.dry_run,
|
||||
owner_approved=request.owner_approved,
|
||||
)
|
||||
try:
|
||||
return await complete_km_stale_owner_review(
|
||||
entry_id=entry_id,
|
||||
request=request,
|
||||
)
|
||||
except KmStaleOwnerReviewError as exc:
|
||||
raise HTTPException(status_code=exc.status_code, detail=exc.detail) from exc
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# GET /api/v1/ai/governance/summary
|
||||
# =============================================================================
|
||||
|
||||
@@ -18,8 +18,15 @@ Endpoints:
|
||||
from __future__ import annotations
|
||||
|
||||
import structlog
|
||||
from fastapi import APIRouter, Query
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from src.services.adr100_remediation_service import (
|
||||
RemediationMode,
|
||||
RemediationNotFoundError,
|
||||
get_adr100_remediation_service,
|
||||
)
|
||||
from src.services.adr100_slo_status_service import get_adr100_slo_status_service
|
||||
from src.services.ai_slo_calculator import AiSloCalculator
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
@@ -27,6 +34,20 @@ logger = structlog.get_logger(__name__)
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class RemediationPreviewRequest(BaseModel):
|
||||
"""ADR-100 remediation preview request."""
|
||||
|
||||
work_item_id: str = Field(min_length=1)
|
||||
mode: RemediationMode = "auto"
|
||||
|
||||
|
||||
class RemediationDryRunRequest(BaseModel):
|
||||
"""ADR-100 remediation dry-run request."""
|
||||
|
||||
work_item_id: str = Field(min_length=1)
|
||||
mode: RemediationMode = "auto"
|
||||
|
||||
|
||||
@router.get("/ai/slo")
|
||||
async def get_ai_slo(
|
||||
force_refresh: bool = Query(False, description="忽略快取,強制重算"),
|
||||
@@ -50,9 +71,65 @@ async def get_ai_slo(
|
||||
if cached:
|
||||
data = cached.to_dict()
|
||||
data["cache_hit"] = True
|
||||
data["adr100"] = await get_adr100_slo_status_service().fetch_report()
|
||||
return data
|
||||
|
||||
report = await calc.run()
|
||||
data = report.to_dict()
|
||||
data["cache_hit"] = False
|
||||
data["adr100"] = await get_adr100_slo_status_service().fetch_report()
|
||||
return data
|
||||
|
||||
|
||||
@router.get("/ai/slo/remediation/preview")
|
||||
async def preview_ai_slo_remediation(
|
||||
work_item_id: str = Query(..., min_length=1),
|
||||
mode: RemediationMode = Query("auto"),
|
||||
) -> dict:
|
||||
"""Preview the safe remediation plan for one ADR-100 queue item."""
|
||||
|
||||
try:
|
||||
return await get_adr100_remediation_service().preview(work_item_id, mode)
|
||||
except RemediationNotFoundError as exc:
|
||||
raise HTTPException(status_code=404, detail="remediation_work_item_not_found") from exc
|
||||
|
||||
|
||||
@router.post("/ai/slo/remediation/preview")
|
||||
async def preview_ai_slo_remediation_post(request: RemediationPreviewRequest) -> dict:
|
||||
"""POST variant for clients that prefer JSON bodies."""
|
||||
|
||||
try:
|
||||
return await get_adr100_remediation_service().preview(
|
||||
request.work_item_id,
|
||||
request.mode,
|
||||
)
|
||||
except RemediationNotFoundError as exc:
|
||||
raise HTTPException(status_code=404, detail="remediation_work_item_not_found") from exc
|
||||
|
||||
|
||||
@router.post("/ai/slo/remediation/dry-run")
|
||||
async def dry_run_ai_slo_remediation(request: RemediationDryRunRequest) -> dict:
|
||||
"""Run a read-only ADR-100 remediation dry-run."""
|
||||
|
||||
try:
|
||||
return await get_adr100_remediation_service().dry_run(
|
||||
request.work_item_id,
|
||||
request.mode,
|
||||
)
|
||||
except RemediationNotFoundError as exc:
|
||||
raise HTTPException(status_code=404, detail="remediation_work_item_not_found") from exc
|
||||
|
||||
|
||||
@router.get("/ai/slo/remediation/history")
|
||||
async def list_ai_slo_remediation_history(
|
||||
limit: int = Query(50, ge=1, le=200),
|
||||
incident_id: str | None = Query(default=None, min_length=1),
|
||||
work_item_id: str | None = Query(default=None, min_length=1),
|
||||
) -> dict:
|
||||
"""List durable ADR-100 remediation dry-run history from alert_operation_log."""
|
||||
|
||||
return await get_adr100_remediation_service().history(
|
||||
limit=limit,
|
||||
incident_id=incident_id,
|
||||
work_item_id=work_item_id,
|
||||
)
|
||||
|
||||
@@ -20,6 +20,7 @@ from pydantic import BaseModel
|
||||
from src.core.config import settings
|
||||
from src.core.logging import get_logger
|
||||
from src.core.sse import EventPublisher, EventType, SSEEvent, get_publisher
|
||||
from src.services.dashboard_metrics_service import fetch_pending_approval_count
|
||||
from src.services.host_aggregator import AggregatedStatus, HostAggregator
|
||||
|
||||
router = APIRouter()
|
||||
@@ -141,12 +142,14 @@ async def dashboard_update_loop(publisher: EventPublisher) -> None:
|
||||
try:
|
||||
# Fetch aggregated status
|
||||
status = await HostAggregator.fetch_all()
|
||||
pending_approvals = await fetch_pending_approval_count()
|
||||
|
||||
# Publish to all connected clients
|
||||
event = SSEEvent(
|
||||
type=EventType.HOST_UPDATE,
|
||||
data={
|
||||
"overall_status": status.overall_status,
|
||||
"pending_approvals": pending_approvals,
|
||||
"hosts": [
|
||||
{
|
||||
"ip": h.ip,
|
||||
@@ -206,7 +209,9 @@ async def get_dashboard() -> DashboardResponse:
|
||||
logger.info("dashboard_fetch")
|
||||
|
||||
status = await HostAggregator.fetch_all()
|
||||
return aggregated_to_response(status)
|
||||
response = aggregated_to_response(status)
|
||||
response.pending_approvals = await fetch_pending_approval_count()
|
||||
return response
|
||||
|
||||
|
||||
@router.get("/dashboard/stream")
|
||||
|
||||
@@ -13,10 +13,12 @@ leWOOOgo 積木化原則:
|
||||
建立者: Claude Code (Phase 25 P2)
|
||||
"""
|
||||
|
||||
from typing import Literal
|
||||
|
||||
from fastapi import APIRouter, BackgroundTasks, HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from src.core.csrf import CSRFToken # Phase 20: CSRF Protection
|
||||
|
||||
from src.models.drift import (
|
||||
DriftListResponse,
|
||||
DriftReport,
|
||||
@@ -28,6 +30,10 @@ from src.repositories.drift_repository import get_drift_repository
|
||||
from src.services.drift_adopt_service import get_drift_adopt_service
|
||||
from src.services.drift_analyzer import get_drift_analyzer
|
||||
from src.services.drift_detector import get_drift_detector
|
||||
from src.services.drift_fingerprint_state_service import (
|
||||
DriftFingerprintStateNotFoundError,
|
||||
get_drift_fingerprint_state_service,
|
||||
)
|
||||
from src.services.drift_interpreter import get_drift_interpreter
|
||||
from src.services.drift_remediator import get_drift_remediator
|
||||
from src.utils.timezone import now_taipei
|
||||
@@ -37,6 +43,42 @@ router = APIRouter(prefix="/drift", tags=["drift"])
|
||||
# 2026-04-09 Claude Sonnet 4.6: B4 drift_reports 持久化 — 改用 DB repository
|
||||
|
||||
|
||||
class DriftFingerprintHandoffRequest(BaseModel):
|
||||
"""Record-only handoff request for a stable drift fingerprint."""
|
||||
|
||||
report_id: str | None = Field(default=None, min_length=1)
|
||||
namespace: str | None = Field(default="awoooi-prod", min_length=1)
|
||||
handoff_kind: Literal[
|
||||
"open_pr_review",
|
||||
"manual_investigation",
|
||||
"zero_diff_pr_cleanup",
|
||||
] = "open_pr_review"
|
||||
pr_url: str | None = Field(default=None, min_length=1)
|
||||
note: str | None = Field(default=None, max_length=500)
|
||||
|
||||
|
||||
class DriftFingerprintRemediationRequest(BaseModel):
|
||||
"""Record-only remediation request for a stable drift fingerprint."""
|
||||
|
||||
report_id: str | None = Field(default=None, min_length=1)
|
||||
namespace: str | None = Field(default="awoooi-prod", min_length=1)
|
||||
remediation_kind: Literal[
|
||||
"live_env_rollback",
|
||||
"git_adopted",
|
||||
"git_rollback",
|
||||
"zero_diff_pr_cleanup",
|
||||
"manual_noop",
|
||||
] = "live_env_rollback"
|
||||
remediation_status: Literal[
|
||||
"executed_unverified",
|
||||
"verified_no_drift",
|
||||
"verification_failed",
|
||||
] | None = None
|
||||
verification_report_id: str | None = Field(default=None, min_length=1)
|
||||
note: str | None = Field(default=None, max_length=1000)
|
||||
commands_summary: list[str] = Field(default_factory=list, max_length=12)
|
||||
|
||||
|
||||
@router.post("/scan", response_model=DriftScanResponse, summary="觸發漂移掃描")
|
||||
async def trigger_drift_scan(
|
||||
request: DriftScanRequest,
|
||||
@@ -99,6 +141,72 @@ async def list_drift_reports() -> DriftListResponse:
|
||||
return DriftListResponse(items=items, total=len(items))
|
||||
|
||||
|
||||
@router.get("/fingerprints/state", summary="查詢 Config Drift fingerprint 狀態")
async def get_drift_fingerprint_state(
    report_id: str | None = None,
    namespace: str | None = "awoooi-prod",
) -> dict:
    """
    以 stable fingerprint 聚合漂移狀態。

    此 endpoint 只建立 read model:重複次數、PR 狀態、是否零 diff、
    人工交接歷史與下一步。它不修改 drift / incident / auto-repair 狀態。
    """
    # NOTE: the docstring above is left verbatim on purpose. No `description=`
    # is passed to the decorator, so FastAPI publishes it as the OpenAPI
    # description of this route.
    #
    # English summary of the docstring: aggregates drift state by stable
    # fingerprint as a read model and, per the docstring, does not modify
    # drift / incident / auto-repair state.
    #
    # Returns whatever the fingerprint-state service returns for the given
    # report_id / namespace. A DriftFingerprintStateNotFoundError from the
    # service is translated into HTTP 404 with detail "drift_report_not_found".
    state_service = get_drift_fingerprint_state_service()
    try:
        state = await state_service.get_state(
            report_id=report_id,
            namespace=namespace,
        )
    except DriftFingerprintStateNotFoundError as exc:
        raise HTTPException(status_code=404, detail="drift_report_not_found") from exc
    return state
|
||||
|
||||
|
||||
@router.post("/fingerprints/handoff", summary="記錄 Config Drift fingerprint 交接")
async def record_drift_fingerprint_handoff(
    request: DriftFingerprintHandoffRequest,
) -> dict:
    """
    記錄 stable fingerprint 已轉人工 / PR review 的歷史證據。

    安全邊界:只寫 alert_operation_log / timeline_events,不修改 drift 狀態、
    incident 狀態、自動修復結果,不建立外部 ticket,也不 merge PR。
    """
    # NOTE: the docstring above is left verbatim on purpose. No `description=`
    # is passed to the decorator, so FastAPI publishes it as the OpenAPI
    # description of this route.
    #
    # English summary of the docstring: records evidence that a stable
    # fingerprint was handed off to a human / PR review. Per the docstring the
    # service only writes alert_operation_log / timeline_events.
    #
    # Every request field is forwarded unchanged. A
    # DriftFingerprintStateNotFoundError from the service becomes HTTP 404
    # with detail "drift_report_not_found".
    state_service = get_drift_fingerprint_state_service()
    try:
        recorded = await state_service.record_handoff(
            report_id=request.report_id,
            namespace=request.namespace,
            handoff_kind=request.handoff_kind,
            pr_url=request.pr_url,
            note=request.note,
        )
    except DriftFingerprintStateNotFoundError as exc:
        raise HTTPException(status_code=404, detail="drift_report_not_found") from exc
    return recorded
|
||||
|
||||
|
||||
@router.post("/fingerprints/remediation", summary="記錄 Config Drift fingerprint 修復")
async def record_drift_fingerprint_remediation(
    request: DriftFingerprintRemediationRequest,
) -> dict:
    """
    記錄 stable fingerprint 已完成的修復 / 驗證證據。

    安全邊界:只寫 alert_operation_log / timeline_events,不修改 drift 狀態、
    incident 狀態、自動修復結果,不建立外部 ticket,也不執行 kubectl。
    """
    # NOTE: the docstring above is left verbatim on purpose. No `description=`
    # is passed to the decorator, so FastAPI publishes it as the OpenAPI
    # description of this route.
    #
    # English summary of the docstring: records evidence of a completed
    # remediation / verification for a stable fingerprint. Per the docstring
    # the service only writes alert_operation_log / timeline_events.
    #
    # Every request field is forwarded unchanged. A
    # DriftFingerprintStateNotFoundError from the service becomes HTTP 404
    # with detail "drift_report_not_found".
    state_service = get_drift_fingerprint_state_service()
    try:
        recorded = await state_service.record_remediation(
            report_id=request.report_id,
            namespace=request.namespace,
            remediation_kind=request.remediation_kind,
            remediation_status=request.remediation_status,
            verification_report_id=request.verification_report_id,
            note=request.note,
            commands_summary=request.commands_summary,
        )
    except DriftFingerprintStateNotFoundError as exc:
        raise HTTPException(status_code=404, detail="drift_report_not_found") from exc
    return recorded
|
||||
|
||||
|
||||
@router.post("/reports/{report_id}/rollback", summary="覆蓋回 Git 狀態")
|
||||
async def rollback_drift(report_id: str, _csrf_token: CSRFToken) -> dict: # Phase 20: CSRF Protection (驗證用,不需要使用值)
|
||||
"""
|
||||
|
||||
@@ -418,7 +418,9 @@ async def _send_gitea_notification(
|
||||
logger.debug("gitea_tg_skipped", reason="Bot token not configured")
|
||||
return
|
||||
|
||||
from src.services.telegram_gateway import get_telegram_gateway # type: ignore[import]
|
||||
from src.services.telegram_gateway import (
|
||||
get_telegram_gateway, # type: ignore[import]
|
||||
)
|
||||
gateway = get_telegram_gateway()
|
||||
await gateway.initialize()
|
||||
await gateway.send_alert_notification(message)
|
||||
@@ -502,15 +504,22 @@ async def handle_pull_request(
|
||||
review_id = f"gitea-pr-{payload.repository.id}-{pr.number}-{uuid.uuid4().hex[:8]}"
|
||||
|
||||
# 背景執行審查 (委派給 Service)
|
||||
service = get_gitea_webhook_service()
|
||||
background_tasks.add_task(
|
||||
service.review_pull_request,
|
||||
repo=payload.repository,
|
||||
pr=pr,
|
||||
sender=payload.sender,
|
||||
review_id=review_id,
|
||||
action=payload.action,
|
||||
)
|
||||
if settings.MOCK_MODE:
|
||||
logger.info(
|
||||
"gitea_pr_review_background_skipped_mock_mode",
|
||||
review_id=review_id,
|
||||
repo=payload.repository.full_name,
|
||||
)
|
||||
else:
|
||||
service = get_gitea_webhook_service()
|
||||
background_tasks.add_task(
|
||||
service.review_pull_request,
|
||||
repo=payload.repository,
|
||||
pr=pr,
|
||||
sender=payload.sender,
|
||||
review_id=review_id,
|
||||
action=payload.action,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"gitea_pr_review_scheduled",
|
||||
@@ -561,17 +570,24 @@ async def handle_push(
|
||||
review_id = f"gitea-push-{payload.repository.id}-{payload.after[:8]}-{uuid.uuid4().hex[:8]}"
|
||||
|
||||
# 背景執行審查 (委派給 Service)
|
||||
service = get_gitea_webhook_service()
|
||||
background_tasks.add_task(
|
||||
service.review_push,
|
||||
repo=payload.repository,
|
||||
commits=commits,
|
||||
sender=payload.sender,
|
||||
review_id=review_id,
|
||||
ref=ref,
|
||||
before_sha=payload.before,
|
||||
after_sha=payload.after,
|
||||
)
|
||||
if settings.MOCK_MODE:
|
||||
logger.info(
|
||||
"gitea_push_review_background_skipped_mock_mode",
|
||||
review_id=review_id,
|
||||
repo=payload.repository.full_name,
|
||||
)
|
||||
else:
|
||||
service = get_gitea_webhook_service()
|
||||
background_tasks.add_task(
|
||||
service.review_push,
|
||||
repo=payload.repository,
|
||||
commits=commits,
|
||||
sender=payload.sender,
|
||||
review_id=review_id,
|
||||
ref=ref,
|
||||
before_sha=payload.before,
|
||||
after_sha=payload.after,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"gitea_push_review_scheduled",
|
||||
|
||||
@@ -11,7 +11,7 @@ Endpoints:
|
||||
Components Checked:
|
||||
- PostgreSQL (192.168.0.188:5432)
|
||||
- Redis (192.168.0.188:6380)
|
||||
- Ollama (192.168.0.188:11434)
|
||||
- Ollama ADR-110 provider pool (GCP-A -> GCP-B -> 111)
|
||||
- OpenClaw (192.168.0.188:8089)
|
||||
- SigNoz (192.168.0.188:3301)
|
||||
"""
|
||||
@@ -26,9 +26,16 @@ from pydantic import BaseModel
|
||||
from src.core.config import settings
|
||||
from src.core.logging import get_logger
|
||||
from src.services.health_check_service import get_health_check_service
|
||||
from src.services.ollama_endpoint_circuit_breaker import (
|
||||
get_ollama_endpoint_cooldown_remaining_seconds,
|
||||
record_ollama_endpoint_failure,
|
||||
record_ollama_endpoint_success,
|
||||
)
|
||||
from src.services.ollama_endpoint_resolver import resolve_ollama_order
|
||||
|
||||
router = APIRouter()
|
||||
logger = get_logger("awoooi.health")
|
||||
CORE_COMPONENTS = ("api", "postgresql", "redis", "ollama", "openclaw", "signoz")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
@@ -40,6 +47,11 @@ class ComponentHealth(BaseModel):
|
||||
status: Literal["up", "down", "degraded"]
|
||||
latency_ms: float | None = None
|
||||
error: str | None = None
|
||||
provider_name: str | None = None
|
||||
diagnosis_code: str | None = None
|
||||
retry_after_seconds: float | None = None
|
||||
cooldown_remaining_seconds: float | None = None
|
||||
is_cooldown: bool = False
|
||||
|
||||
|
||||
class HealthResponse(BaseModel):
|
||||
@@ -50,6 +62,7 @@ class HealthResponse(BaseModel):
|
||||
mock_mode: bool
|
||||
timestamp: datetime
|
||||
components: dict[str, ComponentHealth]
|
||||
ollama_route_order: list[str] = []
|
||||
|
||||
|
||||
# =============================================================================
|
||||
@@ -106,8 +119,125 @@ async def check_redis() -> ComponentHealth:
|
||||
|
||||
|
||||
async def check_ollama() -> ComponentHealth:
|
||||
"""Async Ollama health check via /api/tags"""
|
||||
return await _http_health_check("ollama", settings.OLLAMA_URL, "/api/tags")
|
||||
"""Async aggregate Ollama health check via ADR-110 provider chain."""
|
||||
aggregate, _details = await check_ollama_provider_chain()
|
||||
return aggregate
|
||||
|
||||
|
||||
async def check_ollama_provider_chain() -> tuple[ComponentHealth, dict[str, ComponentHealth]]:
    """
    Probe every configured Ollama endpoint and summarise route availability.

    Endpoints come from ``resolve_ollama_order("healthcheck")``; entries with
    no URL or named ``ollama_unconfigured`` are ignored. All remaining
    endpoints are probed concurrently.

    Returns:
        ``(aggregate, details)`` where ``details`` maps provider name to its
        individual result and ``aggregate`` is:

        - the first entry's own result when that entry is ``ollama_gcp_a``
          and it is ``up``;
        - ``degraded`` when any endpoint is ``up`` otherwise (carrying the
          latency of the first reachable endpoint in route order);
        - ``down`` when nothing is reachable or nothing is configured.
    """
    candidates = [
        entry
        for entry in resolve_ollama_order("healthcheck")
        if entry.url and entry.provider_name != "ollama_unconfigured"
    ]
    if not candidates:
        return (
            ComponentHealth(status="down", error="no Ollama endpoints configured"),
            {},
        )

    # One probe coroutine per endpoint, awaited together.
    probes = [
        _ollama_endpoint_health_check(entry.provider_name, entry.url)
        for entry in candidates
    ]
    outcomes = await asyncio.gather(*probes)

    # Keyed by provider name; a repeated name keeps the later result.
    details = {}
    for entry, outcome in zip(candidates, outcomes):
        details[entry.provider_name] = outcome

    head = candidates[0]
    head_health = details[head.provider_name]
    if head.provider_name == "ollama_gcp_a" and head_health.status == "up":
        return head_health, details

    # NOTE(review): a reachable first entry that is not named "ollama_gcp_a"
    # also lands here and is reported as the active fallback.
    reachable = [
        entry for entry in candidates if details[entry.provider_name].status == "up"
    ]
    active = reachable[0] if reachable else None
    if active:
        standby = details[active.provider_name]
        degraded = ComponentHealth(
            status="degraded",
            latency_ms=standby.latency_ms,
            error=f"primary unavailable; fallback active: {active.provider_name}",
        )
        return degraded, details

    # Nothing answered: fold each endpoint's error (or bare status) into one line.
    failure_parts = [
        f"{provider}={health.error or health.status}"
        for provider, health in details.items()
    ]
    errors = ", ".join(failure_parts)
    unavailable = ComponentHealth(
        status="down",
        error=f"all Ollama endpoints unavailable: {errors}",
    )
    return unavailable, details
|
||||
|
||||
|
||||
async def _ollama_endpoint_health_check(name: str, url: str) -> ComponentHealth:
    """
    Probe one Ollama endpoint, honouring the endpoint cooldown.

    While the circuit breaker reports a positive remaining cooldown for
    ``url`` no HTTP request is made and a ``down`` result flagged
    ``is_cooldown`` is returned. Otherwise ``/api/tags`` is probed, the result
    is tagged with ``name`` and a diagnosis code, and the outcome is reported
    back to the circuit breaker (success only for status ``up``).

    Args:
        name: Provider name stored on the result as ``provider_name``.
        url: Endpoint base URL; also the circuit-breaker key.

    Returns:
        The per-endpoint ``ComponentHealth``.
    """
    remaining = get_ollama_endpoint_cooldown_remaining_seconds(url)
    if remaining > 0:
        # Both hint fields carry the same value, rounded to 0.1s.
        rounded = round(remaining, 1)
        return ComponentHealth(
            status="down",
            error=f"recent endpoint failure cooldown: {remaining:.0f}s",
            provider_name=name,
            diagnosis_code="endpoint_cooldown",
            retry_after_seconds=rounded,
            cooldown_remaining_seconds=rounded,
            is_cooldown=True,
        )

    probe = await _http_health_check(name, url, "/api/tags")
    probe.provider_name = name

    if probe.status != "up":
        # Any non-"up" status counts as a failure for the circuit breaker.
        probe.diagnosis_code = _classify_ollama_endpoint_failure(name, probe.error)
        record_ollama_endpoint_failure(url)
        return probe

    probe.diagnosis_code = "endpoint_reachable"
    record_ollama_endpoint_success(url)
    return probe
|
||||
|
||||
|
||||
def _classify_ollama_endpoint_failure(
|
||||
provider_name: str,
|
||||
error: str | None,
|
||||
) -> str:
|
||||
"""Return a stable diagnosis code for UI/alert rendering."""
|
||||
normalized_error = (error or "").lower()
|
||||
if "cooldown" in normalized_error:
|
||||
return "endpoint_cooldown"
|
||||
if "502" in normalized_error or "bad gateway" in normalized_error:
|
||||
return (
|
||||
"local_proxy_upstream_unreachable"
|
||||
if provider_name == "ollama_local"
|
||||
else "proxy_upstream_unreachable"
|
||||
)
|
||||
if "timeout" in normalized_error:
|
||||
return "endpoint_timeout"
|
||||
if "connection refused" in normalized_error:
|
||||
return "endpoint_connection_refused"
|
||||
if "no route to host" in normalized_error or "network is unreachable" in normalized_error:
|
||||
return "endpoint_network_unreachable"
|
||||
return "endpoint_unreachable"
|
||||
|
||||
|
||||
async def check_openclaw() -> ComponentHealth:
|
||||
@@ -120,6 +250,30 @@ async def check_signoz() -> ComponentHealth:
|
||||
return await _http_health_check("signoz", settings.SIGNOZ_URL, "/api/v1/health")
|
||||
|
||||
|
||||
def _determine_overall_status(
    components: dict[str, ComponentHealth],
) -> Literal["healthy", "degraded", "unhealthy"]:
    """Derive the overall health verdict from the core components only.

    Only names listed in ``CORE_COMPONENTS`` and present in ``components``
    are counted; any other entry in the mapping is ignored.

    Rules, first match wins:
        - ``unhealthy``: postgresql or redis is down (a missing entry counts
          as down), or at least three core components are down.
        - ``degraded``: at least one core component is down or degraded.
        - ``healthy``: otherwise.
    """
    core_statuses = []
    for name in CORE_COMPONENTS:
        if name in components:
            core_statuses.append(components[name].status)

    down_total = sum(1 for state in core_statuses if state == "down")
    any_degraded = "degraded" in core_statuses

    def _missing_or_down(name: str) -> bool:
        # An absent critical component is treated exactly like a down one.
        return name not in components or components[name].status == "down"

    if _missing_or_down("postgresql") or _missing_or_down("redis"):
        return "unhealthy"
    if down_total >= 3:
        return "unhealthy"
    if down_total >= 1 or any_degraded:
        return "degraded"
    return "healthy"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Endpoints
|
||||
# =============================================================================
|
||||
@@ -142,34 +296,28 @@ async def get_health() -> HealthResponse:
|
||||
results = await asyncio.gather(
|
||||
check_postgresql(),
|
||||
check_redis(),
|
||||
check_ollama(),
|
||||
check_ollama_provider_chain(),
|
||||
check_openclaw(),
|
||||
check_signoz(),
|
||||
)
|
||||
|
||||
ollama_aggregate, ollama_details = results[2]
|
||||
components = {
|
||||
"api": ComponentHealth(status="up", latency_ms=0.0),
|
||||
"postgresql": results[0],
|
||||
"redis": results[1],
|
||||
"ollama": results[2],
|
||||
"ollama": ollama_aggregate,
|
||||
"openclaw": results[3],
|
||||
"signoz": results[4],
|
||||
}
|
||||
components.update(ollama_details)
|
||||
|
||||
# Determine overall status
|
||||
statuses = [c.status for c in components.values()]
|
||||
down_count = statuses.count("down")
|
||||
degraded_count = statuses.count("degraded")
|
||||
|
||||
# Critical services: postgresql, redis
|
||||
critical_down = components["postgresql"].status == "down" or components["redis"].status == "down"
|
||||
|
||||
if critical_down or down_count >= 3:
|
||||
overall_status: Literal["healthy", "degraded", "unhealthy"] = "unhealthy"
|
||||
elif down_count >= 1 or degraded_count > 0:
|
||||
overall_status = "degraded"
|
||||
else:
|
||||
overall_status = "healthy"
|
||||
overall_status = _determine_overall_status(components)
|
||||
ollama_route_order = [
|
||||
selection.provider_name
|
||||
for selection in resolve_ollama_order("healthcheck")
|
||||
if selection.url and selection.provider_name != "ollama_unconfigured"
|
||||
]
|
||||
|
||||
logger.info(
|
||||
"health_check_complete",
|
||||
@@ -185,6 +333,7 @@ async def get_health() -> HealthResponse:
|
||||
mock_mode=settings.MOCK_MODE,
|
||||
timestamp=datetime.now(UTC),
|
||||
components=components,
|
||||
ollama_route_order=ollama_route_order,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -17,9 +17,10 @@ Phase 6.4 核心功能:
|
||||
- Proposal 必須關聯到 Incident
|
||||
"""
|
||||
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, HTTPException, status
|
||||
from fastapi import APIRouter, HTTPException, Query, status
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from src.core.logging import get_logger
|
||||
@@ -133,6 +134,7 @@ class IncidentTimelineResponse(BaseModel):
|
||||
timeline: list[IncidentTimelineStage] = Field(default_factory=list)
|
||||
events: list[IncidentTimelineEvent] = Field(default_factory=list)
|
||||
ascii_timeline: str
|
||||
reconciliation: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
@@ -148,18 +150,26 @@ class IncidentTimelineResponse(BaseModel):
|
||||
|
||||
Phase 6.5 升級:
|
||||
- 每個事件自動附帶 decision_token
|
||||
- 確保 UI 永遠有決策可操作
|
||||
- 雙軌引擎: LLM (主) + Expert System (備)
|
||||
- 預設只讀取已存在的 decision_token
|
||||
- 需要新決策時改由明確的 proposal / operator run 入口觸發
|
||||
""",
|
||||
)
|
||||
async def list_incidents() -> IncidentListResponse:
|
||||
async def list_incidents(
|
||||
generate_missing_decisions: bool = Query(
|
||||
False,
|
||||
description=(
|
||||
"預設 false,列表查詢只讀既有 decision token;"
|
||||
"true 僅供明確維運操作使用,會背景產生缺少的決策。"
|
||||
),
|
||||
),
|
||||
) -> IncidentListResponse:
|
||||
"""
|
||||
取得活躍事件清單
|
||||
|
||||
Phase 6.5: 自動為每個事件生成決策令牌
|
||||
- P0/P1 事件優先處理
|
||||
- 30 秒內保證有決策
|
||||
- LLM 失敗時 Expert System 保底
|
||||
Phase 6.5: 附帶既有決策令牌
|
||||
- 列表查詢必須是低成本純讀路徑
|
||||
- 不可因為前端輪詢就背景觸發 LLM / Ollama / OpenClaw
|
||||
- 需要新決策時,呼叫 POST /api/v1/incidents/{incident_id}/proposal
|
||||
|
||||
Returns:
|
||||
IncidentListResponse: 事件清單與計數 (含決策令牌)
|
||||
@@ -174,8 +184,6 @@ async def list_incidents() -> IncidentListResponse:
|
||||
|
||||
# 按時間排序 (最新優先)
|
||||
# 2026-03-26 修復: 處理 timezone-aware 與 naive datetime 混合問題
|
||||
from datetime import UTC
|
||||
|
||||
def safe_created_at(i: Incident) -> float:
|
||||
"""安全取得 timestamp,處理 timezone 混合問題"""
|
||||
dt = i.created_at
|
||||
@@ -189,15 +197,24 @@ async def list_incidents() -> IncidentListResponse:
|
||||
# 2026-04-09 Claude Sonnet 4.6: 效能修復 — list endpoint 不同步等待 AI
|
||||
# 原設計: 每個 incident await AI 決策 (120-180s timeout),多 incident 時乘積爆炸
|
||||
# 修復: 只取已存在的決策 token,若無則背景觸發生成,前端 poll 單筆 GET 取得結果
|
||||
import asyncio
|
||||
#
|
||||
# 2026-05-06 Codex: 成本與推理槽修復 — 預設不再背景觸發 AI。
|
||||
# 根因: 多個前端頁面會輪詢 GET /incidents;若列表查詢偷偷 create_task,
|
||||
# 每次頁面載入都可能消耗 GCP Ollama / OpenClaw 推理槽,甚至 fallback 到 Gemini。
|
||||
# 新規則: GET list 是純讀;生成新修復建議必須走明確 proposal/operator-run 入口。
|
||||
if generate_missing_decisions:
|
||||
import asyncio
|
||||
|
||||
responses = []
|
||||
background_tasks = []
|
||||
existing_tokens = await decision_manager._find_existing_tokens_for_incidents(
|
||||
[incident.incident_id for incident in incidents]
|
||||
)
|
||||
|
||||
for incident in incidents:
|
||||
try:
|
||||
# 只查已快取的決策 (不等待 AI,立即返回)
|
||||
existing = await decision_manager._find_existing_token(incident.incident_id)
|
||||
existing = existing_tokens.get(incident.incident_id)
|
||||
if existing:
|
||||
decision_info = DecisionInfo(
|
||||
token=existing.token,
|
||||
@@ -207,17 +224,20 @@ async def list_incidents() -> IncidentListResponse:
|
||||
)
|
||||
responses.append(IncidentResponse.from_incident(incident, decision_info))
|
||||
else:
|
||||
# 無快取 → 背景觸發,本次返回 None(前端看到 decision=null 會 poll)
|
||||
# 無快取 → 本次返回 None。列表查詢預設不觸發 AI;
|
||||
# 前端若需要修復建議,必須呼叫明確的 proposal 入口。
|
||||
responses.append(IncidentResponse.from_incident(incident, None))
|
||||
if not generate_missing_decisions:
|
||||
continue
|
||||
|
||||
# 2026-04-16 Claude Sonnet 4.6: 只對 48h 內的 incident 觸發 AI 分析
|
||||
# 舊 incident token 每小時過期,若不限制會反覆重新分析歷史事件 → Telegram 洪水
|
||||
from datetime import datetime, timezone, timedelta
|
||||
_created = getattr(incident, "created_at", None)
|
||||
_too_old = False
|
||||
if _created:
|
||||
if _created.tzinfo is None:
|
||||
_created = _created.replace(tzinfo=timezone.utc)
|
||||
_too_old = (_created < datetime.now(timezone.utc) - timedelta(hours=48))
|
||||
_created = _created.replace(tzinfo=UTC)
|
||||
_too_old = (_created < datetime.now(UTC) - timedelta(hours=48))
|
||||
if not _too_old:
|
||||
timeout = 120.0 if incident.severity in (Severity.P0, Severity.P1) else 180.0
|
||||
background_tasks.append(
|
||||
@@ -240,6 +260,7 @@ async def list_incidents() -> IncidentListResponse:
|
||||
"incidents_listed",
|
||||
count=len(incidents),
|
||||
with_decisions=sum(1 for r in responses if r.decision is not None),
|
||||
generate_missing_decisions=generate_missing_decisions,
|
||||
)
|
||||
|
||||
return IncidentListResponse(
|
||||
|
||||
@@ -9,14 +9,21 @@ ADR-106/ADR-107/ADR-114/ADR-115/ADR-116
|
||||
from fastapi import APIRouter
|
||||
|
||||
from src.api.v1.platform.contracts import router as contracts_router
|
||||
from src.api.v1.platform.events import router as events_router
|
||||
from src.api.v1.platform.operator_runs import router as operator_runs_router
|
||||
from src.api.v1.platform.runs import router as runs_router
|
||||
from src.api.v1.platform.tenants import router as tenants_router
|
||||
from src.api.v1.platform.truth_chain import router as truth_chain_router
|
||||
|
||||
router = APIRouter()
|
||||
router.include_router(events_router)
|
||||
router.include_router(truth_chain_router)
|
||||
# 2026-05-06 Codex: FastAPI 依註冊順序比對路由。Operator Console 的
|
||||
# `/runs/list` 必須排在 `/runs/{run_id}` 前面,否則 `list` 會被當成
|
||||
# run_id,造成前端 Run 監控頁 HTTP 422。
|
||||
router.include_router(operator_runs_router)
|
||||
router.include_router(runs_router)
|
||||
router.include_router(tenants_router)
|
||||
router.include_router(contracts_router)
|
||||
router.include_router(operator_runs_router)
|
||||
|
||||
__all__ = ["router"]
|
||||
|
||||
586
apps/api/src/api/v1/platform/events.py
Normal file
586
apps/api/src/api/v1/platform/events.py
Normal file
@@ -0,0 +1,586 @@
|
||||
"""
|
||||
AwoooP Operator Console — Channel Events API
|
||||
============================================
|
||||
提供 Operator Console 讀取 Communication Hub / legacy mirror 的事件摘要。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
from typing import Annotated, Any, Literal
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from src.core.awooop_operator_auth import (
|
||||
AwoooPOperatorPrincipal,
|
||||
verify_awooop_operator,
|
||||
)
|
||||
from src.services.channel_event_dossier_service import (
|
||||
RecurrenceWorkItemHandoffKind,
|
||||
RecurrenceWorkItemMode,
|
||||
RecurrenceWorkItemNotFoundError,
|
||||
SourceCorrelationReviewDecision,
|
||||
fetch_channel_event_dossier,
|
||||
fetch_channel_event_dossier_coverage,
|
||||
fetch_channel_event_dossier_recurrence,
|
||||
fetch_recurrence_work_item_dry_run,
|
||||
fetch_recurrence_work_item_handoff,
|
||||
fetch_recurrence_work_item_preview,
|
||||
fetch_source_correlation_apply,
|
||||
fetch_source_correlation_review_decision,
|
||||
)
|
||||
from src.services.channel_hub import record_external_alert_event
|
||||
from src.services.platform_operator_service import list_recent_channel_events
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# One channel event row; element type of RecentEventsResponse.events.
class ChannelEventItem(BaseModel):
    event_id: UUID
    project_id: str
    channel_type: str
    provider_event_id: str
    # The two nullable fields below have no default — presumably
    # required-yet-nullable under Pydantic v2; TODO confirm the version in use.
    channel_chat_id: str | None
    content_preview: str | None
    is_duplicate: bool
    received_at: datetime
|
||||
|
||||
|
||||
# Envelope for a list of ChannelEventItem rows plus two integers.
class RecentEventsResponse(BaseModel):
    events: list[ChannelEventItem]
    # NOTE(review): presumably the count of matching events — confirm against
    # the service that fills this model.
    total: int
    # NOTE(review): presumably echoes the requested page size — confirm.
    limit: int
|
||||
|
||||
|
||||
# One entry of ChannelEventDossierResponse.events (GET /events/dossier).
# Every field is declared without a default; nullable ones are presumably
# required-yet-nullable under Pydantic v2 — TODO confirm the version in use.
class ChannelEventDossierItem(BaseModel):
    # --- identity ---
    event_id: UUID
    project_id: str
    channel_type: str
    provider: str | None
    stage: str
    provider_event_id: str
    # --- content ---
    content_preview: str | None
    content_redacted: str | None
    has_redacted_content: bool
    redaction_version: str | None
    source_url: str | None
    content_sha256: str | None
    content_length: int | None
    # --- source references (free-form dicts; schema not visible here) ---
    source_refs: dict[str, Any]
    source_ref_count: int
    log_correlation: dict[str, Any]
    # --- alert attributes ---
    alertname: str | None
    severity: str | None
    namespace: str | None
    target_resource: str | None
    fingerprint: str | None
    is_duplicate: bool
    # --- timestamps ---
    provider_ts: datetime | None
    received_at: datetime
|
||||
|
||||
|
||||
# Four required integer counters; embedded as ChannelEventDossierResponse.summary.
class ChannelEventDossierSummary(BaseModel):
    source_count: int
    duplicate_total: int
    redacted_total: int
    source_ref_total: int
|
||||
|
||||
|
||||
# response_model of GET /events/dossier (get_event_dossier). The handler
# returns a plain dict, which FastAPI validates against this model.
class ChannelEventDossierResponse(BaseModel):
    events: list[ChannelEventDossierItem]
    total: int
    limit: int
    summary: ChannelEventDossierSummary
|
||||
|
||||
|
||||
# Per-provider counters; element type of
# ChannelEventDossierCoverageResponse.providers.
class ChannelEventProviderCoverage(BaseModel):
    provider: str
    total: int
    duplicate_total: int
    redacted_total: int
    source_ref_total: int
    missing_source_refs_total: int
    sentry_ref_total: int
    signoz_ref_total: int
    alert_ref_total: int
    # Nullable, no default — presumably None when the provider has no events;
    # TODO confirm against the service.
    latest_received_at: datetime | None
|
||||
|
||||
|
||||
# Aggregate counters embedded as ChannelEventDossierCoverageResponse.summary.
class ChannelEventDossierCoverageSummary(BaseModel):
    source_count: int
    source_envelope_total: int
    missing_source_envelope_total: int
    with_source_refs_total: int
    missing_source_refs_total: int
    duplicate_total: int
    redacted_total: int
    source_ref_total: int
    sentry_ref_total: int
    signoz_ref_total: int
    alert_ref_total: int
    # Nullable, no default — presumably None when no events were counted;
    # TODO confirm against the service.
    latest_received_at: datetime | None
|
||||
|
||||
|
||||
# response_model of GET /events/dossier/coverage (get_event_dossier_coverage).
class ChannelEventDossierCoverageResponse(BaseModel):
    # Declared as a required str even though the endpoint's project_id query
    # parameter is optional — NOTE(review): confirm the service always
    # returns a non-null project_id.
    project_id: str
    limit: int
    summary: ChannelEventDossierCoverageSummary
    providers: list[ChannelEventProviderCoverage]
|
||||
|
||||
|
||||
# Closed set of provider names accepted by the provider-heartbeat models below.
SourceProviderName = Literal["sentry", "signoz"]
|
||||
|
||||
|
||||
# Request body of POST /events/dossier/provider-heartbeat
# (create_source_provider_heartbeat).
class SourceProviderHeartbeatRequest(BaseModel):
    """Low-noise freshness heartbeat for external source-provider mirrors."""

    # Defaults to "awoooi"; 1-64 characters.
    project_id: str = Field(default="awoooi", min_length=1, max_length=64)
    # Defaults to both providers. The min/max_length bounds presumably limit
    # the list to 1-2 entries (Pydantic v2 semantics) — TODO confirm version.
    # Duplicate entries are not rejected by this declaration.
    providers: list[SourceProviderName] = Field(
        default_factory=lambda: ["sentry", "signoz"],
        min_length=1,
        max_length=2,
    )
    # 1-120 characters; the handler copies it into the recorded event's
    # annotations and payload.
    reason: str = Field(
        default="scheduled_provider_freshness_smoke",
        min_length=1,
        max_length=120,
    )
    # Optional caller-supplied reference (max 120 characters); the handler
    # copies it into the recorded payload.
    run_ref: str | None = Field(default=None, max_length=120)
|
||||
|
||||
|
||||
# One recorded heartbeat; element type of SourceProviderHeartbeatResponse.items.
class SourceProviderHeartbeatItem(BaseModel):
    provider: SourceProviderName
    # The "heartbeat-<timestamp>" string built by the handler.
    event_id: str
    # The value returned by record_external_alert_event for this heartbeat.
    conversation_event_id: UUID
|
||||
|
||||
|
||||
# response_model of POST /events/dossier/provider-heartbeat.
class SourceProviderHeartbeatResponse(BaseModel):
    # The handler sets this to "recorded".
    status: str
    project_id: str
    items: list[SourceProviderHeartbeatItem]
|
||||
|
||||
|
||||
# Aggregate counters embedded as ChannelEventRecurrenceResponse.summary.
class ChannelEventRecurrenceSummary(BaseModel):
    # Required counters (no default).
    source_event_total: int
    recurrence_group_total: int
    recurrent_group_total: int
    duplicate_event_total: int
    linked_run_total: int
    unlinked_event_total: int
    # Counters defaulting to 0, so a payload that omits them still validates.
    auto_repair_linked_total: int = 0
    verified_repair_group_total: int = 0
    open_work_item_group_total: int = 0
    manual_gate_group_total: int = 0
    automation_gap_group_total: int = 0
    failed_repair_group_total: int = 0
    source_correlation_review_group_total: int = 0
    source_correlation_decision_recorded_group_total: int = 0
    source_correlation_applied_group_total: int = 0
    # Nullable, no default — presumably required-yet-nullable under
    # Pydantic v2; TODO confirm the version in use.
    latest_received_at: datetime | None
|
||||
|
||||
|
||||
# One recurrence group; element type of ChannelEventRecurrenceResponse.items.
# Fields without a default are presumably required (nullable ones
# required-yet-nullable) under Pydantic v2 — TODO confirm the version in use.
class ChannelEventRecurrenceItem(BaseModel):
    # --- group identity / alert attributes ---
    recurrence_key: str
    provider: str | None
    alertname: str | None
    severity: str | None
    namespace: str | None
    target_resource: str | None
    fingerprint: str | None
    # --- latest occurrence ---
    latest_stage: str | None = None
    latest_event_id: UUID | None
    latest_provider_event_id: str | None
    latest_content_preview: str | None
    latest_run_id: UUID | None
    latest_run_state: str | None
    latest_agent_id: str | None
    latest_incident_id: str | None = None
    incident_ids: list[str] = Field(default_factory=list)
    # --- optional free-form sections (dict schema not visible here) ---
    repair_summary: dict[str, Any] | None = None
    work_item: dict[str, Any] | None = None
    source_correlation_review: dict[str, Any] | None = None
    source_correlation_apply: dict[str, Any] | None = None
    # --- counters ---
    occurrence_total: int
    duplicate_total: int
    linked_run_total: int
    source_ref_total: int
    missing_source_refs_total: int
    sentry_ref_total: int
    signoz_ref_total: int
    alert_ref_total: int
    # stage_counts defaults to an empty dict; run_state_counts has no default.
    stage_counts: dict[str, int] = Field(default_factory=dict)
    run_state_counts: dict[str, int]
    # --- time range ---
    first_received_at: datetime | None
    latest_received_at: datetime | None
|
||||
|
||||
|
||||
# Envelope pairing a ChannelEventRecurrenceSummary with its recurrence items.
class ChannelEventRecurrenceResponse(BaseModel):
    project_id: str
    limit: int
    summary: ChannelEventRecurrenceSummary
    items: list[ChannelEventRecurrenceItem]
|
||||
|
||||
|
||||
class RecurrenceWorkItemDryRunRequest(BaseModel):
    """AwoooP recurrence work item dry-run request."""

    # Optional; an empty string is rejected (min_length=1).
    project_id: str | None = Field(default=None, min_length=1)
    # Required, non-empty.
    work_item_id: str = Field(min_length=1)
    # Type imported from channel_event_dossier_service; defaults to "auto".
    mode: RecurrenceWorkItemMode = "auto"
    # Optional provider filter; non-empty when supplied.
    provider: str | None = Field(default=None, min_length=1)
    # Integer in 1..300; the default 300 is also the upper bound.
    limit: int = Field(default=300, ge=1, le=300)
|
||||
|
||||
|
||||
class RecurrenceWorkItemHandoffRequest(BaseModel):
    """AwoooP recurrence work item handoff request."""

    # Optional; an empty string is rejected (min_length=1).
    project_id: str | None = Field(default=None, min_length=1)
    # Required, non-empty.
    work_item_id: str = Field(min_length=1)
    # Type imported from channel_event_dossier_service; defaults to "auto".
    mode: RecurrenceWorkItemMode = "auto"
    # Type imported from channel_event_dossier_service; defaults to
    # "ticket_proposal".
    handoff_kind: RecurrenceWorkItemHandoffKind = "ticket_proposal"
    # Optional provider filter; non-empty when supplied.
    provider: str | None = Field(default=None, min_length=1)
    # Integer in 1..300; the default 300 is also the upper bound.
    limit: int = Field(default=300, ge=1, le=300)
|
||||
|
||||
|
||||
class SourceCorrelationReviewDecisionRequest(BaseModel):
    """Record-only source evidence review decision."""

    # Optional; an empty string is rejected (min_length=1).
    project_id: str | None = Field(default=None, min_length=1)
    # Required, non-empty.
    work_item_id: str = Field(min_length=1)
    # Required (no default); type imported from channel_event_dossier_service.
    decision: SourceCorrelationReviewDecision
    # Optional incident identifier, 1-30 characters when supplied.
    target_incident_id: str | None = Field(default=None, min_length=1, max_length=30)
    # Defaults to "operator_console"; 1-100 characters.
    reviewer_id: str = Field(default="operator_console", min_length=1, max_length=100)
    # Optional free-text note, capped at 500 characters.
    operator_note: str | None = Field(default=None, max_length=500)
    # Optional provider filter; non-empty when supplied.
    provider: str | None = Field(default=None, min_length=1)
    # Integer in 1..300; the default 300 is also the upper bound.
    limit: int = Field(default=300, ge=1, le=300)
|
||||
|
||||
|
||||
class SourceCorrelationApplyRequest(BaseModel):
    """Append-only source evidence link apply request."""

    # Optional; an empty string is rejected (min_length=1).
    project_id: str | None = Field(default=None, min_length=1)
    # Required, non-empty.
    work_item_id: str = Field(min_length=1)
    # Defaults to "operator_console"; 1-100 characters.
    reviewer_id: str = Field(default="operator_console", min_length=1, max_length=100)
    # Optional free-text note, capped at 500 characters.
    operator_note: str | None = Field(default=None, max_length=500)
    # Optional provider filter; non-empty when supplied.
    provider: str | None = Field(default=None, min_length=1)
    # Integer in 1..300; the default 300 is also the upper bound.
    limit: int = Field(default=300, ge=1, le=300)
|
||||
|
||||
|
||||
@router.get(
    "/events/dossier",
    response_model=ChannelEventDossierResponse,
    summary="查詢 Channel Event 來源卷宗",
    description=(
        "返回 redacted inbound source envelope,供 AwoooP Run Detail 顯示"
        "告警來源、source refs、Sentry / SignOz / Alertmanager 關聯與去重狀態。"
    ),
)
async def get_event_dossier(
    project_id: str | None = Query(None, description="租戶 ID(可選)"),
    run_id: UUID | None = Query(None, description="Run ID(可選)"),
    provider_event_id: str | None = Query(
        None, description="provider_event_id(可選)"
    ),
    limit: int = Query(20, ge=1, le=50, description="最多返回筆數"),
) -> dict[str, Any]:
    # Thin pass-through: the three optional filters and the row cap (1..50,
    # default 20) go to fetch_channel_event_dossier unchanged, and its result
    # is returned as-is for FastAPI to validate against
    # ChannelEventDossierResponse.
    dossier = await fetch_channel_event_dossier(
        project_id=project_id,
        run_id=run_id,
        provider_event_id=provider_event_id,
        limit=limit,
    )
    return dossier
|
||||
|
||||
|
||||
@router.get(
    "/events/dossier/coverage",
    response_model=ChannelEventDossierCoverageResponse,
    summary="查詢 Channel Event 來源卷宗覆蓋率",
    description=(
        "返回近期 inbound event 的 source_envelope / source_refs / 去重 / "
        "Sentry / SignOz 關聯覆蓋率,供 AwoooP Run List 顯示告警是否已入庫。"
    ),
)
async def get_event_dossier_coverage(
    project_id: str | None = Query(None, description="租戶 ID(可選)"),
    provider: str | None = Query(
        None, description="provider(可選,如 sentry / signoz)"
    ),
    limit: int = Query(100, ge=1, le=200, description="最多納入統計筆數"),
) -> dict[str, Any]:
    # Thin pass-through: the two optional filters and the sample cap (1..200,
    # default 100) go to fetch_channel_event_dossier_coverage unchanged, and
    # its result is returned as-is for FastAPI to validate against
    # ChannelEventDossierCoverageResponse.
    coverage = await fetch_channel_event_dossier_coverage(
        project_id=project_id,
        provider=provider,
        limit=limit,
    )
    return coverage
|
||||
|
||||
|
||||
@router.post(
    "/events/dossier/provider-heartbeat",
    response_model=SourceProviderHeartbeatResponse,
    summary="寫入 Sentry / SignOz 來源卷宗 freshness heartbeat",
    description=(
        "受 AwoooP operator key 保護的低噪音 smoke。只寫入來源卷宗與"
        "completed shadow run,不建立 Incident、不送 Telegram、不宣稱真實上游告警。"
    ),
)
async def create_source_provider_heartbeat(
    payload: SourceProviderHeartbeatRequest,
    operator: Annotated[
        AwoooPOperatorPrincipal,
        Depends(verify_awooop_operator),
    ],
) -> dict[str, Any]:
    """Record one synthetic heartbeat event per requested provider.

    For every entry in ``payload.providers`` a ``heartbeat``-stage event is
    written through ``record_external_alert_event``. All providers in the same
    request share one event id derived from the current UTC time at
    one-second resolution.

    Args:
        payload: Request body carrying ``project_id``, ``providers``,
            ``reason`` and ``run_ref``.
        operator: Authenticated operator principal; its ``operator_id`` and
            ``auth_method`` are stored in the event payload.

    Returns:
        ``{"status": "recorded", "project_id": ..., "items": [...]}`` where
        each item holds the provider, the event id and the recorded
        conversation event id.

    Raises:
        HTTPException: 500 as soon as ``record_external_alert_event`` returns
            ``None`` for a provider. Providers handled earlier in the loop
            have already been recorded at that point.
    """
    # The id does not depend on the provider, so build it once up front.
    event_id = f"heartbeat-{datetime.now(UTC).strftime('%Y%m%dT%H%M%SZ')}"
    recorded: list[dict[str, Any]] = []

    for provider in payload.providers:
        labels = {
            "provider": provider,
            "synthetic": "true",
            "alert_category": "alertchain_provider_freshness",
            "telegram": "not_sent",
            "incident": "not_created",
        }
        annotations = {
            "summary": (
                "Low-noise provider freshness smoke; verifies AwoooP "
                "source dossier ingestion without creating an incident."
            ),
            "reason": payload.reason,
        }
        # A fresh dict per call so the callee never sees a shared object.
        event_payload = {
            "reason": payload.reason,
            "run_ref": payload.run_ref,
            "operator_id": operator.operator_id,
            "auth_method": operator.auth_method,
            "synthetic": True,
            "side_effects": {
                "incident_created": False,
                "telegram_sent": False,
                "approval_created": False,
            },
        }

        event_uuid = await record_external_alert_event(
            project_id=payload.project_id,
            provider=provider,
            event_id=event_id,
            stage="heartbeat",
            title="SourceProviderHeartbeat",
            severity="info",
            namespace="awoooi-prod",
            target_resource="source-provider-ingestion",
            fingerprint=f"source-provider-heartbeat:{provider}",
            labels=labels,
            annotations=annotations,
            payload=event_payload,
        )
        if event_uuid is None:
            raise HTTPException(
                status_code=500,
                detail=f"{provider} provider heartbeat was not recorded",
            )

        recorded.append(
            {
                "provider": provider,
                "event_id": event_id,
                "conversation_event_id": event_uuid,
            }
        )

    return {
        "status": "recorded",
        "project_id": payload.project_id,
        "items": recorded,
    }
|
||||
|
||||
|
||||
@router.get(
    "/events/dossier/recurrence",
    response_model=ChannelEventRecurrenceResponse,
    summary="查詢 Channel Event 重複發生與關聯 Run 狀態",
    description=(
        "將近期 inbound source events 依 fingerprint / alertname / namespace / target 分組,"
        "顯示重複發生次數、去重數、source refs 與最新 linked run 狀態。"
    ),
)
async def get_event_dossier_recurrence(
    project_id: str | None = Query(None, description="租戶 ID(可選)"),
    provider: str | None = Query(
        None, description="provider(可選,如 alertmanager / sentry / signoz)"
    ),
    limit: int = Query(100, ge=1, le=300, description="最多納入統計筆數"),
) -> dict[str, Any]:
    """Return the recurrence read model for recent inbound source events.

    Thin HTTP wrapper around ``fetch_channel_event_dossier_recurrence``; the
    service result is returned as-is.

    Args:
        project_id: Optional tenant filter.
        provider: Optional provider filter.
        limit: Maximum number of events included in the statistics (1-300).
    """
    recurrence = await fetch_channel_event_dossier_recurrence(
        project_id=project_id,
        provider=provider,
        limit=limit,
    )
    return recurrence
|
||||
|
||||
|
||||
@router.get(
    "/events/dossier/recurrence/work-item/preview",
    summary="預覽重複告警工作項的安全處理計畫",
    description=(
        "依 recurrence read model 找出指定 work_item,返回下一步、pre-flight checks "
        "與 read-only / no-write 保證;不修改 incident、auto-repair 或 ticket 狀態。"
    ),
)
async def preview_event_recurrence_work_item(
    work_item_id: str = Query(..., min_length=1, description="recurrence work_item_id"),
    project_id: str | None = Query(None, description="租戶 ID(可選)"),
    provider: str | None = Query(
        None, description="provider(可選,如 alertmanager / sentry / signoz)"
    ),
    mode: RecurrenceWorkItemMode = Query("auto", description="預覽模式"),
    limit: int = Query(300, ge=1, le=300, description="最多納入統計筆數"),
) -> dict[str, Any]:
    """Preview the handling plan for one recurrence work item.

    Delegates to ``fetch_recurrence_work_item_preview`` and returns its result.

    Args:
        work_item_id: Identifier of the recurrence work item (required).
        project_id: Optional tenant filter.
        provider: Optional provider filter.
        mode: Preview mode; defaults to ``"auto"``.
        limit: Maximum number of events included in the statistics (1-300).

    Raises:
        HTTPException: 404 with detail ``recurrence_work_item_not_found`` when
            the service raises ``RecurrenceWorkItemNotFoundError``.
    """
    try:
        preview = await fetch_recurrence_work_item_preview(
            project_id=project_id,
            work_item_id=work_item_id,
            mode=mode,
            provider=provider,
            limit=limit,
        )
    except RecurrenceWorkItemNotFoundError as exc:
        raise HTTPException(
            status_code=404,
            detail="recurrence_work_item_not_found",
        ) from exc
    return preview
|
||||
|
||||
|
||||
@router.post(
    "/events/dossier/recurrence/work-item/dry-run",
    summary="乾跑重複告警工作項的安全處理流程",
    description=(
        "依 recurrence read model 產生 dry-run 結果並寫入 pre-flight history,"
        "但不修改 incident、auto-repair 或 ticket 狀態。"
    ),
)
async def dry_run_event_recurrence_work_item(
    request: RecurrenceWorkItemDryRunRequest,
) -> dict[str, Any]:
    """Run the dry-run flow for one recurrence work item.

    Unpacks the request body and delegates to
    ``fetch_recurrence_work_item_dry_run``.

    Args:
        request: Body providing ``project_id``, ``work_item_id``, ``mode``,
            ``provider`` and ``limit``.

    Raises:
        HTTPException: 404 with detail ``recurrence_work_item_not_found`` when
            the service raises ``RecurrenceWorkItemNotFoundError``.
    """
    try:
        outcome = await fetch_recurrence_work_item_dry_run(
            project_id=request.project_id,
            work_item_id=request.work_item_id,
            mode=request.mode,
            provider=request.provider,
            limit=request.limit,
        )
    except RecurrenceWorkItemNotFoundError as exc:
        raise HTTPException(
            status_code=404,
            detail="recurrence_work_item_not_found",
        ) from exc
    return outcome
|
||||
|
||||
|
||||
@router.post(
    "/events/dossier/recurrence/work-item/handoff",
    summary="記錄重複告警工作項的交接提案",
    description=(
        "依 recurrence read model 與 dry-run 結果記錄 ticket proposal / 人工接手歷史,"
        "但不修改 incident、auto-repair 或外部 ticket 狀態。"
    ),
)
async def handoff_event_recurrence_work_item(
    request: RecurrenceWorkItemHandoffRequest,
) -> dict[str, Any]:
    """Record a handoff proposal for one recurrence work item.

    Unpacks the request body and delegates to
    ``fetch_recurrence_work_item_handoff``.

    Args:
        request: Body providing ``project_id``, ``work_item_id``, ``mode``,
            ``handoff_kind``, ``provider`` and ``limit``.

    Raises:
        HTTPException: 404 with detail ``recurrence_work_item_not_found`` when
            the service raises ``RecurrenceWorkItemNotFoundError``.
    """
    try:
        handoff = await fetch_recurrence_work_item_handoff(
            project_id=request.project_id,
            work_item_id=request.work_item_id,
            mode=request.mode,
            handoff_kind=request.handoff_kind,
            provider=request.provider,
            limit=request.limit,
        )
    except RecurrenceWorkItemNotFoundError as exc:
        raise HTTPException(
            status_code=404,
            detail="recurrence_work_item_not_found",
        ) from exc
    return handoff
|
||||
|
||||
|
||||
@router.post(
    "/events/dossier/recurrence/source-correlation/review",
    summary="記錄來源證據與 Incident 配對審核結果",
    description=(
        "針對 source_correlation_review work item 記錄 operator 審核決定。"
        "本 API 僅寫入 alert_operation_log / 可選 timeline_events,"
        "不修改 Incident 狀態、不回寫 source event、不建立外部 ticket。"
    ),
)
async def review_source_correlation_work_item(
    request: SourceCorrelationReviewDecisionRequest,
) -> dict[str, Any]:
    """Record an operator's review decision for a source-correlation work item.

    Unpacks the request body and delegates to
    ``fetch_source_correlation_review_decision``.

    Args:
        request: Body providing ``project_id``, ``work_item_id``,
            ``decision``, ``target_incident_id``, ``reviewer_id``,
            ``operator_note``, ``provider`` and ``limit``.

    Raises:
        HTTPException: 404 with detail ``recurrence_work_item_not_found`` when
            the service raises ``RecurrenceWorkItemNotFoundError``.
    """
    try:
        decision_record = await fetch_source_correlation_review_decision(
            project_id=request.project_id,
            work_item_id=request.work_item_id,
            decision=request.decision,
            target_incident_id=request.target_incident_id,
            reviewer_id=request.reviewer_id,
            operator_note=request.operator_note,
            provider=request.provider,
            limit=request.limit,
        )
    except RecurrenceWorkItemNotFoundError as exc:
        raise HTTPException(
            status_code=404,
            detail="recurrence_work_item_not_found",
        ) from exc
    return decision_record
|
||||
|
||||
|
||||
@router.post(
    "/events/dossier/recurrence/source-correlation/apply",
    summary="套用已確認的來源證據與 Incident 配對",
    description=(
        "只接受已寫入 accepted review 的 source_correlation_review work item。"
        "成功時以 append-only 方式新增 source_correlation_linked 來源事件,"
        "並寫入 alert_operation_log / timeline_events。"
        "不修改 Incident 狀態、不修改 auto-repair 結果、不建立外部 ticket。"
    ),
)
async def apply_source_correlation_work_item(
    request: SourceCorrelationApplyRequest,
) -> dict[str, Any]:
    """Apply a reviewed source-correlation pairing for one work item.

    Unpacks the request body and delegates to
    ``fetch_source_correlation_apply``.

    Args:
        request: Body providing ``project_id``, ``work_item_id``,
            ``reviewer_id``, ``operator_note``, ``provider`` and ``limit``.

    Raises:
        HTTPException: 404 with detail ``recurrence_work_item_not_found`` when
            the service raises ``RecurrenceWorkItemNotFoundError``.
    """
    try:
        applied = await fetch_source_correlation_apply(
            project_id=request.project_id,
            work_item_id=request.work_item_id,
            reviewer_id=request.reviewer_id,
            operator_note=request.operator_note,
            provider=request.provider,
            limit=request.limit,
        )
    except RecurrenceWorkItemNotFoundError as exc:
        raise HTTPException(
            status_code=404,
            detail="recurrence_work_item_not_found",
        ) from exc
    return applied
|
||||
|
||||
|
||||
@router.get(
    "/events/recent",
    response_model=RecentEventsResponse,
    summary="列出最近 Channel Events",
    description=(
        "返回 awooop_conversation_event 最近事件。"
        "可用 channel_type / provider_prefix 過濾,例如 alert-group 收斂事件。"
    ),
)
async def list_recent_events(
    project_id: str | None = Query(None, description="租戶 ID(可選)"),
    channel_type: str | None = Query(None, description="通道類型(可選)"),
    provider_prefix: str | None = Query(
        None, description="provider_event_id 前綴(可選)"
    ),
    limit: int = Query(20, ge=1, le=100, description="最多返回筆數"),
) -> dict[str, Any]:
    """List the most recent channel events.

    Thin HTTP wrapper around ``list_recent_channel_events``.

    Args:
        project_id: Optional tenant filter.
        channel_type: Optional channel-type filter.
        provider_prefix: Optional ``provider_event_id`` prefix filter.
        limit: Maximum number of rows returned (1-100).
    """
    recent = await list_recent_channel_events(
        project_id=project_id,
        channel_type=channel_type,
        provider_prefix=provider_prefix,
        limit=limit,
    )
    return recent
|
||||
@@ -15,12 +15,35 @@ from decimal import Decimal
|
||||
from typing import Any, Literal
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Query
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from src.core.awooop_operator_auth import (
|
||||
AwoooPOperatorPrincipal,
|
||||
verify_awooop_operator,
|
||||
)
|
||||
from src.services.platform_operator_service import (
|
||||
decide_approval as decide_approval_svc,
|
||||
)
|
||||
from src.services.platform_operator_service import (
|
||||
get_ai_route_status as get_ai_route_status_svc,
|
||||
)
|
||||
from src.services.platform_operator_service import (
|
||||
get_awooop_status_chain as get_awooop_status_chain_svc,
|
||||
)
|
||||
from src.services.platform_operator_service import (
|
||||
get_run_detail as get_run_detail_svc,
|
||||
)
|
||||
from src.services.platform_operator_service import (
|
||||
list_cicd_events as list_cicd_events_svc,
|
||||
)
|
||||
from src.services.platform_operator_service import (
|
||||
list_approvals as list_approvals_svc,
|
||||
)
|
||||
from src.services.platform_operator_service import (
|
||||
list_callback_replies as list_callback_replies_svc,
|
||||
)
|
||||
from src.services.platform_operator_service import (
|
||||
list_runs as list_runs_svc,
|
||||
)
|
||||
|
||||
@@ -40,6 +63,8 @@ class RunItem(BaseModel):
|
||||
step_count: int
|
||||
created_at: datetime
|
||||
timeout_at: datetime | None
|
||||
remediation_summary: dict[str, Any] | None = None
|
||||
callback_reply_summary: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class ListRunsResponse(BaseModel):
|
||||
@@ -49,12 +74,151 @@ class ListRunsResponse(BaseModel):
|
||||
per_page: int
|
||||
|
||||
|
||||
# Schema for one entry of ``ListCallbackRepliesResponse.items``.
# Fields without a default are required; every ``| None = None`` field may be
# omitted by the producer.
# NOTE: kept as comments rather than a class docstring so the generated
# schema description stays unchanged.
class CallbackReplyItem(BaseModel):
    # Required identity / status fields.
    message_id: UUID
    run_id: UUID
    project_id: str
    status: str
    needs_human: bool

    # Optional callback context.
    action: str | None = None
    incident_id: str | None = None
    event_at: datetime | None = None

    # Required delivery descriptors.
    channel_type: str
    message_type: str
    send_status: str

    # Optional delivery details.
    send_error: str | None = None
    provider_message_id: str | None = None
    triggered_by_state: str | None = None
    content_preview: str | None = None

    # Optional details of the run referenced by ``run_id`` (presumably).
    run_state: str | None = None
    agent_id: str | None = None
    run_created_at: datetime | None = None

    # Required free-form callback reply payload.
    callback_reply: dict[str, Any]

    # Optional free-form evidence blobs.
    awooop_status_chain: dict[str, Any] | None = None
    persisted_awooop_status_chain: dict[str, Any] | None = None
    km_stale_completion_summary: dict[str, Any] | None = None
    persisted_km_stale_completion_summary: dict[str, Any] | None = None
    evidence_capture_status: dict[str, Any] | None = None
    run_detail_href: str | None = None
|
||||
|
||||
|
||||
# Per-prefix counter used by the ``*_top_prefixes`` lists of
# ``CallbackReplyAuditSummary``.
class OutboundReplyMarkupGapPrefix(BaseModel):
    prefix: str
    total: int
    recent_24h_total: int = 0  # optional; defaults to zero
    first_sent_at: datetime | None = None
    last_sent_at: datetime | None = None
|
||||
|
||||
|
||||
# Aggregate audit counters exposed as the optional ``summary`` of
# ``ListCallbackRepliesResponse``.
# Counters without a default are required; the ``= 0`` / ``= None`` / string
# defaulted fields may be omitted by the producer.
class CallbackReplyAuditSummary(BaseModel):
    schema_version: str
    project_id: str

    # Outbound message counters.
    outbound_total: int
    outbound_source_envelope_total: int
    outbound_source_refs_total: int
    outbound_trace_ref_total: int = 0
    outbound_incident_ref_total: int
    outbound_reply_markup_total: int = 0

    # Reply-markup messages lacking an incident ref.
    outbound_reply_markup_missing_incident_ref_total: int = 0
    outbound_reply_markup_missing_incident_ref_recent_1h_total: int = 0
    outbound_reply_markup_missing_incident_ref_recent_24h_total: int = 0
    outbound_reply_markup_missing_incident_ref_latest_sent_at: datetime | None = None

    # Reply-markup messages lacking a trace ref.
    outbound_reply_markup_missing_trace_ref_total: int = 0
    outbound_reply_markup_missing_trace_ref_recent_1h_total: int = 0
    outbound_reply_markup_missing_trace_ref_recent_24h_total: int = 0
    outbound_reply_markup_missing_trace_ref_latest_sent_at: datetime | None = None

    # Trace-ref gap status / recovery descriptors with their defaults.
    outbound_reply_markup_trace_ref_gap_status: str = "clean"
    outbound_reply_markup_trace_ref_gap_next_action: str = "none"
    outbound_reply_markup_trace_ref_after_gap_total: int = 0
    outbound_reply_markup_trace_ref_after_gap_first_sent_at: datetime | None = None
    outbound_reply_markup_trace_ref_after_gap_latest_sent_at: datetime | None = None
    outbound_reply_markup_trace_ref_gap_recovery_status: str = "not_needed"

    # Per-prefix breakdowns; default_factory gives each instance its own list.
    outbound_reply_markup_missing_incident_ref_top_prefixes: list[
        OutboundReplyMarkupGapPrefix
    ] = Field(default_factory=list)
    outbound_reply_markup_missing_trace_ref_top_prefixes: list[
        OutboundReplyMarkupGapPrefix
    ] = Field(default_factory=list)

    outbound_failed_total: int

    # Callback reply counters.
    callback_total: int
    callback_sent_total: int
    callback_fallback_total: int
    callback_rescue_total: int
    callback_failed_total: int
    callback_detail_total: int
    callback_history_total: int
    callback_snapshot_captured_total: int
    callback_snapshot_partial_total: int
    callback_snapshot_missing_total: int
    callback_incident_total: int

    snapshot_status: str
    next_action: str
    latest_outbound_at: datetime | None = None
    latest_callback_at: datetime | None = None
|
||||
|
||||
|
||||
# Paginated response body of the ``/runs/callback-replies`` route.
class ListCallbackRepliesResponse(BaseModel):
    items: list[CallbackReplyItem]
    total: int
    page: int
    per_page: int
    summary: CallbackReplyAuditSummary | None = None  # optional aggregate block
|
||||
|
||||
|
||||
# Schema for one entry of ``ListCicdEventsResponse.items``.
# Only ``id``, ``project_id``, ``alertname`` and ``created_at`` are required.
class CicdEventItem(BaseModel):
    id: str
    project_id: str
    alertname: str

    # Optional stage / status descriptors.
    stage: str | None = None
    status: str | None = None
    severity: str | None = None
    commit_sha: str | None = None
    triggered_by: str | None = None
    duration_seconds: int = 0

    # Optional human-readable details and links.
    summary: str | None = None
    description: str | None = None
    workflow_url: str | None = None
    alert_id: str | None = None
    source: str | None = None
    action_detail: str | None = None

    needs_attention: bool = False
    created_at: datetime
|
||||
|
||||
|
||||
# Response body of the ``/cicd/events`` route.
class ListCicdEventsResponse(BaseModel):
    items: list[CicdEventItem]
    total: int
    limit: int
|
||||
|
||||
|
||||
# Response body of the ``/ai-route-status`` route.
class AiRouteStatusResponse(BaseModel):
    schema_version: str
    workload_type: str
    policy_order: list[dict[str, Any]]

    # Currently selected target; each part may be absent.
    selected_provider: str | None = None
    selected_url: str | None = None
    selected_model: str | None = None

    fallback_chain: list[dict[str, Any]]
    route_reason: str
    route_source: str
    route_error: str | None = None

    # Mapping of string key -> free-form health dict.
    health: dict[str, dict[str, Any]]

    # Optional lane information.
    lane_mode: str | None = None
    active_lane: dict[str, Any] | None = None
    skipped_lanes: list[dict[str, Any]] = Field(default_factory=list)

    operator_action: dict[str, Any] | None = None
    repair_evidence: dict[str, Any] | None = None
    checked_at: datetime
|
||||
|
||||
|
||||
# Schema for one approval entry.
# ``timeout_at`` is nullable but has no default, so it must always be supplied.
class ApprovalItem(BaseModel):
    run_id: UUID
    project_id: str
    agent_id: str
    created_at: datetime
    timeout_at: datetime | None

    # Optional free-form evidence blobs.
    remediation_summary: dict[str, Any] | None = None
    awooop_status_chain: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class ListApprovalsResponse(BaseModel):
|
||||
@@ -65,7 +229,10 @@ class ListApprovalsResponse(BaseModel):
|
||||
class DecideApprovalRequest(BaseModel):
|
||||
project_id: str = Field(..., description="租戶 ID")
|
||||
decision: Literal["approve", "reject"] = Field(..., description="核准或拒絕")
|
||||
approver_id: str = Field(..., description="審核人 ID(platform_subject_id 或 operator email)")
|
||||
approver_id: str | None = Field(
|
||||
default=None,
|
||||
description="Deprecated. Ignored; approver comes from trusted operator headers.",
|
||||
)
|
||||
reason: str | None = Field(None, description="決策原因(可選)")
|
||||
|
||||
|
||||
@@ -81,7 +248,8 @@ class DecideApprovalResponse(BaseModel):
|
||||
response_model=ListRunsResponse,
|
||||
summary="列出 Runs",
|
||||
description=(
|
||||
"返回 awooop_run_state 記錄,支援 project_id / state filter 與分頁。\n\n"
|
||||
"返回 awooop_run_state 記錄,支援 project_id / state / remediation_status / "
|
||||
"callback_reply_status / incident_id filter 與分頁。\n\n"
|
||||
"- 按 created_at DESC 排序\n"
|
||||
"- 注意:此路徑為 /runs/list 以避免與 runs.py 的 /runs/{run_id} 衝突"
|
||||
),
|
||||
@@ -89,11 +257,133 @@ class DecideApprovalResponse(BaseModel):
|
||||
async def list_runs(
|
||||
project_id: str | None = Query(None, description="租戶 ID(可選)"),
|
||||
state: str | None = Query(None, description="Run 狀態 filter(可選)"),
|
||||
remediation_status: str | None = Query(
|
||||
None,
|
||||
description="AI 證據狀態 filter(no_evidence/mcp_observed/read_only_dry_run/write_observed/blocked/observed)",
|
||||
),
|
||||
callback_reply_status: str | None = Query(
|
||||
None,
|
||||
description="Telegram callback reply 狀態 filter(no_callback/sent/fallback_sent/rescue_sent/failed/observed)",
|
||||
),
|
||||
incident_id: str | None = Query(None, description="關聯 Incident ID filter(可選)"),
|
||||
page: int = Query(1, ge=1, description="頁碼,從 1 開始"),
|
||||
per_page: int = Query(_DEFAULT_PER_PAGE, ge=1, le=_MAX_PER_PAGE, description="每頁筆數"),
|
||||
) -> dict[str, Any]:
|
||||
return await list_runs_svc(
|
||||
project_id=project_id, state=state, page=page, per_page=per_page
|
||||
project_id=project_id,
|
||||
state=state,
|
||||
remediation_status=remediation_status,
|
||||
callback_reply_status=callback_reply_status,
|
||||
incident_id=incident_id,
|
||||
page=page,
|
||||
per_page=per_page,
|
||||
)
|
||||
|
||||
|
||||
@router.get(
    "/runs/callback-replies",
    response_model=ListCallbackRepliesResponse,
    summary="列出 Telegram Callback Reply Evidence",
    description=(
        "從 AwoooP outbound mirror 查詢 Telegram 詳情 / 歷史 callback reply 的"
        "送達、fallback、救援與失敗證據;只讀,不修改 incident、run 或 Telegram 狀態。"
    ),
)
async def list_callback_replies(
    project_id: str | None = Query(None, description="租戶 ID(可選)"),
    callback_reply_status: str | None = Query(
        None,
        description="Telegram callback reply 狀態 filter(sent/fallback_sent/rescue_sent/failed/observed/no_callback)",
    ),
    action: str | None = Query(None, description="Callback action filter(例如 detail/history)"),
    incident_id: str | None = Query(None, description="關聯 Incident ID filter(可選)"),
    page: int = Query(1, ge=1, description="頁碼,從 1 開始"),
    per_page: int = Query(20, ge=1, le=_MAX_PER_PAGE, description="每頁筆數"),
) -> dict[str, Any]:
    """List callback-reply evidence rows with pagination.

    Thin HTTP wrapper around ``list_callback_replies_svc``.

    Args:
        project_id: Optional tenant filter.
        callback_reply_status: Optional callback reply status filter.
        action: Optional callback action filter.
        incident_id: Optional related incident filter.
        page: 1-based page number.
        per_page: Page size, capped by ``_MAX_PER_PAGE``.
    """
    replies = await list_callback_replies_svc(
        project_id=project_id,
        callback_reply_status=callback_reply_status,
        action=action,
        incident_id=incident_id,
        page=page,
        per_page=per_page,
    )
    return replies
|
||||
|
||||
|
||||
@router.get(
    "/cicd/events",
    response_model=ListCicdEventsResponse,
    summary="列出 CI/CD evidence events",
    description=(
        "從 alert_operation_log 讀取 CI/CD notification evidence,供 AwoooP "
        "Deployments / Run Console 顯示 rollout-risk、success、failed 等階段狀態。"
    ),
)
async def list_cicd_events(
    project_id: str | None = Query(None, description="租戶 ID(目前支援 awoooi)"),
    stage: str | None = Query(None, description="CI/CD stage filter(可選)"),
    status: str | None = Query(None, description="CI/CD status filter(running/success/failed/pending)"),
    limit: int = Query(12, ge=1, le=50, description="最多返回筆數"),
) -> dict[str, Any]:
    """List CI/CD evidence events.

    Thin HTTP wrapper around ``list_cicd_events_svc``.

    Args:
        project_id: Optional tenant filter.
        stage: Optional CI/CD stage filter.
        status: Optional status filter; handed to the service under the
            keyword ``status_filter``.
        limit: Maximum number of rows returned (1-50).
    """
    events = await list_cicd_events_svc(
        project_id=project_id,
        stage=stage,
        status_filter=status,
        limit=limit,
    )
    return events
|
||||
|
||||
|
||||
@router.get(
    "/ai-route-status",
    response_model=AiRouteStatusResponse,
    summary="查詢 AI Provider 路由狀態",
    description=(
        "回傳目前 Ollama/Gemini 路由策略、即時 primary、fallback chain 與健康狀態;"
        "只讀,不觸發推理或自動修復。"
    ),
)
async def get_ai_route_status(
    workload_type: str | None = Query(
        "deep_rca",
        description="工作負載類型,例如 deep_rca/hermes/interactive/embedding/rag/code_review/image_analysis",
    ),
) -> dict[str, Any]:
    """Return the AI provider routing status for a workload type.

    Thin HTTP wrapper around ``get_ai_route_status_svc``.

    Args:
        workload_type: Workload type to inspect; defaults to ``"deep_rca"``.
    """
    route_status = await get_ai_route_status_svc(workload_type=workload_type)
    return route_status
|
||||
|
||||
|
||||
@router.get(
    "/runs/{run_id}/detail",
    summary="查詢 Run 詳細時間線",
    description=(
        "返回單一 Run 的主狀態、Step Journal、MCP Gateway audit、"
        "入站 Channel Event 與出站訊息,供 Operator Console 顯示完整處置脈絡。"
    ),
)
async def get_run_detail(
    run_id: str,
    project_id: str | None = Query(None, description="租戶 ID(可選)"),
) -> dict[str, Any]:
    """Return the detail timeline of a single run.

    Thin HTTP wrapper around ``get_run_detail_svc``.

    Args:
        run_id: Run identifier taken from the URL path (plain string here).
        project_id: Optional tenant filter.
    """
    detail = await get_run_detail_svc(run_id=run_id, project_id=project_id)
    return detail
|
||||
|
||||
|
||||
@router.get(
    "/status-chain",
    summary="查詢 AwoooP 狀態鏈",
    description=(
        "依 incident_id 查詢 truth-chain + ADR-100 history 合併後的只讀狀態鏈,"
        "供 Work Items、Approvals、Monitoring 等操作頁面共用。"
    ),
)
async def get_awooop_status_chain(
    project_id: str | None = Query(None, description="租戶 ID(可選)"),
    incident_id: list[str] | None = Query(
        None,
        description="Incident ID,可重複傳入以合併同一工作項的多個事件",
    ),
) -> dict[str, Any]:
    """Return the status chain for the given incident ids.

    Thin HTTP wrapper around ``get_awooop_status_chain_svc``.

    Args:
        project_id: Optional tenant filter.
        incident_id: Repeatable query parameter. A missing or empty value is
            forwarded to the service as an empty list.
    """
    # The service always receives a list, never ``None``.
    incident_ids = incident_id if incident_id else []
    chain = await get_awooop_status_chain_svc(
        project_id=project_id,
        incident_ids=incident_ids,
    )
    return chain
|
||||
|
||||
|
||||
@@ -108,8 +398,17 @@ async def list_runs(
|
||||
)
|
||||
async def list_approvals(
|
||||
project_id: str | None = Query(None, description="租戶 ID(可選)"),
|
||||
run_id: str | None = Query(None, description="Run ID(可選,M8 詳情頁查單筆)"),
|
||||
remediation_status: str | None = Query(
|
||||
None,
|
||||
description="AI 證據狀態 filter(no_evidence/mcp_observed/read_only_dry_run/write_observed/blocked/observed)",
|
||||
),
|
||||
) -> dict[str, Any]:
|
||||
return await list_approvals_svc(project_id=project_id)
|
||||
return await list_approvals_svc(
|
||||
project_id=project_id,
|
||||
run_id=run_id,
|
||||
remediation_status=remediation_status,
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
@@ -126,11 +425,12 @@ async def list_approvals(
|
||||
async def decide_approval(
|
||||
run_id: str,
|
||||
body: DecideApprovalRequest,
|
||||
operator: AwoooPOperatorPrincipal = Depends(verify_awooop_operator),
|
||||
) -> dict[str, Any]:
|
||||
return await decide_approval_svc(
|
||||
run_id=run_id,
|
||||
project_id=body.project_id,
|
||||
decision=body.decision,
|
||||
approver_id=body.approver_id,
|
||||
approver_id=operator.operator_id,
|
||||
reason=body.reason,
|
||||
)
|
||||
|
||||
64
apps/api/src/api/v1/platform/truth_chain.py
Normal file
64
apps/api/src/api/v1/platform/truth_chain.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""AwoooP Operator Console — truth-chain read API."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
|
||||
from src.core.awooop_operator_auth import (
|
||||
AwoooPOperatorPrincipal,
|
||||
verify_awooop_operator,
|
||||
)
|
||||
from src.services.awooop_truth_chain_service import (
|
||||
fetch_automation_quality_summary,
|
||||
fetch_truth_chain,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get(
    "/truth-chain/quality/summary",
    summary="查詢 AI 自動化品質總覽",
    description=(
        "T12c read-only aggregate endpoint. 聚合最近 incident 的 automation quality gate,"
        "讓 Operator 不必逐張 Telegram 卡片判斷是否真正完成 AI 自動修復。"
        "此總覽不回傳逐筆 examples;source-level truth-chain 詳情仍需 operator auth。"
    ),
)
async def get_automation_quality_summary(
    project_id: str = Query("awoooi", description="租戶 ID"),
    hours: int = Query(24, ge=1, le=168, description="回看小時數"),
    limit: int = Query(200, ge=1, le=500, description="最多評估 incident 數"),
) -> dict[str, Any]:
    """Return the aggregate automation-quality summary.

    Fetches the summary from ``fetch_automation_quality_summary``, blanks its
    ``examples`` entry and attaches a ``visibility_note`` before returning it.
    This route declares no operator dependency, so per-item examples are not
    exposed here.

    Args:
        project_id: Tenant id; defaults to ``"awoooi"``.
        hours: Look-back window in hours (1-168).
        limit: Maximum number of incidents evaluated (1-500).

    Returns:
        The service's summary dict, modified in place as described above.
    """
    summary = await fetch_automation_quality_summary(
        project_id=project_id,
        hours=hours,
        limit=limit,
    )
    # Overwrite in place: drop per-item examples and explain where to get them.
    summary.update(
        {
            "examples": [],
            "visibility_note": (
                "Aggregate only. Use /truth-chain/{source_id} with operator auth for source-level details."
            ),
        }
    )
    return summary
|
||||
|
||||
|
||||
@router.get(
    "/truth-chain/{source_id}",
    summary="查詢 Telegram / Incident / Drift 真相鏈",
    description=(
        "T0 read-only endpoint. 聚合 incident、approval、evidence、MCP、"
        "automation_operation_log、drift repeat state 與 outbound mirror,"
        "讓 Operator Console 能判斷 Telegram 卡片目前卡在哪個流程節點。"
    ),
)
async def get_truth_chain(
    source_id: str,
    project_id: str = Query("awoooi", description="租戶 ID"),
    operator: AwoooPOperatorPrincipal = Depends(verify_awooop_operator),
) -> dict[str, Any]:
    """Return the source-level truth chain for ``source_id``.

    Args:
        source_id: Source identifier taken from the URL path.
        project_id: Tenant id; defaults to ``"awoooi"``.
        operator: Resolved by ``verify_awooop_operator``. Present only so the
            dependency gates access to this read API.
    """
    # The principal itself is not needed by the aggregation query; bind it to
    # a throwaway name so it does not read as an unused parameter.
    _ = operator
    chain = await fetch_truth_chain(source_id=source_id, project_id=project_id)
    return chain
|
||||
@@ -8,9 +8,10 @@ leWOOOgo 原則: Router 只做 HTTP 轉發,業務邏輯在 KnowledgeRAGService
|
||||
建立者: Claude Code (Phase 33 ADR-067)
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, BackgroundTasks, HTTPException
|
||||
from fastapi import APIRouter, BackgroundTasks
|
||||
from pydantic import BaseModel
|
||||
|
||||
from src.core.config import get_settings
|
||||
from src.services.knowledge_rag_service import get_knowledge_rag_service
|
||||
|
||||
router = APIRouter(prefix="/rag", tags=["RAG Knowledge Base"])
|
||||
@@ -43,9 +44,10 @@ async def trigger_index(background_tasks: BackgroundTasks) -> RagIndexResponse:
|
||||
- .agents/skills/*.md
|
||||
"""
|
||||
background_tasks.add_task(_run_index)
|
||||
model = get_settings().OLLAMA_EMBEDDING_MODEL
|
||||
return RagIndexResponse(
|
||||
status="accepted",
|
||||
message="索引已排程,背景執行中(nomic-embed-text @ Ollama 111)",
|
||||
message=f"索引已排程,背景執行中({model} @ Ollama GCP-A/GCP-B/111)",
|
||||
)
|
||||
|
||||
|
||||
@@ -62,6 +64,7 @@ async def rag_debug() -> dict:
|
||||
"""診斷用:確認容器內 docs 路徑 + Ollama 連線"""
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
|
||||
paths_check = {}
|
||||
@@ -76,15 +79,27 @@ async def rag_debug() -> dict:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as c:
|
||||
from src.core.config import get_settings as _gs
|
||||
r = await c.post(
|
||||
f"{_gs().OLLAMA_URL}/api/embeddings",
|
||||
json={"model": "nomic-embed-text", "prompt": "test"},
|
||||
)
|
||||
ollama_ok = r.status_code == 200 if r.status_code == 200 else f"http_{r.status_code}"
|
||||
from src.services.ollama_endpoint_resolver import resolve_ollama_order
|
||||
|
||||
settings = _gs()
|
||||
statuses: list[str] = []
|
||||
for endpoint in resolve_ollama_order("embedding"):
|
||||
if not endpoint.url:
|
||||
continue
|
||||
r = await c.post(
|
||||
f"{endpoint.url}/api/embeddings",
|
||||
json={"model": settings.OLLAMA_EMBEDDING_MODEL, "prompt": "test"},
|
||||
)
|
||||
if r.status_code == 200:
|
||||
ollama_ok = True
|
||||
break
|
||||
statuses.append(f"{endpoint.provider_name}=http_{r.status_code}")
|
||||
if ollama_ok is not True:
|
||||
ollama_ok = ", ".join(statuses) or "no_endpoint"
|
||||
except Exception as e:
|
||||
ollama_ok = f"error: {type(e).__name__}: {e}"
|
||||
|
||||
return {"cwd": os.getcwd(), "paths": paths_check, "ollama_111_embed": ollama_ok}
|
||||
return {"cwd": os.getcwd(), "paths": paths_check, "ollama_embedding": ollama_ok}
|
||||
|
||||
|
||||
@router.get("/stats", summary="索引統計")
|
||||
|
||||
@@ -14,12 +14,15 @@ AWOOOI API - Sentry Webhook Handler
|
||||
🔴 HARD RULE: 時間顯示使用 Asia/Taipei (UTC+8)
|
||||
"""
|
||||
|
||||
import json
|
||||
import uuid
|
||||
from typing import Any
|
||||
|
||||
import structlog
|
||||
from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
|
||||
from pydantic import BaseModel
|
||||
|
||||
from src.core.awooop_operator_auth import authenticate_awooop_operator_headers
|
||||
from src.core.circuit_breaker import get_openclaw_guard
|
||||
from src.core.metrics import (
|
||||
record_alert_chain_failure,
|
||||
@@ -35,8 +38,10 @@ from src.models.approval import (
|
||||
)
|
||||
from src.services.anomaly_counter import get_anomaly_counter
|
||||
from src.services.approval_db import get_approval_service
|
||||
from src.services.channel_hub import record_external_alert_event
|
||||
from src.services.openclaw_http_service import get_openclaw_http_service
|
||||
from src.services.sentry_service import get_sentry_service
|
||||
|
||||
# 2026-04-27 P3.1-T2 by Claude — Tier-2 三服務感知強化:補 SentryWebhookService 簽章驗證
|
||||
from src.services.sentry_webhook_service import (
|
||||
SentrySignatureError,
|
||||
@@ -87,6 +92,114 @@ async def sentry_webhook_health() -> dict:
|
||||
return {"status": "ok", "webhook": "sentry"}
|
||||
|
||||
|
||||
def _sentry_event_tag(event_data: dict[str, Any], key: str) -> str | None:
|
||||
tags = event_data.get("tags") or []
|
||||
for tag in tags:
|
||||
if isinstance(tag, list | tuple) and len(tag) >= 2 and str(tag[0]) == key:
|
||||
return str(tag[1])
|
||||
if isinstance(tag, dict) and str(tag.get("key")) == key:
|
||||
value = tag.get("value")
|
||||
return str(value) if value is not None else None
|
||||
return None
|
||||
|
||||
|
||||
def _is_sentry_upstream_canary(payload: dict[str, Any]) -> bool:
    """Tell whether a Sentry webhook payload is an upstream canary.

    A payload qualifies only when it is a dict whose ``data`` is a dict and
    whose ``action`` equals ``"triggered"``, and at least one marker matches:

    * ``data.issue.id`` starts with ``awoooi-canary-``
    * ``data.issue.shortId`` starts with ``AWOOOI-CANARY`` (case-insensitive)
    * ``data.issue.title`` equals ``AwoooPSourceProviderCanary``
    * the event tag ``awoooi_canary`` equals ``true`` (case-insensitive)
    """
    if not isinstance(payload, dict):
        return False
    data = payload.get("data")
    if not isinstance(data, dict):
        return False
    if payload.get("action") != "triggered":
        return False

    # Non-dict ``issue`` / ``event`` sections are treated as empty.
    issue = data.get("issue")
    issue_data = issue if isinstance(issue, dict) else {}
    event = data.get("event")
    event_data = event if isinstance(event, dict) else {}

    issue_id = str(issue_data.get("id") or "")
    short_id = str(issue_data.get("shortId") or "")
    title = str(issue_data.get("title") or "")

    if issue_id.startswith("awoooi-canary-"):
        return True
    if short_id.upper().startswith("AWOOOI-CANARY"):
        return True
    if title == "AwoooPSourceProviderCanary":
        return True

    canary_tag = _sentry_event_tag(event_data, "awoooi_canary") or ""
    return canary_tag.lower() == "true"
|
||||
|
||||
|
||||
async def _record_sentry_upstream_canary(
    payload: dict[str, Any],
    request: Request,
) -> dict[str, Any]:
    """Persist an operator-signed Sentry canary as an ``upstream_canary`` event.

    The caller is authenticated through the ``x-awooop-operator-id`` and
    ``x-awooop-operator-key`` request headers, then the payload is stored via
    ``record_external_alert_event``. This function itself creates no incident
    or approval and sends no Telegram message; the stored record and the
    response both say so explicitly.

    Args:
        payload: Decoded Sentry webhook body already classified as a canary.
        request: Incoming request, used only for the operator headers.

    Returns:
        A summary dict with status ``"canary_recorded"``, the derived event id,
        the stored event's identifier, and the all-``False`` side-effect flags.

    Raises:
        HTTPException: 500 when ``record_external_alert_event`` returns ``None``.
        NOTE(review): ``authenticate_awooop_operator_headers`` presumably raises
        on bad credentials; its failure mode is defined outside this block.
    """
    request_headers = request.headers
    operator = authenticate_awooop_operator_headers(
        request_headers.get("x-awooop-operator-id"),
        request_headers.get("x-awooop-operator-key"),
    )

    def _dict_or_empty(value: Any) -> dict[str, Any]:
        # Non-dict members of the payload are treated as empty objects.
        return value if isinstance(value, dict) else {}

    data = _dict_or_empty(payload.get("data"))
    issue = _dict_or_empty(data.get("issue"))
    event = _dict_or_empty(data.get("event"))

    # Event id preference: issue id, then short id, then the run_ref tag.
    issue_id = str(
        issue.get("id")
        or issue.get("shortId")
        or _sentry_event_tag(event, "run_ref")
        or "awoooi-canary-unknown"
    )
    source_url = issue.get("permalink") or issue.get("web_url") or issue.get("url")

    no_side_effects = {
        "incident_created": False,
        "approval_created": False,
        "telegram_sent": False,
        "openclaw_called": False,
    }
    canary_labels = {
        "project": issue.get("project", {}),
        "level": issue.get("level", "info"),
        "awoooi_canary": "true",
        "operator_id": operator.operator_id,
        "telegram": "not_sent",
        "incident": "not_created",
        "approval": "not_created",
    }
    canary_annotations = {
        "message": event.get("message"),
        "summary": (
            "Operator-signed Sentry webhook canary; records upstream "
            "source evidence without creating incident, approval, or Telegram."
        ),
    }
    stored_payload = {
        "raw_canary": payload,
        "operator_id": operator.operator_id,
        "auth_method": operator.auth_method,
        "side_effects": dict(no_side_effects),
    }

    event_uuid = await record_external_alert_event(
        project_id="awoooi",
        provider="sentry",
        event_id=issue_id,
        stage="upstream_canary",
        title=str(issue.get("title") or "AwoooPSourceProviderCanary"),
        severity=str(issue.get("level") or "info"),
        namespace="awoooi-prod",
        target_resource=str(issue.get("culprit") or "source-provider-ingestion"),
        fingerprint=f"source-provider-canary:sentry:{issue_id}",
        source_url=source_url,
        labels=canary_labels,
        annotations=canary_annotations,
        payload=stored_payload,
    )
    if event_uuid is None:
        raise HTTPException(
            status_code=500,
            detail="sentry upstream canary was not recorded",
        )

    return {
        "status": "canary_recorded",
        "provider": "sentry",
        "event_id": issue_id,
        "conversation_event_id": str(event_uuid),
        "side_effects": dict(no_side_effects),
    }
|
||||
|
||||
|
||||
@router.post("/error")
|
||||
async def handle_sentry_error(
|
||||
request: Request,
|
||||
@@ -108,6 +221,14 @@ async def handle_sentry_error(
|
||||
try:
|
||||
# 2026-04-27 P3.1-T2 by Claude — Tier-2 三服務感知強化:接入 SentryWebhookService 簽章驗證
|
||||
body = await request.body()
|
||||
try:
|
||||
payload_from_body = json.loads(body.decode("utf-8") or "{}")
|
||||
except json.JSONDecodeError:
|
||||
payload_from_body = {}
|
||||
|
||||
if isinstance(payload_from_body, dict) and _is_sentry_upstream_canary(payload_from_body):
|
||||
return await _record_sentry_upstream_canary(payload_from_body, request)
|
||||
|
||||
sig_header = request.headers.get("sentry-hook-signature", "")
|
||||
try:
|
||||
verify_sentry_signature(body, sig_header)
|
||||
@@ -124,16 +245,60 @@ async def handle_sentry_error(
|
||||
|
||||
# 提取錯誤資訊
|
||||
issue_data = payload.get("data", {}).get("issue", {})
|
||||
event_data = payload.get("data", {}).get("event", {})
|
||||
issue_id = issue_data.get("id")
|
||||
source_url = (
|
||||
issue_data.get("permalink")
|
||||
or issue_data.get("web_url")
|
||||
or issue_data.get("url")
|
||||
)
|
||||
|
||||
background_tasks.add_task(
|
||||
record_external_alert_event,
|
||||
project_id="awoooi",
|
||||
provider="sentry",
|
||||
event_id=str(issue_id or issue_data.get("shortId") or "unknown"),
|
||||
stage="received",
|
||||
title=str(issue_data.get("title") or "Sentry issue"),
|
||||
severity=str(issue_data.get("level") or "error"),
|
||||
namespace="sentry",
|
||||
target_resource=str(issue_data.get("culprit") or issue_data.get("project", {}).get("slug") or "unknown"),
|
||||
fingerprint=f"sentry-{issue_id or issue_data.get('shortId') or 'unknown'}",
|
||||
source_url=source_url,
|
||||
labels={
|
||||
"project": issue_data.get("project", {}),
|
||||
"level": issue_data.get("level"),
|
||||
"culprit": issue_data.get("culprit"),
|
||||
},
|
||||
annotations={"message": event_data.get("message")},
|
||||
payload=payload,
|
||||
)
|
||||
|
||||
# Phase 10.2.1: 去重檢查 (10 分鐘內不重複發送)
|
||||
issue_id = issue_data.get("id")
|
||||
sentry_service = get_sentry_service()
|
||||
if not await sentry_service.check_dedup(issue_id, ttl=SENTRY_DEDUP_TTL):
|
||||
background_tasks.add_task(
|
||||
record_external_alert_event,
|
||||
project_id="awoooi",
|
||||
provider="sentry",
|
||||
event_id=str(issue_id or issue_data.get("shortId") or "unknown"),
|
||||
stage="deduplicated",
|
||||
title=str(issue_data.get("title") or "Sentry issue"),
|
||||
severity=str(issue_data.get("level") or "error"),
|
||||
namespace="sentry",
|
||||
target_resource=str(issue_data.get("culprit") or issue_data.get("project", {}).get("slug") or "unknown"),
|
||||
fingerprint=f"sentry-{issue_id or issue_data.get('shortId') or 'unknown'}",
|
||||
source_url=source_url,
|
||||
labels={"project": issue_data.get("project", {}), "level": issue_data.get("level")},
|
||||
annotations={"message": event_data.get("message")},
|
||||
payload={"dedup_ttl": SENTRY_DEDUP_TTL},
|
||||
is_duplicate=True,
|
||||
)
|
||||
return {"status": "deduplicated", "issue_id": issue_id, "ttl": SENTRY_DEDUP_TTL}
|
||||
event_data = payload.get("data", {}).get("event", {})
|
||||
|
||||
error_context = {
|
||||
"issue_id": issue_data.get("id"),
|
||||
"source_url": source_url,
|
||||
"title": issue_data.get("title"),
|
||||
"culprit": issue_data.get("culprit"),
|
||||
"level": issue_data.get("level"),
|
||||
@@ -169,6 +334,8 @@ async def handle_sentry_error(
|
||||
"message": "Analysis scheduled"
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.exception("Sentry webhook processing failed")
|
||||
raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
@@ -256,6 +423,29 @@ async def analyze_and_comment(
|
||||
analysis=analysis,
|
||||
anomaly_frequency=frequency_dict,
|
||||
)
|
||||
await record_external_alert_event(
|
||||
project_id="awoooi",
|
||||
provider="sentry",
|
||||
event_id=str(issue_id or error_context.get("issue_id") or "unknown"),
|
||||
stage="approval_linked",
|
||||
title=str(error_context.get("title") or "Sentry issue"),
|
||||
severity=str(error_context.get("level") or "error"),
|
||||
namespace="sentry",
|
||||
target_resource=str(error_context.get("culprit") or error_context.get("project") or "unknown"),
|
||||
fingerprint=f"sentry-{issue_id or error_context.get('issue_id') or 'unknown'}",
|
||||
approval_id=approval_id,
|
||||
source_url=error_context.get("source_url"),
|
||||
labels={
|
||||
"project": error_context.get("project"),
|
||||
"level": error_context.get("level"),
|
||||
},
|
||||
annotations={"message": error_context.get("message")},
|
||||
payload={
|
||||
"anomaly_frequency": frequency_dict,
|
||||
"ai_analyzed": analysis is not None,
|
||||
"ai_provider": analysis.analyzed_by if analysis else None,
|
||||
},
|
||||
)
|
||||
|
||||
# 4. 發送 Telegram 告警 (含頻率資訊)
|
||||
await send_sentry_telegram_alert(
|
||||
|
||||
@@ -1,7 +1,3 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
"""
|
||||
AWOOOI API - SignOz Webhook Handler
|
||||
====================================
|
||||
@@ -17,12 +13,17 @@ AWOOOI API - SignOz Webhook Handler
|
||||
🔴 HARD RULE: 時間顯示使用 Asia/Taipei (UTC+8)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import uuid
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import structlog
|
||||
from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
|
||||
from pydantic import BaseModel
|
||||
|
||||
from src.core.awooop_operator_auth import authenticate_awooop_operator_headers
|
||||
from src.core.metrics import (
|
||||
record_alert_chain_failure,
|
||||
record_alert_chain_success,
|
||||
@@ -37,10 +38,14 @@ from src.models.approval import (
|
||||
)
|
||||
from src.services.anomaly_counter import get_anomaly_counter
|
||||
from src.services.approval_db import get_approval_service
|
||||
from src.services.channel_hub import record_external_alert_event
|
||||
from src.services.incident_service import get_incident_service
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
from src.utils.timezone import now_taipei_iso
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from src.services.openclaw import LLMAnalysisResult
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/webhooks/signoz", tags=["SignOz Webhook"])
|
||||
@@ -67,6 +72,101 @@ class SignOzAlertPayload(BaseModel):
|
||||
generatorURL: str | None = None
|
||||
|
||||
|
||||
def _is_signoz_upstream_canary(alert: dict) -> bool:
|
||||
labels = alert.get("labels", {}) if isinstance(alert.get("labels"), dict) else {}
|
||||
annotations = (
|
||||
alert.get("annotations", {})
|
||||
if isinstance(alert.get("annotations"), dict)
|
||||
else {}
|
||||
)
|
||||
alert_name = str(alert.get("alertname") or labels.get("alertname") or "")
|
||||
return (
|
||||
str(labels.get("awoooi_canary", "")).lower() == "true"
|
||||
or alert_name == "AwoooPSourceProviderCanary"
|
||||
or str(annotations.get("awooop_canary", "")).lower() == "true"
|
||||
)
|
||||
|
||||
|
||||
async def _record_signoz_upstream_canary(
    alert: dict,
    request: Request,
) -> dict:
    """Persist an operator-signed SignOz canary as an ``upstream_canary`` event.

    The caller is authenticated through the ``x-awooop-operator-id`` and
    ``x-awooop-operator-key`` request headers, then the alert is stored via
    ``record_external_alert_event``. This function itself creates no incident
    or approval and sends no Telegram message; the stored record and the
    response both say so explicitly.

    Args:
        alert: A single SignOz alert already classified as a canary.
        request: Incoming request, used only for the operator headers.

    Returns:
        A summary dict with status ``"canary_recorded"``, the derived event id,
        the alert name, the stored event's identifier, and the all-``False``
        side-effect flags.

    Raises:
        HTTPException: 500 when ``record_external_alert_event`` returns ``None``.
        NOTE(review): ``authenticate_awooop_operator_headers`` presumably raises
        on bad credentials; its failure mode is defined outside this block.
    """
    request_headers = request.headers
    operator = authenticate_awooop_operator_headers(
        request_headers.get("x-awooop-operator-id"),
        request_headers.get("x-awooop-operator-key"),
    )

    # Non-dict "labels" / "annotations" are treated as empty mappings.
    raw_labels = alert.get("labels")
    labels = raw_labels if isinstance(raw_labels, dict) else {}
    raw_annotations = alert.get("annotations")
    annotations = raw_annotations if isinstance(raw_annotations, dict) else {}

    alert_name = str(
        alert.get("alertname") or labels.get("alertname") or "AwoooPSourceProviderCanary"
    )
    run_ref = str(labels.get("run_ref") or labels.get("fingerprint") or "unknown")
    event_id = f"awooop-canary-{run_ref}"

    no_side_effects = {
        "incident_created": False,
        "approval_created": False,
        "telegram_sent": False,
        "openclaw_called": False,
    }
    # The fixed canary markers come last so they override same-named labels.
    canary_labels = {
        **labels,
        "awoooi_canary": "true",
        "operator_id": operator.operator_id,
        "telegram": "not_sent",
        "incident": "not_created",
        "approval": "not_created",
    }
    default_summary = (
        "Operator-signed SignOz webhook canary; records upstream "
        "source evidence without creating incident, approval, or Telegram."
    )
    canary_annotations = {
        **annotations,
        "summary": annotations.get("summary") or default_summary,
    }

    event_uuid = await record_external_alert_event(
        project_id="awoooi",
        provider="signoz",
        event_id=event_id,
        stage="upstream_canary",
        title=alert_name,
        severity=str(labels.get("severity") or "info"),
        namespace=str(labels.get("namespace") or "awoooi-prod"),
        target_resource=str(
            labels.get("service_name") or labels.get("service") or "source-provider-ingestion"
        ),
        fingerprint=str(
            labels.get("fingerprint") or f"source-provider-canary:signoz:{run_ref}"
        ),
        source_url=alert.get("generatorURL"),
        labels=canary_labels,
        annotations=canary_annotations,
        payload={
            "raw_canary": alert,
            "operator_id": operator.operator_id,
            "auth_method": operator.auth_method,
            "side_effects": dict(no_side_effects),
        },
    )
    if event_uuid is None:
        raise HTTPException(
            status_code=500,
            detail="signoz upstream canary was not recorded",
        )

    return {
        "status": "canary_recorded",
        "provider": "signoz",
        "event_id": event_id,
        "alert_name": alert_name,
        "conversation_event_id": str(event_uuid),
        "side_effects": dict(no_side_effects),
    }
|
||||
|
||||
|
||||
@router.post("/alert")
|
||||
async def handle_signoz_alert(
|
||||
request: Request,
|
||||
@@ -99,11 +199,35 @@ async def handle_signoz_alert(
|
||||
results.append({"status": "ignored", "reason": "not firing"})
|
||||
continue
|
||||
|
||||
if _is_signoz_upstream_canary(alert):
|
||||
results.append(await _record_signoz_upstream_canary(alert, request))
|
||||
continue
|
||||
|
||||
# 提取告警資訊
|
||||
alert_name = alert.get("alertname", alert.get("labels", {}).get("alertname", "unknown"))
|
||||
labels = alert.get("labels", {})
|
||||
annotations = alert.get("annotations", {})
|
||||
severity = labels.get("severity", "warning")
|
||||
source_url = alert.get("generatorURL")
|
||||
service_name = labels.get("service_name", labels.get("service", "unknown"))
|
||||
fingerprint = labels.get("fingerprint") or f"signoz-{alert_name}-{service_name}"
|
||||
|
||||
background_tasks.add_task(
|
||||
record_external_alert_event,
|
||||
project_id="awoooi",
|
||||
provider="signoz",
|
||||
event_id=str(fingerprint),
|
||||
stage="received",
|
||||
title=str(alert_name),
|
||||
severity=str(severity),
|
||||
namespace=str(labels.get("namespace", "signoz")),
|
||||
target_resource=str(service_name),
|
||||
fingerprint=str(fingerprint),
|
||||
source_url=source_url,
|
||||
labels=labels,
|
||||
annotations=annotations,
|
||||
payload=alert,
|
||||
)
|
||||
|
||||
# 背景處理
|
||||
background_tasks.add_task(
|
||||
@@ -113,6 +237,8 @@ async def handle_signoz_alert(
|
||||
annotations=annotations,
|
||||
severity=severity,
|
||||
starts_at=alert.get("startsAt"),
|
||||
source_url=source_url,
|
||||
raw_payload=alert,
|
||||
)
|
||||
|
||||
results.append({
|
||||
@@ -122,6 +248,8 @@ async def handle_signoz_alert(
|
||||
|
||||
return {"status": "ok", "processed": len(results), "results": results}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.exception("signoz_webhook_error", error=str(e))
|
||||
raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
@@ -133,6 +261,8 @@ async def process_signoz_alert(
|
||||
annotations: dict,
|
||||
severity: str,
|
||||
starts_at: str | None,
|
||||
source_url: str | None = None,
|
||||
raw_payload: dict | None = None,
|
||||
):
|
||||
"""
|
||||
背景處理 SignOz 告警
|
||||
@@ -190,6 +320,7 @@ async def process_signoz_alert(
|
||||
"annotations": annotations,
|
||||
"fingerprint": f"signoz-{alert_name}-{labels.get('service_name', 'unknown')}",
|
||||
}
|
||||
fingerprint = signal_data["fingerprint"]
|
||||
# ADR-037: 傳遞頻率統計到 Incident
|
||||
incident = await incident_service.create_incident_from_signal(
|
||||
signal_data, frequency_stats=anomaly_frequency
|
||||
@@ -229,6 +360,30 @@ async def process_signoz_alert(
|
||||
anomaly_frequency=anomaly_frequency,
|
||||
analysis_result=analysis_result, # 帶入 AI 結果
|
||||
)
|
||||
await record_external_alert_event(
|
||||
project_id="awoooi",
|
||||
provider="signoz",
|
||||
event_id=str(fingerprint),
|
||||
stage="incident_linked",
|
||||
title=str(alert_name),
|
||||
severity=str(severity),
|
||||
namespace=str(labels.get("namespace", "signoz")),
|
||||
target_resource=str(labels.get("service_name", labels.get("service", "unknown"))),
|
||||
fingerprint=str(fingerprint),
|
||||
incident_id=str(incident.incident_id),
|
||||
approval_id=str(approval_id),
|
||||
source_url=source_url or trace_url,
|
||||
labels=labels,
|
||||
annotations=annotations,
|
||||
payload={
|
||||
"raw_alert": raw_payload or {},
|
||||
"trace_url": trace_url,
|
||||
"has_signoz_metrics": bool(signoz_metrics),
|
||||
"ai_provider": ai_provider,
|
||||
"tokens": tokens,
|
||||
"cost": cost,
|
||||
},
|
||||
)
|
||||
|
||||
# =================================================================
|
||||
# Step 5: 發送 Telegram 告警
|
||||
@@ -282,7 +437,7 @@ async def create_signoz_approval(
|
||||
severity: str,
|
||||
incident_id: str,
|
||||
anomaly_frequency: dict | None = None,
|
||||
analysis_result: "LLMAnalysisResult" | None = None,
|
||||
analysis_result: LLMAnalysisResult | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
為 SignOz 告警建立 Approval 記錄
|
||||
@@ -379,7 +534,7 @@ async def send_signoz_telegram(
|
||||
annotations: dict,
|
||||
severity: str,
|
||||
anomaly_frequency: dict | None = None,
|
||||
analysis_result: "LLMAnalysisResult" | None = None,
|
||||
analysis_result: LLMAnalysisResult | None = None,
|
||||
ai_provider: str = "none",
|
||||
):
|
||||
"""
|
||||
@@ -442,6 +597,7 @@ async def _send_log_summary_notification(
|
||||
帶 5s 軟超時:超時後摘要繼續生成並存 Redis,不阻塞告警主流程
|
||||
"""
|
||||
import html as _html
|
||||
|
||||
from src.services.log_summary_service import get_log_summary_service
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ Endpoints:
|
||||
- 每個 Nonce 只能使用一次
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, HTTPException, status
|
||||
@@ -27,6 +28,8 @@ from pydantic import BaseModel
|
||||
from src.core.config import settings
|
||||
from src.core.logging import get_logger
|
||||
from src.services.approval_db import get_approval_service
|
||||
from src.services.approval_execution import get_execution_service
|
||||
from src.services.incident_approval_service import get_incident_approval_service
|
||||
from src.services.security_interceptor import (
|
||||
NonceReplayError,
|
||||
UserNotWhitelistedError,
|
||||
@@ -64,6 +67,80 @@ class TestPushRequest(BaseModel):
|
||||
incident_id: str = ""
|
||||
|
||||
|
||||
async def _run_telegram_approved_execution(approval) -> None:
    """Execute an approval that was granted through a Telegram callback.

    The approval is handed to the execution service and the outcome is logged.
    Any exception is logged and swallowed, so this coroutine never raises.

    Args:
        approval: Approval object; ``id`` and ``incident_id`` are read with
            ``getattr`` defaults, so either attribute may be missing.
    """
    log_fields = {
        "approval_id": str(getattr(approval, "id", "")),
        "incident_id": getattr(approval, "incident_id", None),
    }
    try:
        outcome = await get_execution_service().execute_approved_action(approval)
        logger.info(
            "telegram_approval_execution_completed",
            **log_fields,
            success=bool(outcome),
        )
    except Exception as exc:
        logger.error(
            "telegram_approval_execution_failed",
            **log_fields,
            error=str(exc),
        )
|
||||
|
||||
|
||||
# Strong references to in-flight execution tasks. The event loop keeps only weak
# references to tasks, so a task nobody else references can be garbage-collected
# before it finishes.
_TELEGRAM_EXECUTION_TASKS: set[asyncio.Task] = set()


def _schedule_telegram_approved_execution(approval) -> bool:
    """Schedule execution after Telegram approval reaches required signatures.

    The execution runs as a fire-and-forget task on the running event loop. The
    task is kept in ``_TELEGRAM_EXECUTION_TASKS`` until it completes so that it
    cannot be collected mid-execution.

    Args:
        approval: Approval object passed on to
            ``_run_telegram_approved_execution``.

    Returns:
        ``True`` when the task was scheduled, ``False`` when scheduling failed
        (the failure is logged, not raised).
    """
    approval_id = str(getattr(approval, "id", ""))
    incident_id = getattr(approval, "incident_id", None)
    execution = _run_telegram_approved_execution(approval)
    task = None
    try:
        task = asyncio.create_task(execution)
        _TELEGRAM_EXECUTION_TASKS.add(task)
        # Drop the reference once the task is done so the set does not grow.
        task.add_done_callback(_TELEGRAM_EXECUTION_TASKS.discard)
        logger.info(
            "telegram_approval_execution_scheduled",
            approval_id=approval_id,
            incident_id=incident_id,
        )
        return True
    except Exception as exc:
        if task is None:
            # The coroutine never reached the loop (e.g. no running loop);
            # close it so it is not reported as "never awaited".
            execution.close()
        logger.error(
            "telegram_approval_execution_schedule_failed",
            approval_id=approval_id,
            incident_id=incident_id,
            error=str(exc),
        )
        return False
|
||||
|
||||
|
||||
async def _finalize_telegram_approval(approval, execution_triggered: bool) -> bool:
|
||||
"""Complete the execution handoff for Telegram approvals.
|
||||
|
||||
ApprovalDBService only records the signature/status transition. The actual
|
||||
executor scheduling lives in API callers, so Telegram must mirror the REST
|
||||
approval endpoint instead of stopping at a visual approval stamp.
|
||||
"""
|
||||
if not execution_triggered:
|
||||
return False
|
||||
return _schedule_telegram_approved_execution(approval)
|
||||
|
||||
|
||||
async def _sync_telegram_rejection(approval_id: str) -> bool:
    """Propagate a Telegram rejection to the linked Incident state.

    The incident-approval service is notified that the approval moved to
    ``"rejected"``. Failures are logged and reported through the return value
    rather than raised.

    Args:
        approval_id: Identifier of the rejected approval.

    Returns:
        ``True`` when the notification succeeded, ``False`` when it raised.
    """
    try:
        incident_approvals = get_incident_approval_service()
        await incident_approvals.on_approval_status_change(
            approval_id=approval_id,
            new_status="rejected",
        )
        logger.info("telegram_rejection_incident_synced", approval_id=approval_id)
    except Exception as exc:
        logger.error(
            "telegram_rejection_incident_sync_failed",
            approval_id=approval_id,
            error=str(exc),
        )
        return False
    return True
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Endpoints
|
||||
# =============================================================================
|
||||
@@ -198,21 +275,50 @@ async def telegram_webhook(
|
||||
)
|
||||
|
||||
if approval:
|
||||
status_value = approval.status.value if hasattr(approval.status, "value") else str(approval.status)
|
||||
if (
|
||||
"Cannot sign" in msg
|
||||
or "already signed" in msg
|
||||
or "Concurrent modification" in msg
|
||||
):
|
||||
logger.info(
|
||||
"telegram_approval_ignored_already_processed",
|
||||
approval_id=approval_id,
|
||||
user_id=user_id,
|
||||
status=status_value,
|
||||
message=msg,
|
||||
)
|
||||
await _log_user_action("approve_duplicate", False, getattr(approval, "incident_id", None))
|
||||
return {
|
||||
"ok": True,
|
||||
"message": "Already processed",
|
||||
"approval_id": approval_id,
|
||||
"status": status_value,
|
||||
"execution_triggered": False,
|
||||
"execution_scheduled": False,
|
||||
}
|
||||
|
||||
execution_scheduled = await _finalize_telegram_approval(
|
||||
approval=approval,
|
||||
execution_triggered=execution_triggered,
|
||||
)
|
||||
logger.info(
|
||||
"telegram_approval_signed",
|
||||
approval_id=approval_id,
|
||||
user_id=user_id,
|
||||
status=approval.status.value,
|
||||
status=status_value,
|
||||
execution_triggered=execution_triggered,
|
||||
execution_scheduled=execution_scheduled,
|
||||
)
|
||||
await _log_user_action("approve", True, getattr(approval, "incident_id", None))
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"message": "Approved",
|
||||
"message": "Approved" if execution_triggered else "Signed",
|
||||
"approval_id": approval_id,
|
||||
"status": approval.status.value,
|
||||
"status": status_value,
|
||||
"execution_triggered": execution_triggered,
|
||||
"execution_scheduled": execution_scheduled,
|
||||
}
|
||||
|
||||
elif action == "reject":
|
||||
@@ -224,10 +330,12 @@ async def telegram_webhook(
|
||||
)
|
||||
|
||||
if approval:
|
||||
incident_synced = await _sync_telegram_rejection(approval_id)
|
||||
logger.info(
|
||||
"telegram_approval_rejected",
|
||||
approval_id=approval_id,
|
||||
user_id=user_id,
|
||||
incident_synced=incident_synced,
|
||||
)
|
||||
await _log_user_action("reject", False, getattr(approval, "incident_id", None))
|
||||
|
||||
@@ -236,6 +344,7 @@ async def telegram_webhook(
|
||||
"message": "Rejected",
|
||||
"approval_id": approval_id,
|
||||
"status": approval.status.value,
|
||||
"incident_synced": incident_synced,
|
||||
}
|
||||
|
||||
return {"ok": False, "message": "Unknown action"}
|
||||
|
||||
@@ -33,14 +33,8 @@ from pydantic import BaseModel, Field
|
||||
|
||||
from src.core.config import settings
|
||||
from src.core.constants import is_cicd_alertname, is_heartbeat_alertname
|
||||
from src.services.alert_rule_engine import get_incident_type, match_rule
|
||||
from src.services.action_parser import is_safe_kubectl_action
|
||||
from src.services.security_interceptor import check_webhook_nonce # P0-06: nonce dedup via Service 層
|
||||
from src.core.logging import get_logger
|
||||
from src.core.metrics import record_alert_chain_success
|
||||
|
||||
# Phase 15.2: Trace Context (moved to SignalProducerService)
|
||||
# get_trace_context 已移至 Service 層
|
||||
from src.models.approval import (
|
||||
ApprovalRequestCreate,
|
||||
BlastRadius,
|
||||
@@ -48,31 +42,43 @@ from src.models.approval import (
|
||||
DryRunCheck,
|
||||
RiskLevel,
|
||||
)
|
||||
|
||||
# R4 #129 (2026-04-01 ogt): AlertPayload/AlertResponse 移至 models 層,AlertAnalyzer 移至 services 層
|
||||
# ogt 更新 v1.1 2026-04-01 台北時間: generate_alert_fingerprint 移至 alert_analyzer_service (ADR-024)
|
||||
# [首席架構師] 移除 generate_alert_fingerprint 直接 import,改用 AlertAnalyzer.generate_fingerprint v1.2 2026-04-01 Asia/Taipei
|
||||
from src.models.webhook import AlertPayload, AlertResponse
|
||||
from src.services.action_parser import is_safe_kubectl_action
|
||||
from src.services.alert_analyzer_service import AlertAnalyzer
|
||||
from src.services.alert_approval_guard import guard_alert_approval_action
|
||||
from src.services.alert_grouping_service import get_alert_grouping_service
|
||||
from src.services.alert_rule_engine import get_incident_type, match_rule
|
||||
from src.services.alertmanager_llm_guard import (
|
||||
ALERTMANAGER_LLM_INFLIGHT_LOCK_TTL_SECONDS,
|
||||
try_acquire_alertmanager_llm_lock,
|
||||
)
|
||||
from src.services.approval_db import get_approval_service
|
||||
from src.services.auto_approve import get_auto_approve_policy
|
||||
from src.services.auto_repair_service import AutoRepairService
|
||||
from src.services.channel_hub import (
|
||||
record_alertmanager_event,
|
||||
record_grouped_alert_event,
|
||||
)
|
||||
|
||||
# Phase 15.2: Trace Context (moved to SignalProducerService)
|
||||
# get_trace_context 已移至 Service 層
|
||||
|
||||
# R4 #129 (2026-04-01 ogt): AlertPayload/AlertResponse 移至 models 層,AlertAnalyzer 移至 services 層
|
||||
# ogt 更新 v1.1 2026-04-01 台北時間: generate_alert_fingerprint 移至 alert_analyzer_service (ADR-024)
|
||||
# [首席架構師] 移除 generate_alert_fingerprint 直接 import,改用 AlertAnalyzer.generate_fingerprint v1.2 2026-04-01 Asia/Taipei
|
||||
|
||||
# Phase 17 P0: Service 層 (消除 Router 直接存取 Redis)
|
||||
# C2 修正 (首席架構師審查 2026-04-10): create_incident_for_approval + extract_affected_services 已移入 Service 層
|
||||
from src.services.incident_service import (
|
||||
classify_alert_early,
|
||||
create_incident_for_approval,
|
||||
extract_affected_services,
|
||||
get_incident_service,
|
||||
)
|
||||
from src.services.auto_approve import get_auto_approve_policy
|
||||
from src.services.auto_repair_service import AutoRepairService
|
||||
|
||||
# Phase 5: OpenClaw AI Engine
|
||||
from src.services.openclaw import get_openclaw
|
||||
from src.services.playbook_match_resolver import resolve_playbook_id_for_alert
|
||||
from src.services.security_interceptor import check_webhook_nonce # P0-06: nonce dedup via Service 層
|
||||
from src.services.signal_producer import SignalData, get_signal_producer
|
||||
|
||||
# Phase 5: Telegram Gateway (行動戰情室)
|
||||
@@ -81,9 +87,6 @@ from src.services.telegram_gateway import TelegramGatewayError, get_telegram_gat
|
||||
# Phase 18.1.7: K8s 資源名稱正規化 已移至 alert_analyzer_service (R4 #129)
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
# ADR-076: 告警聚合引擎 (2026-04-14 Claude Haiku 4.5 Asia/Taipei)
|
||||
from src.services.alert_grouping_service import get_alert_grouping_service
|
||||
|
||||
router = APIRouter(prefix="/webhooks", tags=["Webhooks"])
|
||||
logger = get_logger("awoooi.webhooks")
|
||||
|
||||
@@ -136,6 +139,38 @@ def _should_use_alertmanager_rule_first(
|
||||
)
|
||||
|
||||
|
||||
async def _analyze_alertmanager_with_timeout(
    openclaw,
    alert_context: dict,
    *,
    alert_id: str,
    alertname: str,
) -> tuple:
    """Run Alertmanager AI analysis without letting it block the workflow forever.

    ``openclaw.analyze_alert`` is awaited for at most
    ``ALERTMANAGER_BACKGROUND_AI_TIMEOUT_SECONDS``. A timeout or any other
    failure is logged as a warning and replaced by a placeholder result, so
    this coroutine does not raise for analysis errors.

    Args:
        openclaw: Analysis engine exposing an async ``analyze_alert(context)``.
        alert_context: Context dict forwarded unchanged to ``analyze_alert``.
        alert_id: Alert identifier, used only in log records.
        alertname: Alert name, used only in log records.

    Returns:
        The 7-tuple produced by ``analyze_alert``. On failure the placeholder is
        ``(None, provider, "", None, "", 0, 0.0)`` with ``provider`` set to
        ``"fallback_timeout"`` or ``"fallback_error"``.
    """

    try:
        return await asyncio.wait_for(
            openclaw.analyze_alert(alert_context),
            timeout=ALERTMANAGER_BACKGROUND_AI_TIMEOUT_SECONDS,
        )
    # asyncio.TimeoutError only became an alias of the builtin TimeoutError in
    # Python 3.11; catching both keeps the timeout branch working on older
    # interpreters as well.
    except (asyncio.TimeoutError, TimeoutError):
        logger.warning(
            "alertmanager_openclaw_timeout_fallback",
            alert_id=alert_id,
            alertname=alertname,
            timeout_sec=ALERTMANAGER_BACKGROUND_AI_TIMEOUT_SECONDS,
        )
        return None, "fallback_timeout", "", None, "", 0, 0.0
    except Exception as exc:
        # Deliberate best-effort: an analysis failure must not abort the
        # alert workflow, so it degrades to the placeholder result.
        logger.warning(
            "alertmanager_openclaw_failed_fallback",
            alert_id=alert_id,
            alertname=alertname,
            error=str(exc),
        )
        return None, "fallback_error", "", None, "", 0, 0.0
|
||||
|
||||
|
||||
async def _escalate_auto_repair_unavailable(
|
||||
*,
|
||||
incident_id: str,
|
||||
@@ -163,6 +198,19 @@ async def _escalate_auto_repair_unavailable(
|
||||
)
|
||||
|
||||
|
||||
def _auto_repair_action_label(result, fallback_target: str) -> str:
|
||||
"""Build a verifier label that includes the actual playbook steps."""
|
||||
playbook_id = getattr(result, "playbook_id", None) or "unknown"
|
||||
steps = getattr(result, "executed_steps", None) or []
|
||||
step_text = " | ".join(str(step) for step in steps).strip()
|
||||
if not step_text:
|
||||
step_text = fallback_target
|
||||
step_text = " ".join(step_text.split())
|
||||
if len(step_text) > 240:
|
||||
step_text = f"{step_text[:237]}..."
|
||||
return f"auto_repair_playbook:{playbook_id} {step_text}".strip()
|
||||
|
||||
|
||||
async def _try_auto_repair_background(
|
||||
incident_id: str,
|
||||
approval_id: str,
|
||||
@@ -252,6 +300,46 @@ async def _try_auto_repair_background(
|
||||
},
|
||||
)
|
||||
|
||||
_pre_execution_snapshot = None
|
||||
try:
|
||||
from src.core.feature_flags import aiops_flags
|
||||
|
||||
if aiops_flags.is_sub_flag_enabled("AIOPS_P1_PRE_DECISION_INVESTIGATOR"):
|
||||
from src.services.evidence_snapshot import get_latest_snapshot
|
||||
from src.services.post_execution_verifier import get_post_execution_verifier
|
||||
|
||||
_pre_execution_snapshot = await get_latest_snapshot(incident_id)
|
||||
if _pre_execution_snapshot is None:
|
||||
from src.services.pre_decision_investigator import (
|
||||
get_pre_decision_investigator,
|
||||
)
|
||||
|
||||
_pre_execution_snapshot = await asyncio.wait_for(
|
||||
get_pre_decision_investigator().investigate(incident),
|
||||
timeout=60.0,
|
||||
)
|
||||
if _pre_execution_snapshot is not None:
|
||||
await asyncio.wait_for(
|
||||
get_post_execution_verifier().capture_pre_execution_state(
|
||||
incident,
|
||||
_pre_execution_snapshot,
|
||||
),
|
||||
timeout=30.0,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(
|
||||
"auto_repair_pre_state_capture_timeout",
|
||||
incident_id=incident_id,
|
||||
approval_id=approval_id,
|
||||
)
|
||||
except Exception as _pre_state_err:
|
||||
logger.warning(
|
||||
"auto_repair_pre_state_capture_failed",
|
||||
incident_id=incident_id,
|
||||
approval_id=approval_id,
|
||||
error=str(_pre_state_err),
|
||||
)
|
||||
|
||||
# 執行自動修復
|
||||
logger.info(
|
||||
"auto_repair_executing",
|
||||
@@ -263,6 +351,7 @@ async def _try_auto_repair_background(
|
||||
playbook=decision.playbook,
|
||||
is_cold_start=decision.is_cold_start,
|
||||
similarity_score=decision.similarity_score,
|
||||
run_post_verification=False,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
@@ -273,6 +362,20 @@ async def _try_auto_repair_background(
|
||||
|
||||
# 記錄執行結果
|
||||
if result:
|
||||
try:
|
||||
await get_approval_service().update_execution_status(
|
||||
approval_id=approval_id,
|
||||
success=result.success,
|
||||
error_message=result.error,
|
||||
)
|
||||
except Exception as _approval_status_err:
|
||||
logger.warning(
|
||||
"auto_repair_approval_status_update_failed",
|
||||
approval_id=approval_id,
|
||||
incident_id=incident_id,
|
||||
error=str(_approval_status_err),
|
||||
)
|
||||
|
||||
await op_log.append(
|
||||
"EXECUTION_COMPLETED",
|
||||
incident_id=incident_id,
|
||||
@@ -336,11 +439,10 @@ async def _try_auto_repair_background(
|
||||
from src.services.evidence_snapshot import get_latest_snapshot
|
||||
from src.services.learning_service import get_learning_service
|
||||
|
||||
_snapshot = await get_latest_snapshot(incident_id)
|
||||
_action_label = (
|
||||
f"{target_resource}:{namespace}"
|
||||
if not result.success
|
||||
else f"auto_repair_playbook:{result.playbook_id}"
|
||||
_snapshot = _pre_execution_snapshot or await get_latest_snapshot(incident_id)
|
||||
_action_label = _auto_repair_action_label(
|
||||
result,
|
||||
fallback_target=f"{target_resource}:{namespace}",
|
||||
)
|
||||
_verifier = get_post_execution_verifier()
|
||||
_verify_result = await asyncio.wait_for(
|
||||
@@ -792,6 +894,7 @@ async def verify_webhook_signature(
|
||||
|
||||
# 戰略 B: 滑動時間窗 (ADR-073: 5 分鐘改 30 分鐘,防同一問題反覆重建 Incident,2026-04-12 ogt)
|
||||
DEBOUNCE_WINDOW_MINUTES = 30
|
||||
ALERTMANAGER_BACKGROUND_AI_TIMEOUT_SECONDS = 90.0
|
||||
|
||||
|
||||
# =============================================================================
|
||||
@@ -1105,7 +1208,12 @@ async def receive_alert(
|
||||
# 呼叫 OpenClaw LLM 分析 (v7.0 含 SignOz 整合)
|
||||
# 2026-03-29 ogt: 加入 Token/Cost 追蹤
|
||||
openclaw = get_openclaw()
|
||||
analysis_result, ai_provider, raw_response, signoz_metrics, signoz_trace_url, ai_tokens, ai_cost = await openclaw.analyze_alert(alert_context)
|
||||
analysis_result, ai_provider, raw_response, signoz_metrics, signoz_trace_url, ai_tokens, ai_cost = await _analyze_alertmanager_with_timeout(
|
||||
openclaw,
|
||||
alert_context,
|
||||
alert_id=alert_id,
|
||||
alertname=alert.alert_type,
|
||||
)
|
||||
|
||||
if analysis_result:
|
||||
# LLM 分析成功
|
||||
@@ -1147,15 +1255,33 @@ async def receive_alert(
|
||||
data_impact = impact_mapping.get(blast.data_impact.value, DataImpact.NONE)
|
||||
|
||||
# 2026-04-27 Claude Sonnet 4.6: shadow-run Step1 — 補 metadata kwarg,讓 extra_metadata 可觀測
|
||||
_cmd_cs1 = (analysis_result.kubectl_command or "").strip()
|
||||
_alertname_cs1 = str((alert.labels or {}).get("alertname") or alert.alert_type or "")
|
||||
_guarded_action_cs1 = await guard_alert_approval_action(
|
||||
action=(_cmd_cs1 or f"{analysis_result.action_title} | NO_ACTION"),
|
||||
alert_namespace=alert.namespace,
|
||||
alertname=_alertname_cs1,
|
||||
alert_category=get_incident_type(_alertname_cs1),
|
||||
)
|
||||
_matched_playbook_id_cs1 = await resolve_playbook_id_for_alert(
|
||||
alertname=_alertname_cs1,
|
||||
affected_services=analysis_result.affected_services
|
||||
or ([alert.target_resource] if alert.target_resource else []),
|
||||
severity=risk_level.value,
|
||||
)
|
||||
if _guarded_action_cs1.blocked:
|
||||
risk_level = RiskLevel.LOW
|
||||
_cmd_cs1 = ""
|
||||
|
||||
_approval_metadata_cs1 = {
|
||||
"source": ai_provider,
|
||||
"confidence_score": analysis_result.confidence,
|
||||
"is_rule_based": False,
|
||||
"playbook_id": None,
|
||||
"playbook_id": _matched_playbook_id_cs1,
|
||||
**_guarded_action_cs1.metadata,
|
||||
}
|
||||
_cmd_cs1 = (analysis_result.kubectl_command or "").strip()
|
||||
approval_create = ApprovalRequestCreate(
|
||||
action=(_cmd_cs1 or f"{analysis_result.action_title} | NO_ACTION"),
|
||||
action=_guarded_action_cs1.action,
|
||||
description=f"[AI: {ai_provider}] {analysis_result.action_title} | {analysis_result.description}",
|
||||
risk_level=risk_level,
|
||||
blast_radius=BlastRadius(
|
||||
@@ -1172,6 +1298,7 @@ async def receive_alert(
|
||||
],
|
||||
requested_by=f"OpenClaw ({ai_provider})",
|
||||
metadata=_approval_metadata_cs1,
|
||||
matched_playbook_id=_matched_playbook_id_cs1,
|
||||
)
|
||||
suggested_action = analysis_result.kubectl_command
|
||||
else:
|
||||
@@ -1218,7 +1345,7 @@ async def receive_alert(
|
||||
# 設計:confidence ≥ 0.85 + 非 CRITICAL + 非破壞性 + 有 kubectl 指令 → 直接執行
|
||||
# 安全防線:CRITICAL / destructive patterns / NO_ACTION/INVESTIGATE/OBSERVE / 空 kubectl → 降級 PENDING
|
||||
if analysis_result:
|
||||
_cs1_kubectl = analysis_result.kubectl_command.strip() if analysis_result.kubectl_command else ""
|
||||
_cs1_kubectl = _cmd_cs1
|
||||
_cs1_can_auto = (
|
||||
bool(_cs1_kubectl)
|
||||
and analysis_result.confidence >= 0.85
|
||||
@@ -1239,7 +1366,7 @@ async def receive_alert(
|
||||
required_signatures=0,
|
||||
status=ApprovalStatus.APPROVED,
|
||||
risk_level=risk_level.value,
|
||||
matched_playbook_id=None,
|
||||
matched_playbook_id=_matched_playbook_id_cs1,
|
||||
metadata={
|
||||
**_approval_metadata_cs1,
|
||||
"is_high_confidence": True,
|
||||
@@ -1420,6 +1547,39 @@ class AlertmanagerPayload(BaseModel):
|
||||
alerts: list[AlertmanagerAlert]
|
||||
|
||||
|
||||
_CICD_JOB_STATUSES = frozenset({"running", "success", "failed", "pending"})


def _cicd_job_status_from_alert(alert: AlertmanagerAlert) -> str:
    """Map a CI/CD Alertmanager alert's labels to a TelegramGateway job status.

    2026-05-12 Codex: Gitea workflows are sent to the AWOOI API first, so the
    status must not be inferred from ``severity=info`` alone; otherwise
    failed/pending events lose their meaning once they reach AwoooP.

    Resolution order:
      1. The first of the ``status`` / ``job_status`` / ``ci_status`` labels
         whose normalized value is one of ``_CICD_JOB_STATUSES``.
      2. Otherwise the ``severity`` label: ``info`` -> ``success``,
         ``critical`` / ``error`` -> ``failed``.
      3. Otherwise ``running``.

    Args:
        alert: Alert whose ``labels`` mapping (possibly ``None``) is inspected.

    Returns:
        One of ``"running"``, ``"success"``, ``"failed"`` or ``"pending"``.
    """
    label_map = alert.labels or {}

    def _normalized(label_name: str) -> str:
        # Missing / falsy label values collapse to the empty string.
        return str(label_map.get(label_name) or "").strip().lower()

    # An explicit status label always wins over the severity heuristic.
    for candidate in map(_normalized, ("status", "job_status", "ci_status")):
        if candidate in _CICD_JOB_STATUSES:
            return candidate

    severity_to_status = {
        "info": "success",
        "critical": "failed",
        "error": "failed",
    }
    return severity_to_status.get(_normalized("severity"), "running")
|
||||
|
||||
|
||||
def _cicd_duration_seconds_from_alert(alert: AlertmanagerAlert) -> int:
    """Extract a non-negative job duration (whole seconds) from alert labels.

    Reads the ``duration_seconds`` label, falling back to ``duration`` when the
    former is missing or falsy. Integer strings are parsed exactly; fractional
    or exponent-style values such as ``"12.5"`` are truncated toward zero
    instead of being discarded, so a real duration is not reported as 0.

    Args:
        alert: Alert whose ``labels`` mapping (possibly ``None``) is inspected.

    Returns:
        The duration in seconds, clamped to ``>= 0``. Returns 0 when no label
        is present or the value cannot be interpreted as a finite number.
    """
    labels = alert.labels or {}
    raw = labels.get("duration_seconds") or labels.get("duration") or 0
    text = str(raw).strip()
    try:
        # Preferred path: exact integer parsing (no float rounding).
        value = int(text)
    except ValueError:
        try:
            # Fallback for fractional values; int() truncates toward zero.
            value = int(float(text))
        except (ValueError, OverflowError):
            # ValueError: not a number, or NaN; OverflowError: +/-infinity.
            return 0
    return max(value, 0)
|
||||
|
||||
|
||||
def is_internal_ip(client_ip: str) -> bool:
|
||||
"""檢查是否為內網 IP"""
|
||||
import ipaddress
|
||||
@@ -1456,6 +1616,11 @@ async def _process_new_alert_background(
|
||||
try:
|
||||
service = get_approval_service()
|
||||
openclaw = get_openclaw()
|
||||
traced_alert_labels = {
|
||||
**(alert_labels or {}),
|
||||
"fingerprint": fingerprint,
|
||||
"alert_id": alert_id,
|
||||
}
|
||||
|
||||
rule_response = match_rule(alert_context)
|
||||
should_bypass_llm = _should_use_alertmanager_rule_first(rule_response, alert_category)
|
||||
@@ -1489,7 +1654,6 @@ async def _process_new_alert_background(
|
||||
str(blast.get("data_impact", "NONE")).upper(),
|
||||
DataImpact.NONE,
|
||||
)
|
||||
rule_action_title = str(rule_response.get("action_title", "人工排查主機告警"))
|
||||
rule_kubectl = str(rule_response.get("kubectl_command", "")).strip()
|
||||
rule_description = str(rule_response.get("description", message))
|
||||
rule_action = (
|
||||
@@ -1497,13 +1661,31 @@ async def _process_new_alert_background(
|
||||
if rule_kubectl else
|
||||
f"NO_ACTION - {rule_description[:120]}"
|
||||
)
|
||||
_matched_playbook_id_cs2 = await resolve_playbook_id_for_alert(
|
||||
rule_id=str(rule_response.get("rule_id", "")),
|
||||
alertname=alertname,
|
||||
affected_services=[target_resource] if target_resource else [],
|
||||
severity=rule_risk.value,
|
||||
)
|
||||
_guarded_action_cs2 = await guard_alert_approval_action(
|
||||
action=rule_action,
|
||||
alert_namespace=namespace,
|
||||
alertname=alertname,
|
||||
alert_category=alert_category,
|
||||
)
|
||||
if _guarded_action_cs2.blocked:
|
||||
rule_action = _guarded_action_cs2.action
|
||||
rule_kubectl = ""
|
||||
rule_risk = RiskLevel.LOW
|
||||
|
||||
# 2026-04-27 Claude Sonnet 4.6: shadow-run Step1 — 補 metadata kwarg,讓 extra_metadata 可觀測
|
||||
_approval_metadata_cs2 = {
|
||||
"source": "rule_engine",
|
||||
"confidence_score": float(rule_response.get("confidence", 0.0) or 0.0),
|
||||
"is_rule_based": True,
|
||||
"playbook_id": str(rule_response.get("rule_id", "")) or None,
|
||||
"rule_id": str(rule_response.get("rule_id", "")) or None,
|
||||
"playbook_id": _matched_playbook_id_cs2,
|
||||
**_guarded_action_cs2.metadata,
|
||||
}
|
||||
approval_create = ApprovalRequestCreate(
|
||||
action=rule_action,
|
||||
@@ -1534,6 +1716,7 @@ async def _process_new_alert_background(
|
||||
],
|
||||
requested_by="OpenClaw (rule-engine)",
|
||||
metadata=_approval_metadata_cs2,
|
||||
matched_playbook_id=_matched_playbook_id_cs2,
|
||||
)
|
||||
|
||||
approval = await service.create_approval_with_fingerprint(
|
||||
@@ -1565,6 +1748,10 @@ async def _process_new_alert_background(
|
||||
# 2026-04-27 ogt + Claude Sonnet 4.6: CS2 規則引擎自動執行
|
||||
# 設計:is_rule_based=True 確定性高,滿足條件直接執行,不等人工審核
|
||||
# 安全防線:CRITICAL / destructive patterns / NO_ACTION / 空 kubectl → 全部降級 PENDING
|
||||
_cs2_auto_approval = None
|
||||
_cs2_executor = None
|
||||
_cs2_exec_success: bool | None = None
|
||||
_cs2_exec_error: str | None = None
|
||||
try:
|
||||
from src.models.approval import ApprovalRequest, ApprovalStatus
|
||||
from src.services.approval_execution import ApprovalExecutionService
|
||||
@@ -1584,10 +1771,11 @@ async def _process_new_alert_background(
|
||||
required_signatures=0,
|
||||
status=ApprovalStatus.APPROVED,
|
||||
risk_level=rule_risk.value,
|
||||
matched_playbook_id=_approval_metadata_cs2.get("playbook_id"),
|
||||
matched_playbook_id=_matched_playbook_id_cs2,
|
||||
)
|
||||
# 使用 DB 中剛建立的 approval.id 讓 executor 可回寫
|
||||
_auto_approval.id = approval.id
|
||||
_cs2_auto_approval = _auto_approval
|
||||
|
||||
_cs2_executor = ApprovalExecutionService()
|
||||
_cs2_exec_success = await _cs2_executor.execute_approved_action(_auto_approval)
|
||||
@@ -1610,6 +1798,8 @@ async def _process_new_alert_background(
|
||||
exec_success=_cs2_exec_success,
|
||||
)
|
||||
except Exception as _auto_err:
|
||||
_cs2_exec_success = False if _cs2_auto_approval is not None else None
|
||||
_cs2_exec_error = str(_auto_err)
|
||||
logger.warning(
|
||||
"cs2_auto_execute_failed_degraded_to_pending",
|
||||
approval_id=str(approval.id),
|
||||
@@ -1625,7 +1815,7 @@ async def _process_new_alert_background(
|
||||
message=message,
|
||||
source="alertmanager",
|
||||
alertname=alertname,
|
||||
alert_labels=alert_labels,
|
||||
alert_labels=traced_alert_labels,
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
)
|
||||
@@ -1641,6 +1831,41 @@ async def _process_new_alert_background(
|
||||
error=str(_meta_err),
|
||||
)
|
||||
|
||||
await record_alertmanager_event(
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
fingerprint=fingerprint,
|
||||
stage="incident_linked",
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
incident_id=incident_id,
|
||||
approval_id=str(approval.id),
|
||||
repeat_count=1,
|
||||
labels=traced_alert_labels,
|
||||
annotations=alert_context.get("annotations", {}),
|
||||
)
|
||||
|
||||
if _cs2_auto_approval is not None and _cs2_exec_success is not None:
|
||||
try:
|
||||
_cs2_auto_approval.incident_id = incident_id
|
||||
_cs2_executor = _cs2_executor or ApprovalExecutionService()
|
||||
await _cs2_executor.finalize_auto_approved_execution(
|
||||
_cs2_auto_approval,
|
||||
success=_cs2_exec_success,
|
||||
error_message=_cs2_exec_error,
|
||||
)
|
||||
except Exception as _cs2_finalize_err:
|
||||
logger.warning(
|
||||
"cs2_auto_execute_finalize_failed",
|
||||
approval_id=str(approval.id),
|
||||
incident_id=incident_id,
|
||||
error=str(_cs2_finalize_err),
|
||||
)
|
||||
|
||||
_is_heartbeat = is_heartbeat_alertname(alertname)
|
||||
if can_auto_repair and not _is_heartbeat:
|
||||
await _try_auto_repair_background(
|
||||
@@ -1694,7 +1919,12 @@ async def _process_new_alert_background(
|
||||
record_alert_chain_success("alertmanager")
|
||||
return
|
||||
|
||||
analysis_result, ai_provider, raw_response, signoz_metrics, signoz_trace_url, ai_tokens, ai_cost = await openclaw.analyze_alert(alert_context)
|
||||
analysis_result, ai_provider, raw_response, signoz_metrics, signoz_trace_url, ai_tokens, ai_cost = await _analyze_alertmanager_with_timeout(
|
||||
openclaw,
|
||||
alert_context,
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
)
|
||||
|
||||
if analysis_result:
|
||||
risk_mapping = {
|
||||
@@ -1724,15 +1954,34 @@ async def _process_new_alert_background(
|
||||
data_impact = impact_mapping.get(blast.data_impact.value, DataImpact.NONE) if blast else DataImpact.NONE
|
||||
|
||||
# 2026-04-27 Claude Sonnet 4.6: shadow-run Step1 — 補 metadata kwarg,讓 extra_metadata 可觀測
|
||||
_cmd_cs3 = (analysis_result.kubectl_command or "").strip()
|
||||
_guarded_action_cs3 = await guard_alert_approval_action(
|
||||
action=(_cmd_cs3 or f"{analysis_result.action_title} | NO_ACTION"),
|
||||
alert_namespace=namespace,
|
||||
alertname=alertname,
|
||||
alert_category=alert_category,
|
||||
)
|
||||
_matched_playbook_id_cs3 = await resolve_playbook_id_for_alert(
|
||||
rule_id=str(rule_response.get("rule_id", "")),
|
||||
alertname=alertname,
|
||||
affected_services=analysis_result.affected_services
|
||||
or ([target_resource] if target_resource else []),
|
||||
severity=risk_level.value,
|
||||
)
|
||||
if _guarded_action_cs3.blocked:
|
||||
risk_level = RiskLevel.LOW
|
||||
_cmd_cs3 = ""
|
||||
|
||||
_approval_metadata_cs3 = {
|
||||
"source": ai_provider,
|
||||
"confidence_score": analysis_result.confidence,
|
||||
"is_rule_based": False,
|
||||
"playbook_id": None,
|
||||
"rule_id": str(rule_response.get("rule_id", "")) or None,
|
||||
"playbook_id": _matched_playbook_id_cs3,
|
||||
**_guarded_action_cs3.metadata,
|
||||
}
|
||||
_cmd_cs3 = (analysis_result.kubectl_command or "").strip()
|
||||
approval_create = ApprovalRequestCreate(
|
||||
action=(_cmd_cs3 or f"{analysis_result.action_title} | NO_ACTION"),
|
||||
action=_guarded_action_cs3.action,
|
||||
description=f"[AI: {ai_provider}] {analysis_result.action_title} | {analysis_result.description}",
|
||||
risk_level=risk_level,
|
||||
blast_radius=BlastRadius(
|
||||
@@ -1747,6 +1996,7 @@ async def _process_new_alert_background(
|
||||
],
|
||||
requested_by=f"OpenClaw ({ai_provider})",
|
||||
metadata=_approval_metadata_cs3,
|
||||
matched_playbook_id=_matched_playbook_id_cs3,
|
||||
)
|
||||
|
||||
approval = await service.create_approval_with_fingerprint(
|
||||
@@ -1760,7 +2010,7 @@ async def _process_new_alert_background(
|
||||
"risk_level": risk_level.value,
|
||||
"confidence": analysis_result.confidence,
|
||||
"action": approval_create.action,
|
||||
"kubectl_command": analysis_result.kubectl_command,
|
||||
"kubectl_command": _cmd_cs3,
|
||||
"is_rule_based": False,
|
||||
"source": ai_provider,
|
||||
}
|
||||
@@ -1776,7 +2026,7 @@ async def _process_new_alert_background(
|
||||
logger.warning("shadow_auto_approve_failed", error=str(_shadow_err_cs3))
|
||||
|
||||
# 2026-04-27 Claude Sonnet 4.6: CS3 LLM 高信心自動執行(修法3擴展)
|
||||
_cs3_kubectl = (analysis_result.kubectl_command or "").strip()
|
||||
_cs3_kubectl = _cmd_cs3
|
||||
_cs3_can_auto = (
|
||||
bool(_cs3_kubectl)
|
||||
and analysis_result.confidence >= 0.85
|
||||
@@ -1784,8 +2034,15 @@ async def _process_new_alert_background(
|
||||
and "NO_ACTION" not in (analysis_result.action_title or "")
|
||||
and is_safe_kubectl_action(_cs3_kubectl)
|
||||
)
|
||||
_cs3_auto_approval = None
|
||||
_cs3_executor = None
|
||||
_cs3_exec_success: bool | None = None
|
||||
_cs3_exec_error: str | None = None
|
||||
if _cs3_can_auto:
|
||||
try:
|
||||
from src.models.approval import ApprovalRequest, ApprovalStatus
|
||||
from src.services.approval_execution import ApprovalExecutionService
|
||||
|
||||
_cs3_auto_approval = ApprovalRequest(
|
||||
action=approval_create.action,
|
||||
description=approval_create.description,
|
||||
@@ -1793,7 +2050,7 @@ async def _process_new_alert_background(
|
||||
required_signatures=0,
|
||||
status=ApprovalStatus.APPROVED,
|
||||
risk_level=risk_level.value,
|
||||
matched_playbook_id=None,
|
||||
matched_playbook_id=_matched_playbook_id_cs3,
|
||||
metadata={
|
||||
**_approval_metadata_cs3,
|
||||
"is_high_confidence": True,
|
||||
@@ -1802,8 +2059,17 @@ async def _process_new_alert_background(
|
||||
else "cs3_auto_confident_execution",
|
||||
},
|
||||
)
|
||||
_cs3_auto_approval.id = approval.id
|
||||
_cs3_executor = ApprovalExecutionService()
|
||||
_cs3_exec_success = await _cs3_executor.execute_approved_action(_cs3_auto_approval)
|
||||
try:
|
||||
await service.update_execution_status(approval.id, _cs3_exec_success)
|
||||
except Exception as _cs3_upd_err:
|
||||
logger.warning(
|
||||
"cs3_auto_execute_status_update_failed",
|
||||
approval_id=str(approval.id),
|
||||
error=str(_cs3_upd_err),
|
||||
)
|
||||
logger.info(
|
||||
"cs3_llm_auto_executed",
|
||||
approval_id=str(approval.id),
|
||||
@@ -1819,6 +2085,8 @@ async def _process_new_alert_background(
|
||||
),
|
||||
)
|
||||
except Exception as _cs3_exec_err:
|
||||
_cs3_exec_success = False if _cs3_auto_approval is not None else None
|
||||
_cs3_exec_error = str(_cs3_exec_err)
|
||||
logger.warning("cs3_llm_auto_execute_failed", error=str(_cs3_exec_err))
|
||||
|
||||
incident_id = await create_incident_for_approval(
|
||||
@@ -1830,7 +2098,7 @@ async def _process_new_alert_background(
|
||||
message=message,
|
||||
source="alertmanager",
|
||||
alertname=alertname,
|
||||
alert_labels=alert_labels,
|
||||
alert_labels=traced_alert_labels,
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
)
|
||||
@@ -1846,6 +2114,41 @@ async def _process_new_alert_background(
|
||||
error=str(_meta_err),
|
||||
)
|
||||
|
||||
await record_alertmanager_event(
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
fingerprint=fingerprint,
|
||||
stage="incident_linked",
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
incident_id=incident_id,
|
||||
approval_id=str(approval.id),
|
||||
repeat_count=1,
|
||||
labels=traced_alert_labels,
|
||||
annotations=alert_context.get("annotations", {}),
|
||||
)
|
||||
|
||||
if _cs3_auto_approval is not None and _cs3_exec_success is not None:
|
||||
try:
|
||||
_cs3_auto_approval.incident_id = incident_id
|
||||
_cs3_executor = _cs3_executor or ApprovalExecutionService()
|
||||
await _cs3_executor.finalize_auto_approved_execution(
|
||||
_cs3_auto_approval,
|
||||
success=_cs3_exec_success,
|
||||
error_message=_cs3_exec_error,
|
||||
)
|
||||
except Exception as _cs3_finalize_err:
|
||||
logger.warning(
|
||||
"cs3_auto_execute_finalize_failed",
|
||||
approval_id=str(approval.id),
|
||||
incident_id=incident_id,
|
||||
error=str(_cs3_finalize_err),
|
||||
)
|
||||
|
||||
root_cause = analysis_result.description or message
|
||||
estimated_downtime = blast.estimated_downtime if blast else "~30s"
|
||||
primary_responsibility = analysis_result.primary_responsibility or "COLLAB"
|
||||
@@ -1895,7 +2198,7 @@ async def _process_new_alert_background(
|
||||
risk_level=risk_level.value,
|
||||
resource_name=target_resource,
|
||||
root_cause=root_cause,
|
||||
suggested_action=(analysis_result.kubectl_command or "").strip() or analysis_result.suggested_action.value,
|
||||
suggested_action=approval_create.action,
|
||||
estimated_downtime=estimated_downtime,
|
||||
hit_count=1,
|
||||
primary_responsibility=primary_responsibility,
|
||||
@@ -1921,11 +2224,17 @@ async def _process_new_alert_background(
|
||||
else:
|
||||
# LLM 失敗 - 使用預設值
|
||||
# 2026-04-27 Claude Sonnet 4.6: shadow-run Step1 — 補 metadata kwarg,讓 extra_metadata 可觀測
|
||||
_matched_playbook_id_cs4 = await resolve_playbook_id_for_alert(
|
||||
rule_id=str(rule_response.get("rule_id", "")),
|
||||
alertname=alertname,
|
||||
affected_services=[target_resource] if target_resource else [],
|
||||
severity="medium",
|
||||
)
|
||||
_approval_metadata_cs4 = {
|
||||
"source": "fallback",
|
||||
"confidence_score": None,
|
||||
"is_rule_based": False,
|
||||
"playbook_id": None,
|
||||
"playbook_id": _matched_playbook_id_cs4,
|
||||
}
|
||||
fallback_create = ApprovalRequestCreate(
|
||||
action="OBSERVE",
|
||||
@@ -1940,6 +2249,7 @@ async def _process_new_alert_background(
|
||||
dry_run_checks=[],
|
||||
requested_by="OpenClaw (fallback)",
|
||||
metadata=_approval_metadata_cs4,
|
||||
matched_playbook_id=_matched_playbook_id_cs4,
|
||||
)
|
||||
|
||||
approval = await service.create_approval_with_fingerprint(
|
||||
@@ -1977,7 +2287,7 @@ async def _process_new_alert_background(
|
||||
message=message,
|
||||
source="alertmanager",
|
||||
alertname=alertname,
|
||||
alert_labels=alert_labels,
|
||||
alert_labels=traced_alert_labels,
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
)
|
||||
@@ -1993,6 +2303,55 @@ async def _process_new_alert_background(
|
||||
error=str(_meta_err),
|
||||
)
|
||||
|
||||
await record_alertmanager_event(
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
fingerprint=fingerprint,
|
||||
stage="incident_linked",
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
incident_id=fallback_incident_id,
|
||||
approval_id=str(approval.id),
|
||||
repeat_count=1,
|
||||
labels=traced_alert_labels,
|
||||
annotations=alert_context.get("annotations", {}),
|
||||
)
|
||||
|
||||
_is_heartbeat = is_heartbeat_alertname(alertname)
|
||||
if can_auto_repair and not _is_heartbeat:
|
||||
await _try_auto_repair_background(
|
||||
incident_id=fallback_incident_id,
|
||||
approval_id=str(approval.id),
|
||||
alert_type=alert_type,
|
||||
target_resource=target_resource,
|
||||
namespace=namespace,
|
||||
)
|
||||
elif not can_auto_repair and not _is_heartbeat:
|
||||
from src.repositories.alert_operation_log_repository import get_alert_operation_log_repository
|
||||
_op_log_fallback = get_alert_operation_log_repository()
|
||||
await _op_log_fallback.append(
|
||||
"GUARDRAIL_BLOCKED",
|
||||
incident_id=fallback_incident_id,
|
||||
approval_id=str(approval.id),
|
||||
actor="prometheus-rule",
|
||||
action_detail=f"Prometheus rule 設定 auto_repair=false,fallback 轉人工: {alertname}",
|
||||
success=False,
|
||||
context={"alertname": alertname, "auto_repair_flag": False},
|
||||
)
|
||||
await _escalate_auto_repair_unavailable(
|
||||
incident_id=fallback_incident_id,
|
||||
approval_id=str(approval.id),
|
||||
alert_type=alert_type,
|
||||
target_resource=target_resource,
|
||||
namespace=namespace,
|
||||
failure_reason="Prometheus rule auto_repair=false,fallback 未進入自動修復評估",
|
||||
attempted_actions="llm_fallback -> guardrail:auto_repair_false -> emergency_intervention",
|
||||
)
|
||||
|
||||
await _push_to_telegram_background(
|
||||
approval_id=str(approval.id),
|
||||
risk_level="medium",
|
||||
@@ -2082,6 +2441,7 @@ async def alertmanager_webhook(
|
||||
# (2026-04-08 Claude Sonnet 4.6 Asia/Taipei,ADR-062 Q9)
|
||||
# ==========================================================================
|
||||
_alert_labels = alert.labels or {}
|
||||
_alert_annotations = alert.annotations or {}
|
||||
_alertname_for_log = _alert_labels.get("alertname", "UnknownAlert")
|
||||
# Q9: auto_repair flag — Rule=false 強制 HITL(不觸發自動修復背景任務)
|
||||
_can_auto_repair_by_rule = _alert_labels.get("auto_repair", "true").lower() == "true"
|
||||
@@ -2097,6 +2457,7 @@ async def alertmanager_webhook(
|
||||
"alert_id": alert_id,
|
||||
"alertname": _alertname_for_log,
|
||||
"labels": _alert_labels,
|
||||
"annotations": _alert_annotations,
|
||||
"auto_repair_flag": _can_auto_repair_by_rule,
|
||||
},
|
||||
)
|
||||
@@ -2125,11 +2486,12 @@ async def alertmanager_webhook(
|
||||
telegram = get_telegram_gateway()
|
||||
# 解析 CI/CD 狀態
|
||||
stage = alert.labels.get("stage", "")
|
||||
job_status = "success" if alert.labels.get("severity") == "info" else "running"
|
||||
job_status = _cicd_job_status_from_alert(alert)
|
||||
commit_sha = alert.labels.get("commit", "")
|
||||
triggered_by = alert.labels.get("triggered_by", "CI")
|
||||
workflow_url = alert.annotations.get("workflow_url", "")
|
||||
summary = alert.annotations.get("summary", alertname)
|
||||
detail_message = alert.annotations.get("description", "")
|
||||
|
||||
await telegram.send_cicd_progress(
|
||||
job_name=summary,
|
||||
@@ -2137,6 +2499,8 @@ async def alertmanager_webhook(
|
||||
stage=stage,
|
||||
commit_sha=commit_sha,
|
||||
triggered_by=triggered_by,
|
||||
duration_seconds=_cicd_duration_seconds_from_alert(alert),
|
||||
message=detail_message,
|
||||
workflow_url=workflow_url,
|
||||
)
|
||||
|
||||
@@ -2236,6 +2600,22 @@ async def alertmanager_webhook(
|
||||
target=target_resource,
|
||||
fingerprint=fingerprint,
|
||||
)
|
||||
background_tasks.add_task(
|
||||
record_alertmanager_event,
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
fingerprint=fingerprint,
|
||||
stage="received",
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
source_url=alert.generatorURL,
|
||||
labels=dict(alert.labels) if alert.labels else {},
|
||||
annotations=dict(alert.annotations) if alert.annotations else {},
|
||||
)
|
||||
|
||||
# ==========================================================================
|
||||
# ADR-076: 告警聚合引擎 — 5 分鐘滑動視窗,防止告警風暴
|
||||
@@ -2266,6 +2646,19 @@ async def alertmanager_webhook(
|
||||
parent_fingerprint=grouping_result.parent_fingerprint,
|
||||
reason="Alert storm suppressed — child alert within 5-min window",
|
||||
)
|
||||
background_tasks.add_task(
|
||||
record_grouped_alert_event,
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
group_key=grouping_result.group_key,
|
||||
count=grouping_result.count,
|
||||
parent_fingerprint=grouping_result.parent_fingerprint,
|
||||
fingerprint=fingerprint,
|
||||
)
|
||||
return AlertResponse(
|
||||
success=True,
|
||||
message=(
|
||||
@@ -2305,6 +2698,26 @@ async def alertmanager_webhook(
|
||||
hit_count=updated_approval.hit_count,
|
||||
reason="Converged alert - Telegram already sent for this fingerprint",
|
||||
)
|
||||
background_tasks.add_task(
|
||||
record_alertmanager_event,
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
fingerprint=fingerprint,
|
||||
stage="converged",
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
incident_id=getattr(updated_approval, "incident_id", None),
|
||||
approval_id=str(updated_approval.id),
|
||||
repeat_count=updated_approval.hit_count,
|
||||
is_duplicate=True,
|
||||
source_url=alert.generatorURL,
|
||||
labels=dict(alert.labels) if alert.labels else {},
|
||||
annotations=dict(alert.annotations) if alert.annotations else {},
|
||||
)
|
||||
|
||||
return AlertResponse(
|
||||
success=True,
|
||||
@@ -2332,10 +2745,27 @@ async def alertmanager_webhook(
|
||||
message=message,
|
||||
source="alertmanager",
|
||||
alertname=alertname,
|
||||
alert_labels=alert.labels,
|
||||
alert_labels={**alert.labels, "fingerprint": fingerprint, "alert_id": alert_id},
|
||||
notification_type="TYPE-1",
|
||||
alert_category=alert_category,
|
||||
)
|
||||
background_tasks.add_task(
|
||||
record_alertmanager_event,
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
fingerprint=fingerprint,
|
||||
stage="incident_linked",
|
||||
notification_type="TYPE-1",
|
||||
alert_category=alert_category,
|
||||
incident_id=_info_incident_id,
|
||||
source_url=alert.generatorURL,
|
||||
labels={**alert.labels, "fingerprint": fingerprint, "alert_id": alert_id},
|
||||
annotations=dict(alert.annotations) if alert.annotations else {},
|
||||
)
|
||||
# 2026-04-15 ogt: TYPE-1 純資訊告警建立後立即關閉
|
||||
# 設計原則: backup/heartbeat/info 告警無需追蹤狀態,通知即完成
|
||||
# 防止 incidents 表無限累積 INVESTIGATING 記錄(ADR-073 漏洞修補)
|
||||
@@ -2355,7 +2785,7 @@ async def alertmanager_webhook(
|
||||
record_alert_chain_success("alertmanager")
|
||||
return AlertResponse(
|
||||
success=True,
|
||||
message=f"✅ TYPE-1 純資訊告警已通知 (no LLM)",
|
||||
message="✅ TYPE-1 純資訊告警已通知 (no LLM)",
|
||||
alert_id=alert_id,
|
||||
approval_created=False,
|
||||
)
|
||||
@@ -2367,6 +2797,23 @@ async def alertmanager_webhook(
|
||||
fingerprint=fingerprint,
|
||||
ttl_seconds=ALERTMANAGER_LLM_INFLIGHT_LOCK_TTL_SECONDS,
|
||||
)
|
||||
background_tasks.add_task(
|
||||
record_alertmanager_event,
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
fingerprint=fingerprint,
|
||||
stage="llm_inflight_suppressed",
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
is_duplicate=True,
|
||||
source_url=alert.generatorURL,
|
||||
labels=dict(alert.labels) if alert.labels else {},
|
||||
annotations=dict(alert.annotations) if alert.annotations else {},
|
||||
)
|
||||
return AlertResponse(
|
||||
success=True,
|
||||
message="🛡️ 告警已由同指紋背景 AI 分析處理中,跳過重複 LLM 呼叫",
|
||||
|
||||
126
apps/api/src/core/awooop_operator_auth.py
Normal file
126
apps/api/src/core/awooop_operator_auth.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""
|
||||
AwoooP Operator authentication boundary.
|
||||
|
||||
ADR-116 Gate 5 approval decisions must not trust browser-supplied identities.
|
||||
This module accepts a short-lived operator identity only when it is paired with
|
||||
the server-side AwoooP operator key.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import secrets
|
||||
from dataclasses import dataclass
|
||||
from typing import Annotated
|
||||
|
||||
import structlog
|
||||
from fastapi import Header, HTTPException, status
|
||||
|
||||
from src.core.config import settings
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
_OPERATOR_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_.:@-]{1,127}$")
|
||||
_PROD_ENVS = {"prod", "production"}
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class AwoooPOperatorPrincipal:
|
||||
"""Authenticated AwoooP operator principal."""
|
||||
|
||||
operator_id: str
|
||||
auth_method: str
|
||||
|
||||
|
||||
def _auth_error(detail: str = "Operator authentication required") -> HTTPException:
    """Build (but do not raise) a 401 Unauthorized ``HTTPException``.

    Args:
        detail: Message placed in the exception's ``detail`` field.

    Returns:
        The exception instance; callers are expected to ``raise`` it.
    """
    unauthorized = status.HTTP_401_UNAUTHORIZED
    return HTTPException(detail=detail, status_code=unauthorized)
|
||||
|
||||
|
||||
def _clean_operator_id(operator_id: str | None) -> str:
    """Strip and validate an operator identity.

    Args:
        operator_id: Raw identity value, or ``None`` when it was not supplied.

    Returns:
        The identity with surrounding whitespace removed.

    Raises:
        HTTPException: 401 when ``operator_id`` is ``None``; 422 when the
            stripped value does not fully match ``_OPERATOR_ID_RE``.
    """
    if operator_id is None:
        # A missing identity is an authentication failure, not a format error.
        raise _auth_error()

    candidate = operator_id.strip()
    if _OPERATOR_ID_RE.fullmatch(candidate) is not None:
        return candidate

    raise HTTPException(
        status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
        detail="Invalid operator identity",
    )
|
||||
|
||||
|
||||
def authenticate_awooop_operator_headers(
    operator_id: str | None,
    operator_key: str | None,
    *,
    configured_key: str | None = None,
    environment: str | None = None,
) -> AwoooPOperatorPrincipal:
    """Validate trusted AwoooP operator headers.

    The operator identity is format-checked first. When no server-side key is
    configured, production environments always reject the request, while any
    other environment accepts the identity as a ``dev_header`` principal.
    When a key is configured, the supplied key must match it exactly.

    Args:
        operator_id: Value from ``X-AwoooP-Operator-Id``.
        operator_key: Value from ``X-AwoooP-Operator-Key``.
        configured_key: Server-side shared key. Defaults to settings.
        environment: Runtime environment. Defaults to settings.

    Returns:
        Authenticated operator principal.

    Raises:
        HTTPException: 401 when authentication is missing/invalid, or 422 for
            malformed operator identity.
    """
    cleaned_operator_id = _clean_operator_id(operator_id)
    expected_key = (
        settings.AWOOOP_OPERATOR_API_KEY
        if configured_key is None
        else configured_key
    )
    # Strip as well as lowercase so a padded value such as "production " is
    # still treated as production and cannot fall through to the dev bypass.
    runtime_env = (environment or settings.ENVIRONMENT or "").strip().lower()

    if not expected_key:
        if runtime_env in _PROD_ENVS:
            # Fail closed: production must never run without an operator key.
            logger.critical(
                "awooop_operator_key_missing_in_production",
                environment=runtime_env,
            )
            raise _auth_error()
        logger.warning(
            "awooop_operator_key_skipped_dev_only",
            environment=runtime_env,
            operator_id=cleaned_operator_id,
        )
        return AwoooPOperatorPrincipal(
            operator_id=cleaned_operator_id,
            auth_method="dev_header",
        )

    if not operator_key:
        logger.warning("awooop_operator_key_missing", operator_id=cleaned_operator_id)
        raise _auth_error()

    # compare_digest() accepts str operands only when they are pure ASCII and
    # raises TypeError otherwise. Comparing UTF-8 bytes keeps the comparison
    # constant-time and turns a non-ASCII header value into a normal 401
    # instead of an unhandled exception.
    keys_match = secrets.compare_digest(
        operator_key.encode("utf-8"),
        expected_key.encode("utf-8"),
    )
    if not keys_match:
        logger.warning("awooop_operator_key_invalid", operator_id=cleaned_operator_id)
        raise _auth_error()

    return AwoooPOperatorPrincipal(
        operator_id=cleaned_operator_id,
        auth_method="operator_api_key",
    )
|
||||
|
||||
|
||||
async def verify_awooop_operator(
    x_awooop_operator_id: Annotated[
        str | None,
        Header(alias="X-AwoooP-Operator-Id"),
    ] = None,
    x_awooop_operator_key: Annotated[
        str | None,
        Header(alias="X-AwoooP-Operator-Key"),
    ] = None,
) -> AwoooPOperatorPrincipal:
    """FastAPI dependency for operator mutation endpoints.

    Reads the ``X-AwoooP-Operator-Id`` and ``X-AwoooP-Operator-Key`` request
    headers and delegates validation to
    ``authenticate_awooop_operator_headers``; any ``HTTPException`` raised
    there propagates unchanged.

    Returns:
        The authenticated operator principal.
    """
    principal = authenticate_awooop_operator_headers(
        x_awooop_operator_id,
        x_awooop_operator_key,
    )
    return principal
|
||||
@@ -145,7 +145,7 @@ class Settings(BaseSettings):
|
||||
# ==========================================================================
|
||||
# ADR-104: LLM Playbook Generator
|
||||
# 成功修復且未命中既有 Playbook 時,用本地 LLM 生成 DRAFT/REVIEW Playbook。
|
||||
# 成本護欄:實作層只走 local provider(Ollama 111 → Ollama 188),不新增雲端 fallback。
|
||||
# 成本護欄:實作層只走 local provider(GCP-A → GCP-B → 111),不新增雲端 fallback。
|
||||
# 回滾指令: kubectl set env deployment/awoooi-api ENABLE_LLM_PLAYBOOK_GENERATION=false
|
||||
# ==========================================================================
|
||||
ENABLE_LLM_PLAYBOOK_GENERATION: bool = Field(
|
||||
@@ -215,8 +215,8 @@ class Settings(BaseSettings):
|
||||
description="Phase 25 P0: DIAGNOSE NIM timeout (秒),實測 2.2-27.3s avg 10.6s,60s 含 buffer",
|
||||
)
|
||||
OLLAMA_DIAGNOSE_TIMEOUT_SECONDS: int = Field(
|
||||
default=200,
|
||||
description="Phase 25 P0: Ollama timeout (秒),實測 CPU-only 238s,保留欄位但 DIAGNOSE 不再走 Ollama",
|
||||
default=300,
|
||||
description="Ollama diagnose timeout (秒)。GCP qwen3:14b CPU-only can exceed the old 120s proxy limit.",
|
||||
)
|
||||
|
||||
# ==========================================================================
|
||||
@@ -370,11 +370,16 @@ class Settings(BaseSettings):
|
||||
)
|
||||
return v
|
||||
|
||||
# 2026-04-25 Claude Engineer-C (P1.1): Ollama 健康檢測推理測試模型
|
||||
# 2026-05-05 Codex: health inference must stay on alert-fast model; qwen2.5
|
||||
# keeps reloading a 7B model on CPU-only GCP and slows incident fallback.
|
||||
OLLAMA_HEALTH_CHECK_MODEL: str = Field(
|
||||
default="qwen2.5:7b-instruct",
|
||||
default="gemma3:4b",
|
||||
description="OllamaHealthMonitor 推理測試使用模型(P1.1)",
|
||||
)
|
||||
OLLAMA_EMBEDDING_MODEL: str = Field(
|
||||
default="bge-m3:latest",
|
||||
description="Ollama embedding model. ADR-110 migrated embeddings from nomic-embed-text to bge-m3.",
|
||||
)
|
||||
# 2026-04-12 ogt: 心跳必須確認載入的 Ollama 模型清單
|
||||
# 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP 升級,更新必要模型清單(nomic→bge-m3 + 新增 qwen3:14b + hermes3)
|
||||
OLLAMA_REQUIRED_MODELS: list[str] = Field(
|
||||
@@ -500,10 +505,42 @@ class Settings(BaseSettings):
|
||||
default=False,
|
||||
description=(
|
||||
"Allow LocalCodeReviewService to fall back to Gemini when the "
|
||||
"GCP-B/Ollama code-review lane fails. Default false to avoid "
|
||||
"local Ollama code-review lane fails. Default false to avoid "
|
||||
"unexpected cloud spend from Gitea push/PR alerts."
|
||||
),
|
||||
)
|
||||
ALERT_AI_ALLOW_CLOUD_FALLBACK: bool = Field(
|
||||
default=True,
|
||||
description=(
|
||||
"Allow incident/alert OpenClaw analysis to use cloud fallback "
|
||||
"providers after the GCP-A/GCP-B/111 Ollama lane is exhausted. "
|
||||
"Default true so Gemini can act as the final backup, after the "
|
||||
"ordered Ollama lane is exhausted."
|
||||
),
|
||||
)
|
||||
ALERT_AI_ENFORCE_OLLAMA_FIRST: bool = Field(
|
||||
default=True,
|
||||
description=(
|
||||
"Force incident/alert OpenClaw analysis to try GCP-A, then GCP-B, "
|
||||
"then local 111 before cloud backup providers such as Gemini."
|
||||
),
|
||||
)
|
||||
ALERT_OLLAMA_MODEL: str = Field(
|
||||
default="qwen3:14b",
|
||||
description=(
|
||||
"Ollama model used for incident/alert deep diagnosis. Alert cards "
|
||||
"may wait for this model; Gemini remains a backup after GCP-A, "
|
||||
"GCP-B, and 111 fail."
|
||||
),
|
||||
)
|
||||
INCIDENT_LLM_TIMEOUT_SECONDS: int = Field(
|
||||
default=360,
|
||||
description=(
|
||||
"Outer timeout for incident OpenClaw proposal generation. This must "
|
||||
"be long enough for the GCP-A/GCP-B/111 Ollama lane to complete "
|
||||
"before Gemini backup is considered useful."
|
||||
),
|
||||
)
|
||||
# 2026-03-29 ogt: ADR-036 Nemotron Tool Calling 整合
|
||||
NVIDIA_API_KEY: str = Field(
|
||||
default="",
|
||||
@@ -565,6 +602,77 @@ class Settings(BaseSettings):
|
||||
default="",
|
||||
description="API Key for K8s admin endpoints (X-K8s-Api-Key header)",
|
||||
)
|
||||
AWOOOP_OPERATOR_API_KEY: str = Field(
|
||||
default="",
|
||||
description=(
|
||||
"API key for AwoooP operator mutation endpoints "
|
||||
"(X-AwoooP-Operator-Key header)"
|
||||
),
|
||||
)
|
||||
ENABLE_AWOOOP_ANSIBLE_CHECK_MODE_WORKER: bool = Field(
|
||||
default=False,
|
||||
description=(
|
||||
"True=consume ansible_candidate_matched AOL rows and run "
|
||||
"ansible-playbook --check --diff only. Apply remains disabled."
|
||||
),
|
||||
)
|
||||
AWOOOP_ANSIBLE_CHECK_MODE_INTERVAL_SECONDS: int = Field(
|
||||
default=300,
|
||||
ge=60,
|
||||
description="AwoooP Ansible check-mode worker polling interval.",
|
||||
)
|
||||
AWOOOP_ANSIBLE_CHECK_MODE_BATCH_LIMIT: int = Field(
|
||||
default=1,
|
||||
ge=1,
|
||||
le=5,
|
||||
description="Maximum Ansible check-mode candidates claimed per worker tick.",
|
||||
)
|
||||
AWOOOP_ANSIBLE_CHECK_MODE_TIMEOUT_SECONDS: int = Field(
|
||||
default=180,
|
||||
ge=30,
|
||||
le=600,
|
||||
description="Timeout for one ansible-playbook --check --diff execution.",
|
||||
)
|
||||
AWOOOP_ANSIBLE_CHECK_MODE_STARTUP_SLEEP_SECONDS: int = Field(
|
||||
default=120,
|
||||
ge=0,
|
||||
le=900,
|
||||
description="Delay before the check-mode worker first tick after API startup.",
|
||||
)
|
||||
AWOOOP_ANSIBLE_CHECK_MODE_TRANSPORT_PROFILE: str = Field(
|
||||
default="ssh_mcp",
|
||||
description=(
|
||||
"SSH transport profile used by Ansible check-mode. Production uses "
|
||||
"the existing ssh-mcp key so repair-bot forced-command remains reserved "
|
||||
"for whitelist repairs."
|
||||
),
|
||||
)
|
||||
AWOOOP_ANSIBLE_CHECK_MODE_SSH_KEY_PATH: str = Field(
|
||||
default="/run/secrets/ssh_mcp_key",
|
||||
description="Private key path for Ansible check-mode SSH transport.",
|
||||
)
|
||||
AWOOOP_ANSIBLE_CHECK_MODE_KNOWN_HOSTS_PATH: str = Field(
|
||||
default="/etc/ssh-mcp/known_hosts",
|
||||
description="known_hosts path for Ansible check-mode SSH transport.",
|
||||
)
|
||||
AWOOOP_ANSIBLE_CHECK_MODE_CANDIDATE_MAX_AGE_HOURS: int = Field(
|
||||
default=24,
|
||||
ge=1,
|
||||
le=168,
|
||||
description=(
|
||||
"Only recent Ansible candidate audit rows are eligible for automatic "
|
||||
"check-mode claims; older backlog remains visible but is not drained as noise."
|
||||
),
|
||||
)
|
||||
AWOOOP_ANSIBLE_CHECK_MODE_TRANSPORT_COOLDOWN_SECONDS: int = Field(
|
||||
default=21_600,
|
||||
ge=300,
|
||||
le=86_400,
|
||||
description=(
|
||||
"Cooldown after transport-level check-mode blockers such as "
|
||||
"forced-command repair SSH denial."
|
||||
),
|
||||
)
|
||||
|
||||
# ==========================================================================
|
||||
# 統帥鐵律:禁止 SQLite (AWOOOI 憲法)
|
||||
@@ -855,7 +963,7 @@ class Settings(BaseSettings):
|
||||
# ==========================================================================
|
||||
# MCP Phase 2b: Prometheus MCP Server (ADR-071, 2026-04-11 Claude Sonnet 4.6)
|
||||
# ==========================================================================
|
||||
# 2026-04-29 ogt + Claude Opus 4.7: drift fix — 188 是 Ollama Hub,Prometheus 實際在 110
|
||||
# 2026-04-29 ogt + Claude Opus 4.7: drift fix — Prometheus 實際在 110
|
||||
# ConfigMap 04-configmap.yaml 也是 110;governance_agent / SLO check 連 188 會 timeout
|
||||
# 此 drift 是 SPF-4 (governance_agent silently fail) 根因之一
|
||||
PROMETHEUS_URL: str = Field(
|
||||
@@ -929,7 +1037,7 @@ class Settings(BaseSettings):
|
||||
"devops": "192.168.0.110", # Harbor, GH Runner
|
||||
"security": "192.168.0.112", # Kali Scanner
|
||||
"k3s_master": "192.168.0.120", # K3s Master
|
||||
"ai_web": "192.168.0.188", # Nginx, Postgres, Redis, Ollama
|
||||
"ai_web": "192.168.0.188", # Nginx, Postgres, Redis, SignOz
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -37,8 +37,8 @@ REDIS_KEY_DECISION = "decision:"
|
||||
APPROVAL_TO_INCIDENT_STATUS = {
|
||||
"pending": "investigating",
|
||||
"approved": "resolved",
|
||||
"rejected": "rejected",
|
||||
"expired": "expired",
|
||||
"rejected": "escalated",
|
||||
"expired": "escalated",
|
||||
}
|
||||
|
||||
# Incident 狀態 → 是否活躍
|
||||
|
||||
@@ -11,6 +11,7 @@ Features:
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
@@ -19,6 +20,28 @@ from structlog.types import Processor
|
||||
|
||||
from src.core.config import settings
|
||||
|
||||
# Matches the token segment of a Telegram Bot API URL: everything after
# "api.telegram.org/bot" up to the next "/" or whitespace.
_TELEGRAM_BOT_URL_RE = re.compile(r"(api\.telegram\.org/bot)[^/\s]+")


def _redact_sensitive_log_text(text: str) -> str:
    """Mask sensitive URLs that may appear in third-party logger messages."""
    return _TELEGRAM_BOT_URL_RE.sub(r"\1<redacted>", text)


def _redact_log_arg(value: object) -> object:
    """Redact one logging argument while preserving harmless values as-is.

    Args:
        value: A positional or mapping argument attached to a log record.

    Returns:
        The redacted string when the argument's text contained a sensitive
        URL; otherwise the original object, untouched.
    """
    rendered = value if isinstance(value, str) else str(value)
    redacted = _redact_sensitive_log_text(rendered)
    # Returning the original object when nothing was masked keeps ints/floats
    # intact, so format specifiers such as ``%d`` still work when the record
    # is formatted later.
    return value if redacted == rendered else redacted


class SensitiveURLRedactionFilter(logging.Filter):
    """Standard logging filter that keeps token-bearing URLs out of the log.

    Intended for third-party loggers such as httpx, which log full request
    URLs. Both the message template and its arguments are scanned.
    """

    def filter(self, record: logging.LogRecord) -> bool:
        """Redact the record in place and always let it through.

        Args:
            record: The log record about to be emitted.

        Returns:
            Always ``True``; the filter rewrites records, it never drops them.
        """
        record.msg = _redact_sensitive_log_text(str(record.msg))
        if isinstance(record.args, tuple):
            record.args = tuple(_redact_log_arg(arg) for arg in record.args)
        elif isinstance(record.args, dict):
            record.args = {
                key: _redact_log_arg(value)
                for key, value in record.args.items()
            }
        return True
|
||||
|
||||
|
||||
def setup_logging() -> None:
|
||||
"""Configure structlog for the application"""
|
||||
@@ -68,6 +91,15 @@ def setup_logging() -> None:
|
||||
stream=sys.stdout,
|
||||
level=logging.getLevelName(settings.LOG_LEVEL),
|
||||
)
|
||||
redaction_filter = SensitiveURLRedactionFilter()
|
||||
root_logger = logging.getLogger()
|
||||
root_logger.addFilter(redaction_filter)
|
||||
for handler in root_logger.handlers:
|
||||
handler.addFilter(redaction_filter)
|
||||
|
||||
# httpx INFO 會輸出完整 request URL;Telegram Bot API URL 內含 token。
|
||||
logging.getLogger("httpx").setLevel(logging.WARNING)
|
||||
logging.getLogger("httpcore").setLevel(logging.WARNING)
|
||||
|
||||
|
||||
def get_logger(name: str | None = None, **initial_context: Any) -> structlog.BoundLogger:
|
||||
|
||||
@@ -17,6 +17,7 @@ PostgreSQL 事務管理器,確保多表操作原子性。
|
||||
from typing import Any
|
||||
|
||||
import structlog
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
@@ -49,14 +50,20 @@ class UnitOfWork:
|
||||
- Redis 操作失敗時必須手動呼叫 rollback()
|
||||
"""
|
||||
|
||||
def __init__(self, session_factory: async_sessionmaker[AsyncSession]):
|
||||
def __init__(
|
||||
self,
|
||||
session_factory: async_sessionmaker[AsyncSession],
|
||||
project_id: str | None = None,
|
||||
):
|
||||
"""
|
||||
初始化 UnitOfWork
|
||||
|
||||
Args:
|
||||
session_factory: SQLAlchemy async session factory
|
||||
project_id: RLS project context. None means contextvar/default awoooi.
|
||||
"""
|
||||
self._session_factory = session_factory
|
||||
self._project_id = project_id
|
||||
self._session: AsyncSession | None = None
|
||||
self._committed = False
|
||||
|
||||
@@ -74,9 +81,18 @@ class UnitOfWork:
|
||||
|
||||
async def __aenter__(self) -> "UnitOfWork":
|
||||
"""進入事務"""
|
||||
from src.core.context import get_current_project_id
|
||||
|
||||
self._session = self._session_factory()
|
||||
effective_pid = (
|
||||
self._project_id if self._project_id is not None else get_current_project_id()
|
||||
)
|
||||
await self._session.execute(
|
||||
text("SELECT set_config('app.project_id', :pid, TRUE)"),
|
||||
{"pid": effective_pid},
|
||||
)
|
||||
self._committed = False
|
||||
logger.debug("uow_started")
|
||||
logger.debug("uow_started", project_id=effective_pid)
|
||||
return self
|
||||
|
||||
async def __aexit__(
|
||||
|
||||
@@ -10,7 +10,7 @@ from __future__ import annotations
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
from uuid import UUID, uuid4
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy import (
|
||||
Boolean,
|
||||
@@ -577,8 +577,8 @@ class AwoooPMcpGatewayAudit(Base):
|
||||
run_id: Mapped[UUID | None] = mapped_column(nullable=True)
|
||||
trace_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
|
||||
agent_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
|
||||
tool_id: Mapped[UUID] = mapped_column(
|
||||
ForeignKey("awooop_mcp_tool_registry.tool_id"), nullable=False
|
||||
tool_id: Mapped[UUID | None] = mapped_column(
|
||||
ForeignKey("awooop_mcp_tool_registry.tool_id"), nullable=True
|
||||
)
|
||||
tool_name: Mapped[str] = mapped_column(String(128), nullable=False)
|
||||
credential_ref: Mapped[str | None] = mapped_column(String(256), nullable=True)
|
||||
@@ -635,6 +635,13 @@ class AwoooPConversationEvent(Base):
|
||||
content_type: Mapped[str] = mapped_column(String(32), nullable=False, default="text")
|
||||
content_hash: Mapped[str | None] = mapped_column(String(64), nullable=True)
|
||||
content_preview: Mapped[str | None] = mapped_column(String(256), nullable=True)
|
||||
content_redacted: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
redaction_version: Mapped[str] = mapped_column(
|
||||
String(32), nullable=False, server_default=text("'audit_sink_v1'")
|
||||
)
|
||||
source_envelope: Mapped[dict[str, Any]] = mapped_column(
|
||||
JSONB, nullable=False, server_default=text("'{}'::jsonb")
|
||||
)
|
||||
attachment_sha256: Mapped[str | None] = mapped_column(String(64), nullable=True)
|
||||
is_duplicate: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
|
||||
provider_ts: Mapped[datetime | None] = mapped_column(nullable=True)
|
||||
@@ -680,6 +687,13 @@ class AwoooPOutboundMessage(Base):
|
||||
message_type: Mapped[str] = mapped_column(String(32), nullable=False)
|
||||
content_hash: Mapped[str | None] = mapped_column(String(64), nullable=True)
|
||||
content_preview: Mapped[str | None] = mapped_column(String(256), nullable=True)
|
||||
content_redacted: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
redaction_version: Mapped[str] = mapped_column(
|
||||
String(32), nullable=False, server_default=text("'audit_sink_v1'")
|
||||
)
|
||||
source_envelope: Mapped[dict[str, Any]] = mapped_column(
|
||||
JSONB, nullable=False, server_default=text("'{}'::jsonb")
|
||||
)
|
||||
provider_message_id: Mapped[str | None] = mapped_column(String(64), nullable=True)
|
||||
send_status: Mapped[str] = mapped_column(String(16), nullable=False, default="pending")
|
||||
send_error: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
|
||||
@@ -106,10 +106,13 @@ async def get_db() -> AsyncGenerator[AsyncSession, None]:
|
||||
factory = get_session_factory()
|
||||
async with factory() as session:
|
||||
try:
|
||||
from src.core.context import get_current_project_id
|
||||
|
||||
# AwoooP Phase 2.3 (2026-05-04 ogt): SET LOCAL app.project_id 讓 RLS Policy 生效
|
||||
# 預設 'awoooi',多租戶路由將在 middleware 注入實際 project_id
|
||||
# 預設 'awoooi',多租戶路由將透過 contextvar 注入實際 project_id
|
||||
await session.execute(
|
||||
text("SELECT set_config('app.project_id', 'awoooi', TRUE)")
|
||||
text("SELECT set_config('app.project_id', :pid, TRUE)"),
|
||||
{"pid": get_current_project_id()},
|
||||
)
|
||||
yield session
|
||||
await session.commit()
|
||||
@@ -154,6 +157,9 @@ async def get_db_context(project_id: str | None = None) -> AsyncGenerator[AsyncS
|
||||
# Initialization
|
||||
# =============================================================================
|
||||
|
||||
_DB_BOOTSTRAP_LOCK_NAME = "awoooi:init_db:ddl"
|
||||
|
||||
|
||||
async def init_db() -> None:
|
||||
"""
|
||||
Initialize database tables
|
||||
@@ -162,6 +168,28 @@ async def init_db() -> None:
|
||||
"""
|
||||
engine = get_engine()
|
||||
|
||||
async with engine.connect() as lock_conn:
|
||||
# 2026-05-24 ogt + Codex: 兩個 API replica 同時啟動時,PostgreSQL 會在
|
||||
# ALTER TABLE ... IF NOT EXISTS 上互相等待並 deadlock。整段 bootstrap
|
||||
# DDL 必須序列化,避免 rollout 因一個 pod CrashLoop 變成 1/2 ready。
|
||||
await lock_conn.execute(
|
||||
text("SELECT pg_advisory_lock(hashtext(:lock_name))"),
|
||||
{"lock_name": _DB_BOOTSTRAP_LOCK_NAME},
|
||||
)
|
||||
try:
|
||||
await _run_init_db_ddl(engine)
|
||||
finally:
|
||||
await lock_conn.execute(
|
||||
text("SELECT pg_advisory_unlock(hashtext(:lock_name))"),
|
||||
{"lock_name": _DB_BOOTSTRAP_LOCK_NAME},
|
||||
)
|
||||
|
||||
|
||||
async def _run_init_db_ddl(engine: AsyncEngine) -> None:
|
||||
"""
|
||||
Run idempotent DB bootstrap DDL while caller holds the bootstrap advisory lock.
|
||||
"""
|
||||
|
||||
# 2026-04-15 ogt: 多 replica 並行啟動競爭修復
|
||||
# 問題:單一大 transaction 裡兩個 pod 同時建 table → 其中一個 CREATE INDEX 失敗
|
||||
# PostgreSQL 中 transaction 內任何錯誤導致整個 transaction ROLLBACK
|
||||
|
||||
@@ -633,6 +633,8 @@ class AlertOperationLog(Base):
|
||||
"RESOLVED", "SILENCED", "ESCALATED", "GUARDRAIL_BLOCKED",
|
||||
"PRE_FLIGHT_PASSED", "PRE_FLIGHT_FAILED", "BACKUP_TRIGGERED",
|
||||
"BACKUP_COMPLETED", "BACKUP_FAILED", "APPROVAL_ESCALATED", "CHANGE_APPLIED",
|
||||
"NOTIFICATION_CLASSIFIED", "MANUAL_FIX_RECORDED", "KM_CONVERTED",
|
||||
"PLAYBOOK_DRAFT_CREATED", "STATE_GUARD_BLOCKED",
|
||||
name="alert_event_type", create_type=False,
|
||||
),
|
||||
nullable=False, index=True,
|
||||
|
||||
@@ -9,6 +9,7 @@ Layer 1 意圖路由(關鍵字正則)→ Ollama 本地模型(111)→ Tel
|
||||
debugger/vuln → deepseek-r1:14b(推理); code agents → qwen2.5-coder:7b; 其他 → qwen2.5:7b-instruct
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
import time
|
||||
@@ -17,12 +18,12 @@ import httpx
|
||||
import structlog
|
||||
from sqlalchemy import text
|
||||
|
||||
from src.core.config import settings
|
||||
from src.core.redis_client import get_redis
|
||||
from src.db.base import get_db_context
|
||||
from src.hermes.agent_loader import get_agent_system_prompt
|
||||
from src.hermes.display_names import DEFAULT_AGENT, format_response_header
|
||||
from src.hermes.safety_hooks import is_dangerous_input, is_mutate_intent
|
||||
from src.services.ollama_endpoint_resolver import resolve_ollama_order
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
@@ -261,42 +262,48 @@ async def process_nl_message(
|
||||
|
||||
t0 = time.monotonic()
|
||||
|
||||
# 呼叫 Ollama 本地模型(111,零費用,按 agent 選模型)
|
||||
# 呼叫 Ollama 模型(GCP-A → GCP-B → 111,零費用,按 agent 選模型)
|
||||
model = _pick_model(agent_name)
|
||||
success = False
|
||||
error_type: str | None = None
|
||||
try:
|
||||
ollama_base = getattr(settings, "OLLAMA_URL", "http://34.143.170.20:11434") # 2026-05-03 ogt: ADR-110 GCP-A Primary
|
||||
async with httpx.AsyncClient(timeout=_OLLAMA_TIMEOUT) as _hc:
|
||||
resp = await _hc.post(
|
||||
f"{ollama_base}/api/chat",
|
||||
json={
|
||||
"model": model,
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": prompt_with_ctx},
|
||||
],
|
||||
"stream": False,
|
||||
"options": {"num_predict": 1500, "temperature": 0.3},
|
||||
},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
result_text = resp.json().get("message", {}).get("content", "")
|
||||
|
||||
result_text = _strip_think_tags(result_text)
|
||||
if not result_text:
|
||||
result_text = "_Agent 回應為空,請稍後再試。_"
|
||||
success = True
|
||||
|
||||
except Exception as exc:
|
||||
error_type = type(exc).__name__
|
||||
logger.error(
|
||||
"hermes_nl_ollama_error",
|
||||
error=str(exc),
|
||||
agent=agent_name,
|
||||
model=model,
|
||||
exc_type=error_type,
|
||||
)
|
||||
result_text = ""
|
||||
async with httpx.AsyncClient(timeout=_OLLAMA_TIMEOUT) as _hc:
|
||||
for endpoint in resolve_ollama_order("hermes"):
|
||||
if not endpoint.url:
|
||||
continue
|
||||
try:
|
||||
resp = await _hc.post(
|
||||
f"{endpoint.url}/api/chat",
|
||||
json={
|
||||
"model": model,
|
||||
# Keep Hermes responses in message.content across Ollama 0.24+.
|
||||
"think": False,
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": prompt_with_ctx},
|
||||
],
|
||||
"stream": False,
|
||||
"options": {"num_predict": 1500, "temperature": 0.3},
|
||||
},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
result_text = resp.json().get("message", {}).get("content", "")
|
||||
result_text = _strip_think_tags(result_text)
|
||||
if not result_text:
|
||||
result_text = "_Agent 回應為空,請稍後再試。_"
|
||||
success = True
|
||||
break
|
||||
except Exception as exc:
|
||||
error_type = type(exc).__name__
|
||||
logger.error(
|
||||
"hermes_nl_ollama_error",
|
||||
error=str(exc),
|
||||
agent=agent_name,
|
||||
model=model,
|
||||
provider=endpoint.provider_name,
|
||||
exc_type=error_type,
|
||||
)
|
||||
if not success:
|
||||
result_text = f"_Hermes 暫時無法連線({error_type}),請稍後再試。_"
|
||||
|
||||
latency_ms = int((time.monotonic() - t0) * 1000)
|
||||
|
||||
@@ -46,6 +46,7 @@ _DEDUP_TTL_SEC = 3600 # 同一告警 1 小時內不重複發送
|
||||
_TG_SILENCE_THRESHOLD = 2 # PENDING telegram_message_id IS NULL 告警門檻
|
||||
_FLYWHEEL_SUCCESS_MIN = 0.30 # 執行成功率下限
|
||||
_STUCK_ANALYSIS_THRESHOLD = 3 # Agent Debate 失敗導致卡住的告警門檻
|
||||
_TRUST_DRIFT_META_MIN_RATIO = 0.20 # 低於此比例只記治理事件,不升 Meta System
|
||||
|
||||
# 2026-05-03 ogt + Claude Opus 4.7 — feedback_silencing_alerts_recurring_violation
|
||||
# 啟動寬限期:30 分鐘內可 skip「資料還沒到」噪音;超過寬限期仍空 = 真資料管線斷,必須告警
|
||||
@@ -210,7 +211,8 @@ async def _check_once() -> None:
|
||||
from src.services.governance_agent import get_governance_agent
|
||||
trust_result = await get_governance_agent().check_trust_drift(emit_alert=False)
|
||||
drifted = trust_result.get("drifted", 0)
|
||||
if drifted > 0:
|
||||
drift_ratio = float(trust_result.get("drift_ratio") or 0.0)
|
||||
if drifted > 0 and drift_ratio >= _TRUST_DRIFT_META_MIN_RATIO:
|
||||
auto_deprecated = trust_result.get("auto_deprecated", 0)
|
||||
kept = trust_result.get("kept", 0)
|
||||
violations.append(
|
||||
@@ -219,6 +221,13 @@ async def _check_once() -> None:
|
||||
)
|
||||
# 2026-05-05 ogt W6 修復:移除動態 low_count,避免 count 微變繞過 dedup
|
||||
violation_codes.append("W6:trust_drift")
|
||||
elif drifted > 0:
|
||||
logger.info(
|
||||
"watchdog_w6_trust_drift_below_meta_threshold",
|
||||
drifted=drifted,
|
||||
drift_ratio=round(drift_ratio, 3),
|
||||
threshold=_TRUST_DRIFT_META_MIN_RATIO,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("watchdog_w6_trust_drift_check_failed", error=str(e))
|
||||
|
||||
|
||||
@@ -479,7 +479,7 @@ async def _collect_all_k8s_assets() -> tuple[list[dict[str, Any]], list[dict[str
|
||||
|
||||
# 6. Prometheus targets — 補齊 host-install services (110/112/188/125 等非 K8s)
|
||||
# Gap 1 修補 (2026-04-19 audit): 原本 asset_inventory 只涵蓋 K8s,
|
||||
# 110 Harbor/Gitea/監控 + 188 PostgreSQL/Redis/Ollama host-install 全漏
|
||||
# 110 Harbor/Gitea/監控 + 188 PostgreSQL/Redis host-install 全漏
|
||||
# 用 Prometheus /api/v1/targets 自動發現全節點服務
|
||||
try:
|
||||
prom_assets, host_relationships = await _collect_prometheus_targets()
|
||||
|
||||
44
apps/api/src/jobs/awooop_ansible_check_mode_job.py
Normal file
44
apps/api/src/jobs/awooop_ansible_check_mode_job.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""AwoooP Ansible check-mode worker loop.
|
||||
|
||||
Runs only when explicitly enabled by settings. The worker consumes pending
|
||||
``ansible_candidate_matched`` rows and records check-mode evidence; it never
|
||||
executes Ansible apply.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
import structlog
|
||||
|
||||
from src.core.config import settings
|
||||
from src.services.awooop_ansible_check_mode_service import run_pending_check_modes_once
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
async def run_awooop_ansible_check_mode_loop() -> None:
    """Run the AwoooP Ansible check-mode worker until cancelled.

    Returns immediately when ``ENABLE_AWOOOP_ANSIBLE_CHECK_MODE_WORKER`` is
    false. Otherwise it waits for the configured startup delay and then calls
    ``run_pending_check_modes_once`` once per polling interval, forever.
    A failing tick is logged and the loop keeps running.
    """
    if not settings.ENABLE_AWOOOP_ANSIBLE_CHECK_MODE_WORKER:
        logger.info("awooop_ansible_check_mode_worker_disabled")
        return

    logger.info(
        "awooop_ansible_check_mode_worker_started",
        interval_seconds=settings.AWOOOP_ANSIBLE_CHECK_MODE_INTERVAL_SECONDS,
        batch_limit=settings.AWOOOP_ANSIBLE_CHECK_MODE_BATCH_LIMIT,
        timeout_seconds=settings.AWOOOP_ANSIBLE_CHECK_MODE_TIMEOUT_SECONDS,
    )
    await asyncio.sleep(settings.AWOOOP_ANSIBLE_CHECK_MODE_STARTUP_SLEEP_SECONDS)

    while True:
        try:
            result = await run_pending_check_modes_once(
                limit=settings.AWOOOP_ANSIBLE_CHECK_MODE_BATCH_LIMIT,
                timeout_seconds=settings.AWOOOP_ANSIBLE_CHECK_MODE_TIMEOUT_SECONDS,
            )
            # Only log ticks that claimed work or hit a blocker; idle ticks
            # stay silent.
            if result.get("claimed") or result.get("blockers"):
                logger.info("awooop_ansible_check_mode_worker_tick", **result)
        except Exception as exc:
            # Best-effort worker: keep looping, but record the exception type
            # and traceback so the failure can be diagnosed from the log.
            logger.warning(
                "awooop_ansible_check_mode_worker_failed",
                error=str(exc),
                exc_type=type(exc).__name__,
                exc_info=True,
            )

        await asyncio.sleep(settings.AWOOOP_ANSIBLE_CHECK_MODE_INTERVAL_SECONDS)
|
||||
@@ -172,7 +172,7 @@ _LLM_FORECAST_PROMPT = """你是 AWOOOI 容量規劃專家。以下 host 過去
|
||||
{findings_json}
|
||||
|
||||
## 當前主機環境資訊
|
||||
- 主機架構: 110 (Harbor/Gitea/監控), 112 (Security), 120/121 (K3s), 125 (K3s backup), 188 (PG/Redis/Ollama/MinIO)
|
||||
- 主機架構: 110 (Harbor/Gitea/監控), 112 (Security), 120/121 (K3s), 125 (K3s backup), 188 (PG/Redis/MinIO)
|
||||
- 判斷請考慮: 該主機上跑什麼服務、常見瓶頸模式
|
||||
|
||||
## 輸出規格 (必須是合法 JSON,純 JSON 無前後文字)
|
||||
|
||||
308
apps/api/src/jobs/hermes_kb_growth_worker.py
Normal file
308
apps/api/src/jobs/hermes_kb_growth_worker.py
Normal file
@@ -0,0 +1,308 @@
|
||||
"""
|
||||
Hermes KB Growth Worker
|
||||
=======================
|
||||
|
||||
消費 governance_remediation_dispatch 中的 hermes_kb_growth_healthcheck work item,
|
||||
把 knowledge_degradation 告警推進成可審核的 KM 草稿。
|
||||
|
||||
邊界:
|
||||
- 可以建立 REVIEW 狀態的 auto_runbook 草稿,讓 owner 在前端審核。
|
||||
- 不可以直接把 KM 標成 APPROVED / PUBLISHED。
|
||||
- 不修改 immutable ai_governance_events;流程進度寫回 dispatch.decision_context。
|
||||
|
||||
2026-05-19 ogt + Codex: T90 Hermes KB growth healthcheck worker。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from copy import deepcopy
|
||||
from typing import Any
|
||||
|
||||
import structlog
|
||||
|
||||
from src.db.base import get_db_context
|
||||
from src.db.models import GovernanceRemediationDispatch
|
||||
from src.models.knowledge import (
|
||||
EntrySource,
|
||||
EntryStatus,
|
||||
EntryType,
|
||||
KnowledgeEntry,
|
||||
KnowledgeEntryCreate,
|
||||
)
|
||||
from src.repositories.governance_remediation_dispatch_repo import (
|
||||
InvalidStatusTransition,
|
||||
list_pending_by_executor,
|
||||
transition_status,
|
||||
update_decision_context,
|
||||
)
|
||||
from src.repositories.knowledge_repository import KnowledgeDBRepository
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
EXECUTOR_TYPE = "hermes_kb_growth_healthcheck"
|
||||
DEFAULT_INTERVAL_SECONDS = 300
|
||||
DEFAULT_LIMIT = 20
|
||||
|
||||
|
||||
async def run_hermes_kb_growth_once(limit: int = DEFAULT_LIMIT) -> dict[str, int]:
    """Run one round of the Hermes KB growth healthcheck.

    Fetches pending dispatch rows for ``EXECUTOR_TYPE`` and processes them one
    by one. A row that raises ``InvalidStatusTransition`` is counted as
    skipped; any other exception is counted as failed, logged with its
    traceback, and passed to ``_mark_failed_if_started``.

    Args:
        limit: Maximum number of pending rows to fetch for this round.

    Returns:
        Counters (``scanned``, ``processed``, ``skipped``, ``failed``) for
        logs and smoke tests.
    """
    pending = await list_pending_by_executor(EXECUTOR_TYPE, limit=limit)
    scanned = len(pending)
    processed = 0
    skipped = 0
    failed = 0

    for dispatch in pending:
        try:
            await _process_dispatch(dispatch)
        except InvalidStatusTransition as exc:
            skipped += 1
            logger.info(
                "hermes_kb_growth_dispatch_skipped",
                dispatch_id=dispatch.id,
                event_id=dispatch.governance_event_id,
                reason=str(exc),
            )
        except Exception as exc:
            failed += 1
            logger.exception(
                "hermes_kb_growth_dispatch_failed",
                dispatch_id=dispatch.id,
                event_id=dispatch.governance_event_id,
                error=str(exc),
            )
            await _mark_failed_if_started(dispatch.id, str(exc))
        else:
            processed += 1

    stats = {
        "scanned": scanned,
        "processed": processed,
        "skipped": skipped,
        "failed": failed,
    }
    # Stay quiet on completely idle rounds.
    if any(stats.values()):
        logger.info("hermes_kb_growth_once_completed", **stats)
    return stats
|
||||
|
||||
|
||||
async def run_hermes_kb_growth_loop(
    interval_seconds: int = DEFAULT_INTERVAL_SECONDS,
    limit: int = DEFAULT_LIMIT,
) -> None:
    """Background loop that periodically consumes Hermes KB growth dispatches.

    Args:
        interval_seconds: Pause between rounds, in seconds.
        limit: Maximum number of dispatch rows handled per round.

    Cancellation is re-raised so the task can be stopped; any other error from
    a round is logged and the loop continues after the usual pause.
    """

    async def _run_round() -> None:
        # One guarded round: propagate cancellation, log everything else.
        try:
            await run_hermes_kb_growth_once(limit=limit)
        except asyncio.CancelledError:
            raise
        except Exception as exc:
            logger.exception("hermes_kb_growth_loop_error", error=str(exc))

    logger.info(
        "hermes_kb_growth_loop_started",
        interval_seconds=interval_seconds,
        limit=limit,
    )
    while True:
        await _run_round()
        await asyncio.sleep(interval_seconds)
|
||||
|
||||
|
||||
async def _process_dispatch(row: GovernanceRemediationDispatch) -> None:
    """Process one pending dispatch, ending at the owner-review stage.

    The row is moved ``pending`` -> ``dispatched`` -> ``executing``, a KM
    review draft is created (or an existing one reused), the review context is
    written back to the dispatch, and the status is finally set to
    ``succeeded``. Exceptions from any step propagate to the caller.

    Args:
        row: The pending dispatch row to process.
    """
    claimed = await transition_status(row.id, "pending", "dispatched")
    active = await transition_status(claimed.id, "dispatched", "executing")

    draft = await _create_or_get_km_review_draft(active)
    review_context = _build_review_context(
        active.decision_context or {},
        dispatch_id=active.id,
        governance_event_id=active.governance_event_id,
        km_entry_id=draft.id,
    )
    # Persist the context before closing the dispatch so the draft reference
    # is stored by the time the row reports success.
    await update_decision_context(active.id, review_context)
    await transition_status(active.id, "executing", "succeeded")

    logger.info(
        "hermes_kb_growth_review_draft_ready",
        dispatch_id=active.id,
        event_id=active.governance_event_id,
        km_entry_id=draft.id,
        workflow_stage="waiting_owner_review",
    )
|
||||
|
||||
|
||||
async def _create_or_get_km_review_draft(
    dispatch: GovernanceRemediationDispatch,
) -> KnowledgeEntry:
    """Idempotently create or fetch the REVIEW-status KM draft for a dispatch.

    An existing entry is looked up first by the governance-event tag and then
    by the dispatch tag; the first hit is returned. Only when neither tag
    matches is a new entry created from the dispatch payload.

    Args:
        dispatch: The dispatch the draft belongs to.

    Returns:
        The existing or newly created knowledge entry.
    """
    dispatch_tag = f"dispatch:{dispatch.id}"
    event_tag = f"governance_event:{dispatch.governance_event_id}"
    payload = _build_km_review_entry_payload(dispatch)

    async with get_db_context() as db:
        repo = KnowledgeDBRepository(db)
        # The governance-event tag takes precedence over the dispatch tag.
        for lookup_tag in (event_tag, dispatch_tag):
            matches, _ = await repo.list_entries(tags=[lookup_tag], limit=1)
            if matches:
                return matches[0]
        return await repo.create(payload)
|
||||
|
||||
|
||||
def _build_km_review_entry_payload(
    dispatch: GovernanceRemediationDispatch,
) -> KnowledgeEntryCreate:
    """Turn a governance dispatch into the payload of a KM draft awaiting review.

    Impact figures and ownership are read from ``dispatch.decision_context``;
    anything missing is rendered as ``unknown`` (or a default owner). The
    resulting entry is created in REVIEW status and tagged with both the
    dispatch id and the governance event id.

    Args:
        dispatch: The dispatch to describe.

    Returns:
        The creation payload for the review draft.
    """

    def _mapping_at(container, name):
        # Missing or non-dict values are treated as an empty mapping.
        found = container.get(name)
        return found if isinstance(found, dict) else {}

    context = dispatch.decision_context or {}
    workflow = _mapping_at(context, "workflow")
    impact = _mapping_at(workflow, "impact")
    extra = _mapping_at(context, "extra")
    # Ownership may sit at the top level or be nested under "extra".
    ownership = _mapping_at(context, "ownership") or _mapping_at(extra, "ownership")

    impact_summary = [
        ("stale_count", _format_unknown(_pick_first(impact, extra, key="stale_count"))),
        ("total_count", _format_unknown(_pick_first(impact, extra, key="total_count"))),
        ("stale_ratio", _format_ratio(_pick_first(impact, context, key="stale_ratio"))),
        ("threshold", _format_ratio(_pick_first(impact, context, key="threshold"))),
        ("stale_days", _format_unknown(_pick_first(impact, extra, key="stale_days"))),
    ]
    lead_agent = ownership.get("lead_agent") or "Hermes"
    human_owner = ownership.get("human_owner") or "KM owner / SRE owner"

    lines = [
        "# KM 健康檢查草稿",
        "",
        "## 來源",
        f"- governance_event_id: {dispatch.governance_event_id}",
        f"- dispatch_id: {dispatch.id}",
        f"- executor_type: {dispatch.executor_type}",
        "",
        "## 影響摘要",
    ]
    lines.extend(f"- {label}: {rendered}" for label, rendered in impact_summary)
    lines.extend([
        "",
        "## AI 已完成",
        "- Hermes 已接手 knowledge_degradation dispatch。",
        "- 已產生 KM 更新草稿與 owner review work item。",
        "- 尚未把任何條目標成 approved / published。",
        "",
        "## Owner 審核重點",
        "- 優先反查最近被 Incident、Sentry、SigNoz、PlayBook 引用的 KM。",
        "- 確認草稿內容沒有把過期處置方式寫回正式知識庫。",
        "- 審核通過後再進入 km_writeback_after_approval。",
        "",
        "## 安全邊界",
        "- writes_km_without_approval=false",
        f"- lead_agent={lead_agent}",
        f"- human_owner={human_owner}",
    ])
    content = "\n".join(lines)

    tags = [
        "governance:knowledge_degradation",
        "workflow:kb_growth_healthcheck",
        "stage:waiting_owner_review",
        "agent:Hermes",
        "needs_owner_review",
        f"dispatch:{dispatch.id}",
        f"governance_event:{dispatch.governance_event_id}",
    ]

    return KnowledgeEntryCreate(
        title=f"KM healthcheck review draft - {dispatch.governance_event_id[:8]}",
        content=content,
        entry_type=EntryType.AUTO_RUNBOOK,
        category="AI治理",
        tags=tags,
        source=EntrySource.AI_EXTRACTED,
        status=EntryStatus.REVIEW,
        path_type="hermes_kb_growth_healthcheck",
        created_by="hermes_kb_growth_worker",
    )
|
||||
|
||||
|
||||
def _build_review_context(
    context: dict[str, Any],
    *,
    dispatch_id: str,
    governance_event_id: str,
    km_entry_id: str,
) -> dict[str, Any]:
    """Build the dispatch read model describing the owner-review stage.

    Works on a deep copy, so ``context`` itself is never mutated. A missing
    or non-dict ``workflow`` / ``stage_by_dispatch_status`` value is replaced
    by a fresh dict before being filled in.

    Args:
        context: The current decision context of the dispatch.
        dispatch_id: Identifier of the dispatch, recorded in ``worker_result``.
        governance_event_id: Identifier of the governance event, recorded in
            ``worker_result``.
        km_entry_id: Identifier of the KM draft entry.

    Returns:
        The updated copy of the decision context.
    """
    updated = deepcopy(context)

    workflow = updated.get("workflow")
    if not isinstance(workflow, dict):
        workflow = updated["workflow"] = {}

    stages = workflow.get("stage_by_dispatch_status")
    if not isinstance(stages, dict):
        stages = workflow["stage_by_dispatch_status"] = {}
    stages["executing"] = "draft_km_updates"
    stages["succeeded"] = "waiting_owner_review"
    stages["failed"] = "needs_manual_km_triage"

    workflow.update({
        "current_stage": "waiting_owner_review",
        "next_action": "owner_review_km_draft",
        "needs_human_review": True,
        "writes_km_without_approval": False,
        "kb_draft_entry_id": km_entry_id,
    })

    updated.update({
        "next_action": "owner_review_km_draft",
        "decision_path": "draft_created_waiting_owner_review",
        "proposed_action": "Hermes 已建立 KM 更新草稿,等待 owner 審核",
        "worker_result": {
            "worker": "Hermes",
            "executor_type": EXECUTOR_TYPE,
            "dispatch_id": dispatch_id,
            "governance_event_id": governance_event_id,
            "km_draft_entry_id": km_entry_id,
            "stage": "waiting_owner_review",
            "status": "draft_created",
            "writes_km_without_approval": False,
        },
    })
    return updated
|
||||
|
||||
|
||||
async def _mark_failed_if_started(dispatch_id: str, error: str) -> None:
    """Move a dispatch the worker already claimed to ``failed``, keeping the error.

    The transition is attempted from ``executing`` first and then from
    ``dispatched``. An ``InvalidStatusTransition`` moves on to the next
    candidate status; any other failure is logged as a warning and ends the
    attempt. This function never raises for a failed transition.

    Args:
        dispatch_id: Identifier of the dispatch to mark as failed.
        error: Error text; only the first 500 characters are stored.
    """
    stored_error = error[:500]

    for current_status in ("executing", "dispatched"):
        try:
            await transition_status(
                dispatch_id,
                current_status,
                "failed",
                last_error=stored_error,
            )
        except InvalidStatusTransition:
            # The dispatch is not in this status; try the next one.
            continue
        except Exception as transition_error:
            logger.warning(
                "hermes_kb_growth_mark_failed_failed",
                dispatch_id=dispatch_id,
                from_status=current_status,
                error=str(transition_error),
            )
        # Either the transition succeeded or it failed unexpectedly: stop here.
        return
|
||||
|
||||
|
||||
def _pick_first(*sources: dict[str, Any], key: str) -> Any:
|
||||
for source in sources:
|
||||
if key in source:
|
||||
return source[key]
|
||||
return None
|
||||
|
||||
|
||||
def _format_unknown(value: Any) -> str:
|
||||
return "unknown" if value is None else str(value)
|
||||
|
||||
|
||||
def _format_ratio(value: Any) -> str:
|
||||
try:
|
||||
return f"{float(value) * 100:.1f}%"
|
||||
except (TypeError, ValueError):
|
||||
return "unknown"
|
||||
289
apps/api/src/jobs/incident_lifecycle_reconciler.py
Normal file
289
apps/api/src/jobs/incident_lifecycle_reconciler.py
Normal file
@@ -0,0 +1,289 @@
|
||||
"""
|
||||
Incident Lifecycle Reconciler
|
||||
=============================
|
||||
|
||||
把已有強證據的舊 stuck incident 收斂回 RESOLVED。
|
||||
|
||||
範圍刻意保守:
|
||||
- auto_repair_executions.success = true
|
||||
- approval_records.status = EXECUTION_SUCCESS
|
||||
- approval_records.status = EXPIRED
|
||||
|
||||
不處理單純 APPROVED / NO_ACTION / manual_required,避免把仍需人工的事件
|
||||
誤當作自動修復完成。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
|
||||
import httpx
|
||||
import structlog
|
||||
from sqlalchemy import text
|
||||
|
||||
from src.core.config import settings
|
||||
from src.db.base import get_db_context
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
# Default maximum number of incidents handled by one reconcile pass.
BATCH_LIMIT = 100
# Pause between two reconcile passes, in seconds (1800 s = 30 minutes).
INTERVAL_SECONDS = 1800
# Timeout, in seconds, of the HTTP client used to query Prometheus.
_PROMETHEUS_TIMEOUT_SECONDS = 5.0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LifecycleCandidate:
    """Immutable description of one stuck incident selected for resolution."""

    # Identifier of the incident to resolve.
    incident_id: str
    # Resolution type forwarded to IncidentService.resolve_incident.
    resolution_type: str
    # Label of the evidence that selected the incident; written to the logs.
    reason: str
    # When True the incident is resolved via _resolve_db_only instead of
    # IncidentService.resolve_incident.
    direct_db_only: bool = False
|
||||
|
||||
|
||||
async def run_incident_lifecycle_reconciler_loop() -> None:
    """Every 30 minutes, converge a small batch of stuck incidents that have completion evidence.

    Runs forever. A pass that resolved nothing and hit no errors is not
    logged; a pass that raises is logged as a warning and the loop continues
    after the usual sleep.
    """
    while True:
        try:
            resolved, errors = await reconcile_stuck_incidents()
            has_activity = resolved > 0 or errors > 0
            if has_activity:
                logger.info(
                    "incident_lifecycle_reconciler_done",
                    resolved=resolved,
                    errors=errors,
                    batch_limit=BATCH_LIMIT,
                )
        except Exception as loop_error:
            logger.warning(
                "incident_lifecycle_reconciler_loop_failed",
                error=str(loop_error),
            )

        await asyncio.sleep(INTERVAL_SECONDS)
|
||||
|
||||
|
||||
async def reconcile_stuck_incidents(limit: int = BATCH_LIMIT) -> tuple[int, int]:
    """Resolve incidents that are finished in practice but still INVESTIGATING.

    Evidence-based candidates are fetched first. If the batch still has room
    and the firing alertnames could be read, stale inactive/duplicate-alert
    candidates fill the remaining slots. Candidates flagged
    ``direct_db_only`` are resolved through ``_resolve_db_only``; all others
    go through ``IncidentService.resolve_incident`` with
    ``emit_postmortem=False``.

    Args:
        limit: Maximum number of candidates handled in this pass.

    Returns:
        ``(resolved_count, error_count)``. A candidate whose resolution
        returns a falsy result counts as neither.
    """
    candidates = await _fetch_candidates(limit)

    spare_slots = max(0, limit - len(candidates))
    if spare_slots > 0:
        firing_alertnames = await _fetch_active_alertnames()
        # None means the firing alerts are unknown: skip the alert-based path.
        if firing_alertnames is not None:
            already_selected = {c.incident_id for c in candidates}
            alert_candidates = await _fetch_inactive_or_duplicate_alert_candidates(
                limit=spare_slots,
                active_alertnames=firing_alertnames,
                exclude_incident_ids=already_selected,
            )
            candidates.extend(alert_candidates)

    if not candidates:
        return 0, 0

    from src.services.incident_service import get_incident_service

    incident_service = get_incident_service()
    resolved = 0
    errors = 0

    for candidate in candidates:
        try:
            if candidate.direct_db_only:
                outcome = await _resolve_db_only(candidate.incident_id)
            else:
                outcome = await incident_service.resolve_incident(
                    candidate.incident_id,
                    resolution_type=candidate.resolution_type,
                    emit_postmortem=False,
                )
            if outcome:
                resolved += 1
                logger.info(
                    "incident_lifecycle_reconciled",
                    incident_id=candidate.incident_id,
                    reason=candidate.reason,
                    resolution_type=candidate.resolution_type,
                    direct_db_only=candidate.direct_db_only,
                )
        except Exception as resolve_error:
            # One failing incident must not abort the rest of the batch.
            errors += 1
            logger.warning(
                "incident_lifecycle_reconcile_failed",
                incident_id=candidate.incident_id,
                reason=candidate.reason,
                error=str(resolve_error),
            )

    return resolved, errors
|
||||
|
||||
|
||||
async def _fetch_active_alertnames() -> set[str] | None:
    """Read the alertnames currently firing according to Prometheus.

    Queries ``ALERTS{alertstate="firing"}`` through the instant-query API.
    ``None`` means fail-closed: the set of firing alerts is unknown and the
    caller must not act on it. An empty set is returned only when Prometheus
    reported ``status == "success"`` with a well-formed, empty result list,
    because the caller treats an empty set as "no alert is firing".

    Returns:
        The set of firing alertnames, or ``None`` when the request failed or
        the response did not have the expected shape.
    """
    try:
        async with httpx.AsyncClient(timeout=_PROMETHEUS_TIMEOUT_SECONDS) as client:
            response = await client.get(
                f"{settings.PROMETHEUS_URL.rstrip('/')}/api/v1/query",
                params={"query": 'ALERTS{alertstate="firing"}'},
            )
            response.raise_for_status()
            payload = response.json()
    except Exception as exc:
        logger.warning("incident_lifecycle_active_alerts_fetch_failed", error=str(exc))
        return None

    # A 2xx body that is not an explicit success must not be read as
    # "nothing is firing".
    if not isinstance(payload, dict) or payload.get("status") != "success":
        logger.warning(
            "incident_lifecycle_active_alerts_fetch_failed",
            error="prometheus response status is not success",
        )
        return None

    data = payload.get("data")
    result = data.get("result") if isinstance(data, dict) else None
    if not isinstance(result, list):
        logger.warning(
            "incident_lifecycle_active_alerts_fetch_failed",
            error="prometheus response has no result list",
        )
        return None

    active_alertnames: set[str] = set()
    for item in result:
        metric = item.get("metric") if isinstance(item, dict) else None
        if not isinstance(metric, dict):
            # A malformed sample could hide a firing alert: fail closed.
            logger.warning(
                "incident_lifecycle_active_alerts_fetch_failed",
                error="prometheus result item has no metric labels",
            )
            return None
        alertname = metric.get("alertname")
        if alertname:
            active_alertnames.add(alertname)

    logger.info(
        "incident_lifecycle_active_alerts_loaded",
        active_alert_count=len(active_alertnames),
    )
    return active_alertnames
|
||||
|
||||
|
||||
async def _resolve_db_only(incident_id: str) -> bool:
    """Mark an incident as resolved through the incident repository only.

    The same timestamp is used for both ``updated_at`` and ``resolved_at``.

    Args:
        incident_id: Identifier of the incident to resolve.

    Returns:
        Whatever the repository's ``update_status`` returns.
    """
    from src.repositories.incident_repository import get_incident_repository

    resolved_moment = now_taipei()
    repository = get_incident_repository()
    return await repository.update_status(
        incident_id=incident_id,
        status="resolved",
        updated_at=resolved_moment,
        resolved_at=resolved_moment,
    )
|
||||
|
||||
|
||||
async def _fetch_candidates(limit: int) -> list[LifecycleCandidate]:
    """Fetch stuck incidents that carry explicit completion evidence.

    Selects incidents in status ``INVESTIGATING`` created at least 24 hours
    ago that have a successful auto-repair execution, an approval record in
    ``EXECUTION_SUCCESS``, or an approval record in ``EXPIRED``. The first
    two kinds resolve as ``auto_repair``, the last as ``timeout``. Newest
    incidents come first.

    Args:
        limit: Maximum number of rows to return.

    Returns:
        One candidate per selected incident.
    """
    statement = text(
        """
        WITH stale AS (
            SELECT
                i.incident_id,
                i.created_at,
                EXISTS (
                    SELECT 1
                    FROM auto_repair_executions are
                    WHERE are.incident_id = i.incident_id
                      AND are.success IS TRUE
                ) AS has_success_auto_repair,
                EXISTS (
                    SELECT 1
                    FROM approval_records ar
                    WHERE ar.incident_id = i.incident_id
                      AND ar.status::text = 'EXECUTION_SUCCESS'
                ) AS has_execution_success,
                EXISTS (
                    SELECT 1
                    FROM approval_records ar
                    WHERE ar.incident_id = i.incident_id
                      AND ar.status::text = 'EXPIRED'
                ) AS has_expired_approval
            FROM incidents i
            WHERE i.status = 'INVESTIGATING'
              AND i.created_at <= now() - interval '24 hours'
        )
        SELECT
            incident_id,
            CASE
                WHEN has_success_auto_repair THEN 'auto_repair'
                WHEN has_execution_success THEN 'auto_repair'
                ELSE 'timeout'
            END AS resolution_type,
            CASE
                WHEN has_success_auto_repair THEN 'auto_repair_execution_success'
                WHEN has_execution_success THEN 'approval_execution_success'
                ELSE 'approval_expired'
            END AS reason
        FROM stale
        WHERE has_success_auto_repair
           OR has_execution_success
           OR has_expired_approval
        ORDER BY created_at DESC
        LIMIT :limit
        """
    )

    async with get_db_context() as db:
        result = await db.execute(statement, {"limit": limit})
        rows = result.mappings().all()

    candidates: list[LifecycleCandidate] = []
    for row in rows:
        candidates.append(
            LifecycleCandidate(
                incident_id=str(row["incident_id"]),
                resolution_type=str(row["resolution_type"]),
                reason=str(row["reason"]),
            )
        )
    return candidates
|
||||
|
||||
|
||||
async def _fetch_inactive_or_duplicate_alert_candidates(
    *,
    limit: int,
    active_alertnames: set[str],
    exclude_incident_ids: set[str],
) -> list[LifecycleCandidate]:
    """Fetch stale incidents whose alert stopped firing, plus old duplicates of firing alerts.

    Considers incidents in status ``INVESTIGATING`` created at least 24 hours
    ago and not listed in ``exclude_incident_ids``. For an alertname that is
    still firing, the newest incident is kept and the older ones are returned
    as ``active_duplicate_stale``; incidents whose alertname is not firing
    are returned as ``inactive_alert_stale``. Oldest incidents come first.

    The caller fails closed and does not call this function when the firing
    alertnames could not be read.

    Args:
        limit: Maximum number of rows to return.
        active_alertnames: Alertnames currently firing.
        exclude_incident_ids: Incident ids that were already selected.

    Returns:
        Candidates with ``resolution_type="timeout"`` and
        ``direct_db_only=True``.
    """
    # Empty collections are replaced by a placeholder so the bound array
    # parameters are never empty.
    bind_params = {
        "active_alertnames": list(active_alertnames) or ["__no_active_alertnames__"],
        "exclude_incident_ids": list(exclude_incident_ids) or ["__no_excluded_incidents__"],
        "limit": limit,
    }
    statement = text(
        """
        WITH ranked AS (
            SELECT
                i.incident_id,
                i.alertname,
                i.created_at,
                row_number() OVER (
                    PARTITION BY i.alertname
                    ORDER BY i.created_at DESC, i.incident_id DESC
                ) AS rn
            FROM incidents i
            WHERE i.status = 'INVESTIGATING'
              AND i.created_at <= now() - interval '24 hours'
              AND NOT (i.incident_id = ANY(:exclude_incident_ids))
        )
        SELECT
            incident_id,
            CASE
                WHEN alertname = ANY(:active_alertnames)
                THEN 'active_duplicate_stale'
                ELSE 'inactive_alert_stale'
            END AS reason
        FROM ranked
        WHERE NOT (alertname = ANY(:active_alertnames) AND rn = 1)
        ORDER BY created_at ASC
        LIMIT :limit
        """
    )

    async with get_db_context() as db:
        result = await db.execute(statement, bind_params)
        rows = result.mappings().all()

    candidates: list[LifecycleCandidate] = []
    for row in rows:
        candidates.append(
            LifecycleCandidate(
                incident_id=str(row["incident_id"]),
                resolution_type="timeout",
                reason=str(row["reason"]),
                direct_db_only=True,
            )
        )
    return candidates
|
||||
@@ -28,7 +28,7 @@ from datetime import timedelta
|
||||
import structlog
|
||||
from sqlalchemy import select, update
|
||||
|
||||
from src.db.base import get_session_factory
|
||||
from src.db.base import get_db_context
|
||||
from src.db.models import AiGovernanceEvent, KnowledgeEntryRecord
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
@@ -129,7 +129,7 @@ class KbRotCleaner:
|
||||
rot_reasons: dict[str, list[str]] = {}
|
||||
total = 0
|
||||
|
||||
async with get_session_factory()() as session:
|
||||
async with get_db_context() as session:
|
||||
# 只掃 active 狀態(非 archived)
|
||||
q = await session.execute(
|
||||
select(KnowledgeEntryRecord).where(
|
||||
@@ -193,7 +193,7 @@ class KbRotCleaner:
|
||||
if not result.stale_ids:
|
||||
return
|
||||
|
||||
async with get_session_factory()() as session:
|
||||
async with get_db_context() as session:
|
||||
# 逐條更新(避免 bulk update 覆蓋 tags JSONB)
|
||||
q = await session.execute(
|
||||
select(KnowledgeEntryRecord).where(
|
||||
@@ -220,7 +220,7 @@ class KbRotCleaner:
|
||||
async def _save_event(self, result: RotScanResult) -> None:
|
||||
"""寫 kb_stale 事件到 ai_governance_events。"""
|
||||
try:
|
||||
async with get_session_factory()() as session:
|
||||
async with get_db_context() as session:
|
||||
event = AiGovernanceEvent(
|
||||
event_type="kb_stale",
|
||||
details=result.to_dict(),
|
||||
|
||||
@@ -25,7 +25,9 @@ Feature Flag:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
import structlog
|
||||
|
||||
from src.core.config import settings
|
||||
|
||||
@@ -33,7 +33,7 @@ from datetime import timedelta
|
||||
import structlog
|
||||
from sqlalchemy import and_, select, update
|
||||
|
||||
from src.db.base import get_session_factory
|
||||
from src.db.base import get_db_context
|
||||
from src.db.models import KnowledgeEntryRecord
|
||||
from src.models.knowledge import EntryStatus
|
||||
from src.utils.timezone import now_taipei
|
||||
@@ -112,8 +112,7 @@ class KnowledgeDecayJob:
|
||||
cutoff = now_taipei() - timedelta(days=DECAY_AGE_DAYS)
|
||||
decayable_statuses = [EntryStatus.DRAFT.value, EntryStatus.REVIEW.value]
|
||||
|
||||
session_factory = get_session_factory()
|
||||
async with session_factory() as db:
|
||||
async with get_db_context() as db:
|
||||
# 查:30 天未引用(view_count=0)且 updated_at < cutoff 的 draft/review 條目
|
||||
stmt = select(KnowledgeEntryRecord).where(
|
||||
and_(
|
||||
|
||||
@@ -29,7 +29,7 @@ from datetime import timedelta
|
||||
import structlog
|
||||
from sqlalchemy import and_, select
|
||||
|
||||
from src.db.base import get_session_factory
|
||||
from src.db.base import get_db_context
|
||||
from src.db.models import AgentSession, AiGovernanceEvent, AutoRepairExecution, IncidentEvidence
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
@@ -109,9 +109,7 @@ class OfflineReplayService:
|
||||
|
||||
async def _run_replay(self) -> OfflineReplayReport:
|
||||
cutoff = now_taipei() - timedelta(days=REPLAY_LOOKBACK_DAYS)
|
||||
session_factory = get_session_factory()
|
||||
|
||||
async with session_factory() as db:
|
||||
async with get_db_context() as db:
|
||||
# 1. 取最近 N 個有 AgentSession(coordinator) 的 Incident
|
||||
stmt = (
|
||||
select(AgentSession.incident_id)
|
||||
@@ -137,7 +135,7 @@ class OfflineReplayService:
|
||||
)
|
||||
|
||||
results: list[IncidentReplayResult] = []
|
||||
async with session_factory() as db:
|
||||
async with get_db_context() as db:
|
||||
for incident_id in incident_ids:
|
||||
r = await self._replay_one(db, incident_id)
|
||||
results.append(r)
|
||||
|
||||
@@ -31,20 +31,26 @@ from fastapi.responses import JSONResponse, Response
|
||||
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
|
||||
from sentry_sdk.integrations.fastapi import FastApiIntegration
|
||||
from sentry_sdk.integrations.starlette import StarletteIntegration
|
||||
from uvicorn.middleware.proxy_headers import ProxyHeadersMiddleware
|
||||
|
||||
from src.api.v1 import agents as agents_v1 # Phase 9.5: Agent Teams API
|
||||
from src.api.v1 import ai as ai_v1
|
||||
from src.api.v1 import aider_events as aider_events_v1 # aider-watch v2 ADR-091
|
||||
from src.api.v1 import ai_governance as ai_governance_v1 # 2026-05-02: /governance 頁面 3 endpoints
|
||||
from src.api.v1 import (
|
||||
ai_governance as ai_governance_v1, # 2026-05-02: /governance 頁面 3 endpoints
|
||||
)
|
||||
from src.api.v1 import ai_slo as ai_slo_v1 # Phase 6 ADR-087: AI SLO 自我治理
|
||||
from src.api.v1 import aider_events as aider_events_v1 # aider-watch v2 ADR-091
|
||||
from src.api.v1 import aiops_kpi as aiops_kpi_v1 # ADR-090 § Phase 7 KPI Dashboard
|
||||
from src.api.v1 import aiops_timeline as aiops_timeline_v1 # 2026-04-27 Wave8-X3 B4 timeline endpoint
|
||||
from src.api.v1 import approvals as approvals_v1
|
||||
from src.api.v1 import (
|
||||
aiops_timeline as aiops_timeline_v1, # 2026-04-27 Wave8-X3 B4 timeline endpoint
|
||||
)
|
||||
from src.api.v1 import alert_operation_logs as alert_operation_logs_v1
|
||||
from src.api.v1 import approvals as approvals_v1
|
||||
from src.api.v1 import audit_logs as audit_logs_v1
|
||||
from src.api.v1 import auto_repair as auto_repair_v1 # #8: 自動升級決策
|
||||
from src.api.v1 import csrf as csrf_v1 # Phase 20: CSRF Protection
|
||||
from src.api.v1 import dashboard as dashboard_v1
|
||||
from src.api.v1 import drift as drift_v1 # Phase 25 P2: Config Drift Detection
|
||||
from src.api.v1 import errors as errors_v1 # #40: Sentry 錯誤 BFF API
|
||||
from src.api.v1 import (
|
||||
gitea_webhook as gitea_webhook_v1, # ADR-059: Gitea → OpenClaw (GitHub → Gitea 遷移)
|
||||
@@ -56,19 +62,20 @@ from src.api.v1 import incidents as incidents_v1 # Phase 6.4: Decision Proposal
|
||||
from src.api.v1 import knowledge as knowledge_v1 # KB Phase 1: Knowledge Base
|
||||
from src.api.v1 import learning as learning_v1 # Phase D-G P0: Learning API
|
||||
from src.api.v1 import metrics as metrics_v1 # Phase 7: Gold Metrics (真實血脈)
|
||||
from src.api.v1 import monitoring as monitoring_v1 # 2026-04-03: 監控工具狀態
|
||||
from src.api.v1 import notifications as notifications_v1 # 2026-04-10: 通知頻道狀態
|
||||
from src.api.v1 import (
|
||||
platform as platform_v1, # AwoooP Phase 4: Platform Shell(Shadow Mode)
|
||||
)
|
||||
from src.api.v1 import playbooks as playbooks_v1 # #7: Playbook 萃取
|
||||
from src.api.v1 import proposals as proposals_v1 # Phase 6.4h: Proposals CRUD API
|
||||
from src.api.v1 import rag as rag_v1 # Phase 33 ADR-067: RAG 知識庫
|
||||
from src.api.v1 import (
|
||||
sentry_webhook as sentry_webhook_v1, # Phase 10.2.1: Sentry → Telegram
|
||||
)
|
||||
from src.api.v1 import (
|
||||
signoz_webhook as signoz_webhook_v1, # Phase 21: SignOz → Telegram (ADR-037)
|
||||
)
|
||||
from src.api.v1 import drift as drift_v1 # Phase 25 P2: Config Drift Detection
|
||||
from src.api.v1 import platform as platform_v1 # AwoooP Phase 4: Platform Shell(Shadow Mode)
|
||||
from src.api.v1 import rag as rag_v1 # Phase 33 ADR-067: RAG 知識庫
|
||||
from src.api.v1 import monitoring as monitoring_v1 # 2026-04-03: 監控工具狀態
|
||||
from src.api.v1 import notifications as notifications_v1 # 2026-04-10: 通知頻道狀態
|
||||
from src.api.v1 import stats as stats_v1 # Phase 6.5: Statistics Analytics
|
||||
from src.api.v1 import telegram as telegram_v1 # Phase 5.4: Telegram Gateway
|
||||
from src.api.v1 import telegram_webhook as telegram_webhook_v1 # ADR-094: Webhook入口
|
||||
@@ -76,10 +83,13 @@ from src.api.v1 import terminal as terminal_v1 # Phase 19.1: Omni-Terminal SSE
|
||||
from src.api.v1 import timeline as timeline_v1
|
||||
from src.api.v1 import webhooks as webhooks_v1
|
||||
from src.core.config import settings
|
||||
from src.core.feature_flags import aiops_flags # ADR-080: AI 自主化飛輪 feature flags 啟動驗證
|
||||
from src.core.http_client import close_all_http_clients, init_all_http_clients
|
||||
from src.core.logging import get_logger, setup_logging
|
||||
from src.core.redis_client import close_redis_pool, init_redis_pool
|
||||
from src.core.redis_client import (
|
||||
close_redis_pool,
|
||||
close_worker_redis_pool,
|
||||
init_redis_pool,
|
||||
)
|
||||
from src.core.sse import get_publisher
|
||||
from src.core.telemetry import setup_telemetry, shutdown_telemetry
|
||||
|
||||
@@ -91,7 +101,10 @@ from src.routers import proposals as proposals_router
|
||||
|
||||
# Legacy route imports (to be migrated)
|
||||
from src.routes import agent, notifications, pipelines, plugins
|
||||
from src.services.adr100_slo_metrics_service import get_adr100_slo_metrics_service
|
||||
from src.services.alert_chain_metrics_service import get_alert_chain_metrics_service
|
||||
from src.services.executor import close_executor
|
||||
from src.services.flywheel_stats_service import get_flywheel_stats_service
|
||||
|
||||
# Phase 5: OpenClaw AI Engine
|
||||
from src.services.openclaw import close_openclaw
|
||||
@@ -266,16 +279,21 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
# 2026-04-05 ogt: 重開機後 Redis 清空,從 DB restore 未解決的 incidents
|
||||
# 統帥批准: 數據必須長久記錄,重開機後自動恢復 Working Memory
|
||||
try:
|
||||
from src.services.incident_service import get_incident_service
|
||||
from sqlalchemy import select
|
||||
|
||||
from src.db.base import get_db_context
|
||||
from src.db.models import IncidentRecord
|
||||
from sqlalchemy import select
|
||||
from src.models.incident import IncidentStatus
|
||||
from src.services.incident_service import get_incident_service
|
||||
|
||||
incident_service = get_incident_service()
|
||||
async with get_db_context() as db:
|
||||
result = await db.execute(
|
||||
select(IncidentRecord).where(
|
||||
IncidentRecord.status.in_(["investigating", "mitigating"])
|
||||
IncidentRecord.status.in_([
|
||||
IncidentStatus.INVESTIGATING,
|
||||
IncidentStatus.MITIGATING,
|
||||
])
|
||||
)
|
||||
)
|
||||
records = result.scalars().all()
|
||||
@@ -283,31 +301,16 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
restored = 0
|
||||
for record in records:
|
||||
try:
|
||||
from src.models.incident import Incident
|
||||
incident = Incident(
|
||||
incident_id=record.incident_id,
|
||||
status=record.status,
|
||||
severity=record.severity,
|
||||
signals=record.signals or [],
|
||||
affected_services=record.affected_services or [],
|
||||
decision_chain=record.decision_chain,
|
||||
proposal_ids=record.proposal_ids or [],
|
||||
outcome=record.outcome,
|
||||
created_at=record.created_at,
|
||||
updated_at=record.updated_at,
|
||||
resolved_at=record.resolved_at,
|
||||
closed_at=record.closed_at,
|
||||
ttl_days=record.ttl_days,
|
||||
vectorized=record.vectorized,
|
||||
# ADR-073: 分類欄位必須還原,否則 KM 寫入時全為 "unknown"
|
||||
notification_type=record.notification_type,
|
||||
alert_category=record.alert_category,
|
||||
)
|
||||
incident = incident_service._record_to_incident(record)
|
||||
if await incident_service.save_to_working_memory(incident):
|
||||
restored += 1
|
||||
except Exception:
|
||||
except Exception as record_error:
|
||||
# 舊資料 source 值不合法(node-exporter 等)→ 跳過
|
||||
pass
|
||||
logger.warning(
|
||||
"working_memory_warmup_record_skipped",
|
||||
incident_id=getattr(record, "incident_id", None),
|
||||
error=str(record_error),
|
||||
)
|
||||
|
||||
logger.info("working_memory_warmed_up", restored=restored, total=len(records))
|
||||
except Exception as e:
|
||||
@@ -350,7 +353,9 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
logger.warning("playbook_pg_backfill_schedule_failed", error=str(e))
|
||||
|
||||
try:
|
||||
from src.services.playbook_embedding_service import ensure_playbook_embeddings_indexed
|
||||
from src.services.playbook_embedding_service import (
|
||||
ensure_playbook_embeddings_indexed,
|
||||
)
|
||||
asyncio.create_task(ensure_playbook_embeddings_indexed())
|
||||
logger.info("playbook_embedding_indexing_scheduled")
|
||||
except Exception as e:
|
||||
@@ -498,6 +503,40 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
except Exception as e:
|
||||
logger.warning("approval_timeout_resolver_schedule_failed", error=str(e))
|
||||
|
||||
# T73: 已有完成證據但仍卡在 INVESTIGATING 的舊 incident 小批次收斂。
|
||||
# 僅處理 auto-repair success / approval EXECUTION_SUCCESS / approval EXPIRED,
|
||||
# 不自動關閉 manual_required 或單純 APPROVED 事件。
|
||||
try:
|
||||
from src.jobs.incident_lifecycle_reconciler import (
|
||||
INTERVAL_SECONDS as INCIDENT_LIFECYCLE_RECONCILER_INTERVAL,
|
||||
)
|
||||
from src.jobs.incident_lifecycle_reconciler import (
|
||||
run_incident_lifecycle_reconciler_loop,
|
||||
)
|
||||
asyncio.create_task(run_incident_lifecycle_reconciler_loop())
|
||||
logger.info(
|
||||
"incident_lifecycle_reconciler_scheduled",
|
||||
interval_sec=INCIDENT_LIFECYCLE_RECONCILER_INTERVAL,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("incident_lifecycle_reconciler_schedule_failed", error=str(e))
|
||||
|
||||
# AwoooP Ansible check-mode worker.
|
||||
# 只執行 ansible-playbook --check --diff 並回寫 automation_operation_log;
|
||||
# apply 仍必須走 approval gate,本 worker 不寫 auto_repair_executions。
|
||||
try:
|
||||
from src.jobs.awooop_ansible_check_mode_job import (
|
||||
run_awooop_ansible_check_mode_loop,
|
||||
)
|
||||
asyncio.create_task(run_awooop_ansible_check_mode_loop())
|
||||
logger.info(
|
||||
"awooop_ansible_check_mode_worker_scheduled",
|
||||
enabled=settings.ENABLE_AWOOOP_ANSIBLE_CHECK_MODE_WORKER,
|
||||
interval_seconds=settings.AWOOOP_ANSIBLE_CHECK_MODE_INTERVAL_SECONDS,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("awooop_ansible_check_mode_worker_schedule_failed", error=str(e))
|
||||
|
||||
# ADR-083 Phase 3: Evolver Agent(每日)— Playbook 自動合併 + 低信任封存
|
||||
# 2026-04-15 ogt + Claude Sonnet 4.6(亞太): Phase 3 初始建立
|
||||
try:
|
||||
@@ -509,7 +548,9 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
|
||||
# ADR-104 T2: LLM Playbook DRAFT governance(每小時)
|
||||
try:
|
||||
from src.jobs.playbook_generation_governance_job import run_playbook_generation_governance_loop
|
||||
from src.jobs.playbook_generation_governance_job import (
|
||||
run_playbook_generation_governance_loop,
|
||||
)
|
||||
asyncio.create_task(run_playbook_generation_governance_loop())
|
||||
logger.info(
|
||||
"playbook_generation_governance_loop_scheduled",
|
||||
@@ -553,11 +594,11 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
# 2026-04-27 P3.1-T3 by Claude
|
||||
try:
|
||||
from src.utils.timezone import now_taipei
|
||||
from datetime import datetime as _dt
|
||||
|
||||
async def _run_kb_rot_cleaner_loop() -> None:
|
||||
from src.jobs.kb_rot_cleaner import get_kb_rot_cleaner
|
||||
import asyncio as _asyncio
|
||||
|
||||
from src.jobs.kb_rot_cleaner import get_kb_rot_cleaner
|
||||
while True:
|
||||
try:
|
||||
now = now_taipei()
|
||||
@@ -648,14 +689,24 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
except Exception as e:
|
||||
logger.warning("governance_dispatcher_schedule_failed", error=str(e))
|
||||
|
||||
# T90 2026-05-19 ogt + Codex: Hermes KB growth worker(每 5 分鐘)
|
||||
# 消費 knowledge_degradation 的 hermes_kb_growth_healthcheck dispatch,
|
||||
# 只產生 REVIEW 草稿並停在 owner review,不直接批准或發布 KM。
|
||||
try:
|
||||
from src.jobs.hermes_kb_growth_worker import run_hermes_kb_growth_loop
|
||||
asyncio.create_task(run_hermes_kb_growth_loop())
|
||||
logger.info("hermes_kb_growth_worker_scheduled", interval_sec=300)
|
||||
except Exception as e:
|
||||
logger.warning("hermes_kb_growth_worker_schedule_failed", error=str(e))
|
||||
|
||||
# 2026-04-25 P1.2 by Claude Engineer-A2 — failover 整合到 ai_router + lifespan
|
||||
# OllamaFailoverManager + OllamaAutoRecoveryService 飛輪接線:
|
||||
# failover 切換時 → recovery_callback → set_current_primary → Redis 持久化
|
||||
# recovery service 每 30s 檢查 → 111 連續 3 次 HEALTHY → 自動切回 → clear_cache
|
||||
# 順序:先取 singleton → wire callback → 啟動 recovery service(才能接收 callback)
|
||||
try:
|
||||
from src.services.ollama_failover_manager import get_ollama_failover_manager
|
||||
from src.services.ollama_auto_recovery import get_ollama_auto_recovery_service
|
||||
from src.services.ollama_failover_manager import get_ollama_failover_manager
|
||||
|
||||
_failover_mgr = get_ollama_failover_manager()
|
||||
_recovery_svc = get_ollama_auto_recovery_service()
|
||||
@@ -668,8 +719,8 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
# alerter 還沒注入 Redis → dedup fail-open,告警會送出且無 dedup 保護(重複告警風險)
|
||||
# 修法:configure_alerter() 提前到 start() 之前;Redis pool 在 lifespan 早期已就緒
|
||||
try:
|
||||
from src.services.failover_alerter import configure_alerter
|
||||
from src.core.redis_client import get_redis
|
||||
from src.services.failover_alerter import configure_alerter
|
||||
configure_alerter(get_redis())
|
||||
logger.info("failover_alerter_configured")
|
||||
except Exception as _alerter_err:
|
||||
@@ -683,7 +734,7 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
logger.warning("ollama_failover_system_start_failed", error=str(e))
|
||||
|
||||
# 2026-04-27 P3.2.2 by Claude — AI Provider 版本追蹤(每 1 小時)
|
||||
# 探測 5 Provider(ollama/ollama_188/gemini/claude/openclaw_nemo)版本
|
||||
# 探測 5 Provider(ollama/ollama_local/gemini/claude/openclaw_nemo)版本
|
||||
# 寫入 ai_provider_version_history;版本變更時 log warning,P3.2.3 alerter 後續整合
|
||||
try:
|
||||
async def _run_model_version_tracker_loop() -> None:
|
||||
@@ -753,6 +804,7 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
|
||||
# Phase 6.1: 關閉 Signal Worker (先關閉 Consumer)
|
||||
await close_signal_worker()
|
||||
await close_worker_redis_pool()
|
||||
await publisher.stop()
|
||||
await close_executor()
|
||||
await close_openclaw()
|
||||
@@ -805,11 +857,8 @@ else:
|
||||
# Middleware
|
||||
# =============================================================================
|
||||
|
||||
# 2026-04-03 ogt: Nginx 反向代理修正 — 讓 FastAPI 信任 X-Forwarded-Proto
|
||||
# 解決問題: /api/v1/knowledge (無結尾斜線) 307 redirect 產生 http:// Location
|
||||
# 原因: FastAPI 不知道自己在 HTTPS 後面,redirect 回 http://
|
||||
# 效果: 有了此中間件,307 Location 會是 https://
|
||||
from uvicorn.middleware.proxy_headers import ProxyHeadersMiddleware
|
||||
# 2026-04-03 ogt: Nginx 反向代理修正 — 讓 FastAPI 信任 X-Forwarded-Proto。
|
||||
# 避免 /api/v1/knowledge 等 redirect 在 HTTPS 反向代理後產生 http:// Location。
|
||||
app.add_middleware(ProxyHeadersMiddleware, trusted_hosts="*")
|
||||
|
||||
# CORS - Strict Whitelist (Iron Law #2)
|
||||
@@ -1005,10 +1054,33 @@ app.include_router(platform_v1.router, prefix="/api/v1/platform", tags=["AwoooP
|
||||
@app.get("/metrics", include_in_schema=False)
|
||||
async def prometheus_metrics() -> Response:
|
||||
"""Prometheus metrics endpoint for alerting"""
|
||||
return Response(
|
||||
content=generate_latest(),
|
||||
media_type=CONTENT_TYPE_LATEST,
|
||||
)
|
||||
# 2026-05-19 Codex — T85 Alert Chain DB evidence refresh.
|
||||
# record_alert_chain_success() 是 process-local gauge;部署後第一個 scrape
|
||||
# 可能尚未收到新 webhook,導致 smoke test 誤判 metric 不存在。
|
||||
# 先用 AwoooP inbound / alert_operation_log 的 durable evidence 回填 last_success。
|
||||
try:
|
||||
await get_alert_chain_metrics_service().refresh_last_success_gauge()
|
||||
except Exception as exc:
|
||||
logger.warning("prometheus_metrics_alert_chain_evidence_error", error=str(exc))
|
||||
|
||||
content = generate_latest().decode("utf-8")
|
||||
# 2026-05-07 ogt + Claude Sonnet 4.6 — INC-20260507-99ADF2 修復
|
||||
# 飛輪指標(awoooi_flywheel_*)原本只在 /api/v1/stats/flywheel/metrics 暴露,
|
||||
# 110 Prom awoooi-api job scrape /metrics 時抓不到 → FlywheelExecutionRateMissing 永久 firing
|
||||
# 修法:在此串入飛輪指標,讓既有 scrape job 無需新增 job 即可抓到
|
||||
try:
|
||||
flywheel_metrics = await get_flywheel_stats_service().compute()
|
||||
content += flywheel_metrics.to_prometheus_lines()
|
||||
except Exception:
|
||||
logger.warning("prometheus_metrics_flywheel_error")
|
||||
# 2026-05-14 Codex — T18 ADR-100 SLO emitter
|
||||
# GovernanceAgent 讀 Prometheus recording rules;若 /metrics 不吐底層 DB totals,
|
||||
# sli:* rules 會全空並每小時重複發 governance_slo_data_gap。
|
||||
try:
|
||||
content += await get_adr100_slo_metrics_service().to_prometheus_lines()
|
||||
except Exception as exc:
|
||||
logger.warning("prometheus_metrics_adr100_slo_error", error=str(exc))
|
||||
return Response(content=content, media_type=CONTENT_TYPE_LATEST)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
||||
@@ -167,6 +167,8 @@ class ApprovalRequest(ApprovalRequestBase):
|
||||
fingerprint: str | None = Field(default=None, description="告警指紋 Hash")
|
||||
hit_count: int = Field(default=1, description="聚合觸發次數")
|
||||
last_seen_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc), description="最後觸發時間")
|
||||
telegram_message_id: int | None = Field(default=None, description="Telegram approval card message ID")
|
||||
telegram_chat_id: int | None = Field(default=None, description="Telegram chat ID for the approval card")
|
||||
# 2026-04-14 Claude Sonnet 4.6: incident_id 已移至 Base(避免 ApprovalRequestCreate 缺欄位)
|
||||
|
||||
@property
|
||||
@@ -216,6 +218,10 @@ class ApprovalRequestResponse(BaseModel):
|
||||
hit_count: int = 1
|
||||
last_seen_at: datetime | None = None
|
||||
# Phase 6.5: Incident 關聯 (用於簽核後更新 Incident 狀態)
|
||||
incident_id: str | None = None
|
||||
matched_playbook_id: str | None = None
|
||||
telegram_message_id: int | None = None
|
||||
telegram_chat_id: int | None = None
|
||||
metadata: dict | None = None
|
||||
|
||||
@classmethod
|
||||
@@ -241,6 +247,10 @@ class ApprovalRequestResponse(BaseModel):
|
||||
hit_count=approval.hit_count,
|
||||
last_seen_at=approval.last_seen_at,
|
||||
# Phase 6.5
|
||||
incident_id=approval.incident_id,
|
||||
matched_playbook_id=approval.matched_playbook_id,
|
||||
telegram_message_id=approval.telegram_message_id,
|
||||
telegram_chat_id=approval.telegram_chat_id,
|
||||
metadata=approval.metadata,
|
||||
)
|
||||
|
||||
|
||||
@@ -87,13 +87,27 @@ class DispatchItem(BaseModel):
|
||||
governance_event_id: str
|
||||
event_type: str
|
||||
dispatch_status: str
|
||||
executor_type: str | None = None
|
||||
proposed_action: str = Field(description="≤120 字動作摘要")
|
||||
playbook_id: str | None = None
|
||||
playbook_trust: float | None = Field(default=None, ge=0.0, le=1.0)
|
||||
created_at: datetime
|
||||
dispatched_at: datetime | None = None
|
||||
started_at: datetime | None = None
|
||||
completed_at: datetime | None = None
|
||||
operator_note: str | None = None
|
||||
decision_path: str | None = None
|
||||
workflow_stage: str | None = None
|
||||
workflow_steps: list[str] = Field(default_factory=list)
|
||||
next_action: str | None = None
|
||||
lead_agent: str | None = None
|
||||
support_agents: list[str] = Field(default_factory=list)
|
||||
human_owner: str | None = None
|
||||
kb_draft_entry_id: str | None = None
|
||||
worker_status: str | None = None
|
||||
dry_run_plan_fingerprint: str | None = None
|
||||
archived_count: int | None = None
|
||||
stale_ratio_snapshot: dict | None = None
|
||||
|
||||
|
||||
class GovernanceQueueResponse(BaseModel):
|
||||
@@ -107,6 +121,475 @@ class GovernanceQueueResponse(BaseModel):
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Endpoint 2B: KM review draft dedupe
|
||||
# =============================================================================
|
||||
|
||||
class KnowledgeReviewDraftDedupeGroup(BaseModel):
|
||||
governance_event_id: str
|
||||
canonical_entry_id: str
|
||||
canonical_title: str
|
||||
canonical_updated_at: datetime | None = None
|
||||
preferred_source: Literal["dispatch_context", "latest_review_draft"]
|
||||
duplicate_entry_ids: list[str] = Field(default_factory=list)
|
||||
duplicate_count: int
|
||||
total_entries: int
|
||||
suggested_action: str
|
||||
owner_action: str
|
||||
writes_on_read: bool = False
|
||||
can_archive_without_owner_approval: bool = False
|
||||
archive_history: list[DispatchItem] = Field(default_factory=list)
|
||||
|
||||
|
||||
class KnowledgeReviewDraftDedupeResponse(BaseModel):
|
||||
schema_version: str = "km_review_draft_dedupe_v1"
|
||||
total_review_drafts: int
|
||||
event_group_total: int
|
||||
duplicate_draft_total: int
|
||||
groups: list[KnowledgeReviewDraftDedupeGroup]
|
||||
generated_at: datetime
|
||||
|
||||
|
||||
class KnowledgeReviewDraftArchiveRequest(BaseModel):
|
||||
canonical_entry_id: str = Field(min_length=1, max_length=120)
|
||||
duplicate_entry_ids: list[str] = Field(min_length=1, max_length=100)
|
||||
owner: str = Field(default="operator_console", min_length=1, max_length=100)
|
||||
owner_approved: bool = False
|
||||
dry_run: bool = False
|
||||
dry_run_plan_fingerprint: str | None = Field(
|
||||
default=None,
|
||||
max_length=80,
|
||||
description="Dry-run response fingerprint that must be echoed before a write.",
|
||||
)
|
||||
|
||||
|
||||
class KnowledgeReviewDraftStaleRatioSnapshot(BaseModel):
|
||||
stale_count: int
|
||||
total_count: int
|
||||
stale_ratio: float
|
||||
threshold: float
|
||||
stale_days: int
|
||||
|
||||
|
||||
class KnowledgeReviewDraftArchiveResponse(BaseModel):
|
||||
schema_version: str = "km_review_draft_archive_v1"
|
||||
governance_event_id: str
|
||||
canonical_entry_id: str
|
||||
requested_duplicate_entry_ids: list[str]
|
||||
archived_entry_ids: list[str] = Field(default_factory=list)
|
||||
skipped_entry_ids: list[str] = Field(default_factory=list)
|
||||
would_archive_entry_ids: list[str] = Field(default_factory=list)
|
||||
status: Literal["dry_run", "archived", "noop_already_archived"]
|
||||
owner: str
|
||||
owner_approved: bool
|
||||
dry_run: bool
|
||||
writes_km: bool
|
||||
writes_governance_audit: bool
|
||||
audit_dispatch_id: str | None = None
|
||||
stale_ratio_snapshot: KnowledgeReviewDraftStaleRatioSnapshot | None = None
|
||||
stale_ratio_recheck_status: Literal[
|
||||
"dry_run",
|
||||
"completed",
|
||||
"already_active",
|
||||
"not_requested",
|
||||
] = "not_requested"
|
||||
stale_ratio_recheck_dispatch_id: str | None = None
|
||||
dry_run_plan_fingerprint: str | None = None
|
||||
next_action: str = "stale_ratio_recheck"
|
||||
generated_at: datetime
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Endpoint 2C: KM stale candidates
|
||||
# =============================================================================
|
||||
|
||||
class KnowledgeStaleCandidate(BaseModel):
|
||||
entry_id: str
|
||||
project_id: str
|
||||
title: str
|
||||
entry_type: str
|
||||
category: str | None = None
|
||||
status: str
|
||||
source: str | None = None
|
||||
updated_at: datetime | None = None
|
||||
stale_days: int
|
||||
view_count: int
|
||||
priority_score: int
|
||||
priority_tier: Literal["P0", "P1", "P2"]
|
||||
recommended_action: Literal[
|
||||
"refresh_with_evidence",
|
||||
"owner_review",
|
||||
"archive_or_supersede",
|
||||
]
|
||||
reasons: list[str] = Field(default_factory=list)
|
||||
correlation_sources: list[str] = Field(default_factory=list)
|
||||
related_incident_id: str | None = None
|
||||
related_playbook_id: str | None = None
|
||||
related_approval_id: str | None = None
|
||||
tags: list[str] = Field(default_factory=list)
|
||||
owner_review_dispatch_id: str | None = None
|
||||
owner_review_status: str | None = None
|
||||
owner_review_stage: str | None = None
|
||||
owner_review_next_action: str | None = None
|
||||
|
||||
|
||||
class KnowledgeStaleCandidatesResponse(BaseModel):
|
||||
schema_version: str = "km_stale_candidates_v1"
|
||||
project_id: str
|
||||
total_stale: int
|
||||
returned: int
|
||||
threshold_days: int
|
||||
writes_on_read: bool = False
|
||||
manual_review_required: bool = True
|
||||
items: list[KnowledgeStaleCandidate]
|
||||
generated_at: datetime
|
||||
|
||||
|
||||
class KnowledgeStaleOwnerReviewRequest(BaseModel):
|
||||
owner: str = Field(default="operator_console", min_length=1, max_length=100)
|
||||
owner_note: str | None = Field(default=None, max_length=240)
|
||||
dry_run: bool = False
|
||||
|
||||
|
||||
class KnowledgeStaleOwnerReviewResponse(BaseModel):
|
||||
schema_version: str = "km_stale_owner_review_v1"
|
||||
entry_id: str
|
||||
project_id: str
|
||||
status: Literal["dry_run", "queued", "already_queued"]
|
||||
governance_event_id: str | None = None
|
||||
dispatch_id: str | None = None
|
||||
workflow_stage: str
|
||||
recommended_action: Literal[
|
||||
"refresh_with_evidence",
|
||||
"owner_review",
|
||||
"archive_or_supersede",
|
||||
]
|
||||
owner: str
|
||||
owner_note: str | None = None
|
||||
writes_km: bool = False
|
||||
writes_governance_audit: bool
|
||||
next_action: str = "owner_review_stale_km_candidate"
|
||||
generated_at: datetime
|
||||
|
||||
|
||||
class KnowledgeStaleOwnerReviewBatchQueueRequest(BaseModel):
|
||||
project_id: str = Field(default="awoooi", min_length=1, max_length=64)
|
||||
priority_tiers: list[Literal["P0", "P1", "P2"]] = Field(
|
||||
default_factory=lambda: ["P0", "P1"],
|
||||
min_length=1,
|
||||
max_length=3,
|
||||
)
|
||||
limit: int = Field(default=10, ge=1, le=50)
|
||||
owner: str = Field(default="operator_console", min_length=1, max_length=100)
|
||||
owner_note: str | None = Field(default=None, max_length=240)
|
||||
dry_run: bool = False
|
||||
dry_run_plan_fingerprint: str | None = Field(
|
||||
default=None,
|
||||
max_length=80,
|
||||
description="Dry-run response fingerprint that must be echoed before queueing a batch.",
|
||||
)
|
||||
|
||||
|
||||
class KnowledgeStaleOwnerReviewBatchItem(BaseModel):
|
||||
entry_id: str
|
||||
title: str
|
||||
priority_tier: Literal["P0", "P1", "P2"]
|
||||
recommended_action: Literal[
|
||||
"refresh_with_evidence",
|
||||
"owner_review",
|
||||
"archive_or_supersede",
|
||||
]
|
||||
status: Literal["would_queue", "queued", "already_queued", "skipped"]
|
||||
reason: str | None = None
|
||||
governance_event_id: str | None = None
|
||||
dispatch_id: str | None = None
|
||||
workflow_stage: str
|
||||
|
||||
|
||||
class KnowledgeStaleOwnerReviewBatchQueueResponse(BaseModel):
|
||||
schema_version: str = "km_stale_owner_review_batch_v1"
|
||||
project_id: str
|
||||
status: Literal["dry_run", "queued", "noop_already_queued"]
|
||||
owner: str
|
||||
owner_note: str | None = None
|
||||
dry_run: bool
|
||||
priority_tiers: list[str]
|
||||
requested_limit: int
|
||||
candidate_count: int
|
||||
queued_count: int
|
||||
already_queued_count: int
|
||||
skipped_count: int
|
||||
batch_governance_event_id: str | None = None
|
||||
batch_dispatch_id: str | None = None
|
||||
workflow_stage: str
|
||||
writes_km: bool = False
|
||||
writes_governance_audit: bool
|
||||
stale_ratio_snapshot: KnowledgeReviewDraftStaleRatioSnapshot | None = None
|
||||
dry_run_plan_fingerprint: str | None = None
|
||||
items: list[KnowledgeStaleOwnerReviewBatchItem] = Field(default_factory=list)
|
||||
next_action: str = "owner_review_stale_km_batch"
|
||||
generated_at: datetime
|
||||
|
||||
|
||||
class KnowledgeStaleOwnerReviewInboxItem(BaseModel):
|
||||
dispatch_id: str
|
||||
governance_event_id: str
|
||||
entry_id: str
|
||||
project_id: str
|
||||
title: str
|
||||
dispatch_status: str
|
||||
workflow_stage: str
|
||||
next_action: str | None = None
|
||||
owner: str | None = None
|
||||
owner_note: str | None = None
|
||||
batch_governance_event_id: str | None = None
|
||||
batch_dispatch_id: str | None = None
|
||||
priority_tier: Literal["P0", "P1", "P2"]
|
||||
priority_score: int
|
||||
recommended_action: Literal[
|
||||
"refresh_with_evidence",
|
||||
"owner_review",
|
||||
"archive_or_supersede",
|
||||
]
|
||||
stale_days: int
|
||||
view_count: int
|
||||
correlation_sources: list[str] = Field(default_factory=list)
|
||||
reasons: list[str] = Field(default_factory=list)
|
||||
related_incident_id: str | None = None
|
||||
related_playbook_id: str | None = None
|
||||
related_approval_id: str | None = None
|
||||
dry_run_plan_fingerprint: str | None = None
|
||||
queued_at: datetime | None = None
|
||||
started_at: datetime | None = None
|
||||
completed_at: datetime | None = None
|
||||
|
||||
|
||||
class KnowledgeStaleOwnerReviewInboxResponse(BaseModel):
|
||||
schema_version: str = "km_stale_owner_review_inbox_v1"
|
||||
project_id: str
|
||||
dispatch_status: str
|
||||
total: int
|
||||
returned: int
|
||||
writes_on_read: bool = False
|
||||
manual_review_required: bool = True
|
||||
items: list[KnowledgeStaleOwnerReviewInboxItem] = Field(default_factory=list)
|
||||
generated_at: datetime
|
||||
|
||||
|
||||
class KnowledgeStaleOwnerReviewBurnDownItem(BaseModel):
|
||||
completion_dispatch_id: str
|
||||
governance_event_id: str
|
||||
source_dispatch_id: str | None = None
|
||||
recheck_dispatch_id: str | None = None
|
||||
entry_id: str | None = None
|
||||
project_id: str
|
||||
dispatch_status: str
|
||||
workflow_stage: str
|
||||
review_outcome: Literal[
|
||||
"refresh_with_evidence",
|
||||
"archive",
|
||||
"supersede",
|
||||
] | None = None
|
||||
owner: str | None = None
|
||||
completed_at: datetime | None = None
|
||||
stale_ratio_snapshot: KnowledgeReviewDraftStaleRatioSnapshot | None = None
|
||||
stale_count_delta: int | None = None
|
||||
stale_ratio_delta: float | None = None
|
||||
above_threshold: bool | None = None
|
||||
|
||||
|
||||
class KnowledgeStaleOwnerReviewBurnDownResponse(BaseModel):
|
||||
schema_version: str = "km_stale_owner_review_burndown_v1"
|
||||
project_id: str
|
||||
burn_down_status: Literal["above_threshold", "at_or_below_threshold", "no_data"]
|
||||
current_snapshot: KnowledgeReviewDraftStaleRatioSnapshot | None = None
|
||||
entries_to_threshold: int
|
||||
pending_owner_reviews: int
|
||||
completed_owner_reviews: int
|
||||
completion_audit_total: int
|
||||
stale_ratio_recheck_total: int
|
||||
latest_stale_count_delta: int | None = None
|
||||
latest_stale_ratio_delta: float | None = None
|
||||
writes_on_read: bool = False
|
||||
manual_review_required: bool = True
|
||||
returned: int
|
||||
items: list[KnowledgeStaleOwnerReviewBurnDownItem] = Field(default_factory=list)
|
||||
generated_at: datetime
|
||||
|
||||
|
||||
class KnowledgeStaleOwnerReviewCompletionQueueItem(BaseModel):
|
||||
dispatch_id: str
|
||||
governance_event_id: str
|
||||
entry_id: str
|
||||
project_id: str
|
||||
title: str
|
||||
dispatch_status: str
|
||||
workflow_stage: str
|
||||
readiness: Literal["ready", "blocked", "completed", "failed"]
|
||||
recommended_completion_outcome: Literal[
|
||||
"refresh_with_evidence",
|
||||
"archive",
|
||||
"supersede",
|
||||
]
|
||||
next_action: str
|
||||
blockers: list[str] = Field(default_factory=list)
|
||||
required_owner_fields: list[str] = Field(default_factory=list)
|
||||
can_preview: bool
|
||||
can_confirm_after_preview: bool
|
||||
writes_km_on_confirm: bool
|
||||
owner: str | None = None
|
||||
owner_note: str | None = None
|
||||
batch_governance_event_id: str | None = None
|
||||
batch_dispatch_id: str | None = None
|
||||
priority_tier: Literal["P0", "P1", "P2"]
|
||||
priority_score: int
|
||||
recommended_action: Literal[
|
||||
"refresh_with_evidence",
|
||||
"owner_review",
|
||||
"archive_or_supersede",
|
||||
]
|
||||
stale_days: int
|
||||
view_count: int
|
||||
correlation_sources: list[str] = Field(default_factory=list)
|
||||
reasons: list[str] = Field(default_factory=list)
|
||||
related_incident_id: str | None = None
|
||||
related_playbook_id: str | None = None
|
||||
related_approval_id: str | None = None
|
||||
dry_run_plan_fingerprint: str | None = None
|
||||
queued_at: datetime | None = None
|
||||
started_at: datetime | None = None
|
||||
completed_at: datetime | None = None
|
||||
|
||||
|
||||
class KnowledgeStaleOwnerReviewCompletionQueueResponse(BaseModel):
|
||||
schema_version: str = "km_stale_owner_review_completion_queue_v1"
|
||||
project_id: str
|
||||
status_bucket: Literal["all", "ready", "blocked", "completed", "failed", "pending"]
|
||||
priority_tiers: list[str] = Field(default_factory=list)
|
||||
recommended_completion_outcome: Literal[
|
||||
"all",
|
||||
"refresh_with_evidence",
|
||||
"archive",
|
||||
"supersede",
|
||||
] = "all"
|
||||
batch_governance_event_id: str | None = None
|
||||
can_preview: bool | None = None
|
||||
total: int
|
||||
returned: int
|
||||
pending_count: int
|
||||
ready_count: int
|
||||
blocked_count: int
|
||||
completed_count: int
|
||||
failed_count: int
|
||||
writes_on_read: bool = False
|
||||
manual_review_required: bool = True
|
||||
batch_writes_allowed: bool = False
|
||||
items: list[KnowledgeStaleOwnerReviewCompletionQueueItem] = Field(default_factory=list)
|
||||
generated_at: datetime
|
||||
|
||||
|
||||
class KnowledgeStaleOwnerReviewCompletionBatchPreviewRequest(BaseModel):
|
||||
project_id: str = Field(default="awoooi", min_length=1, max_length=64)
|
||||
status_bucket: Literal["all", "ready", "blocked", "completed", "failed", "pending"] = "ready"
|
||||
priority_tiers: list[Literal["P0", "P1", "P2"]] = Field(
|
||||
default_factory=lambda: ["P0", "P1", "P2"],
|
||||
min_length=1,
|
||||
max_length=3,
|
||||
)
|
||||
recommended_completion_outcome: Literal[
|
||||
"all",
|
||||
"refresh_with_evidence",
|
||||
"archive",
|
||||
"supersede",
|
||||
] = "all"
|
||||
batch_governance_event_id: str | None = Field(default=None, max_length=120)
|
||||
limit: int = Field(default=10, ge=1, le=30)
|
||||
owner: str = Field(default="operator_console", min_length=1, max_length=100)
|
||||
owner_note: str | None = Field(default=None, max_length=240)
|
||||
|
||||
|
||||
class KnowledgeStaleOwnerReviewCompletionBatchPreviewResponse(BaseModel):
|
||||
schema_version: str = "km_stale_owner_review_completion_batch_preview_v1"
|
||||
project_id: str
|
||||
status: Literal["dry_run"] = "dry_run"
|
||||
owner: str
|
||||
owner_note: str | None = None
|
||||
status_bucket: Literal["all", "ready", "blocked", "completed", "failed", "pending"]
|
||||
priority_tiers: list[str]
|
||||
recommended_completion_outcome: Literal[
|
||||
"all",
|
||||
"refresh_with_evidence",
|
||||
"archive",
|
||||
"supersede",
|
||||
]
|
||||
batch_governance_event_id: str | None = None
|
||||
requested_limit: int
|
||||
candidate_count: int
|
||||
previewable_count: int
|
||||
blocked_count: int
|
||||
completed_count: int
|
||||
failed_count: int
|
||||
writes_km: bool = False
|
||||
writes_governance_audit: bool = False
|
||||
batch_writes_allowed: bool = False
|
||||
manual_review_required: bool = True
|
||||
dry_run_plan_fingerprint: str
|
||||
next_action: str = "preview_each_ready_item_then_confirm_single_item"
|
||||
items: list[KnowledgeStaleOwnerReviewCompletionQueueItem] = Field(default_factory=list)
|
||||
generated_at: datetime
|
||||
|
||||
|
||||
class KnowledgeStaleOwnerReviewCompleteRequest(BaseModel):
|
||||
dispatch_id: str | None = Field(
|
||||
default=None,
|
||||
max_length=120,
|
||||
description="Owner-review dispatch id. Optional when the backend can resolve the active item by entry id.",
|
||||
)
|
||||
owner: str = Field(default="operator_console", min_length=1, max_length=100)
|
||||
owner_approved: bool = False
|
||||
dry_run: bool = False
|
||||
review_outcome: Literal[
|
||||
"refresh_with_evidence",
|
||||
"archive",
|
||||
"supersede",
|
||||
]
|
||||
owner_note: str | None = Field(default=None, max_length=500)
|
||||
updated_title: str | None = Field(default=None, min_length=1, max_length=255)
|
||||
updated_content: str | None = Field(default=None, min_length=1)
|
||||
superseded_by_entry_id: str | None = Field(default=None, max_length=120)
|
||||
dry_run_plan_fingerprint: str | None = Field(
|
||||
default=None,
|
||||
max_length=80,
|
||||
description="Dry-run response fingerprint that must be echoed before a write.",
|
||||
)
|
||||
|
||||
|
||||
class KnowledgeStaleOwnerReviewCompleteResponse(BaseModel):
|
||||
schema_version: str = "km_stale_owner_review_complete_v1"
|
||||
entry_id: str
|
||||
project_id: str
|
||||
status: Literal["dry_run", "completed", "already_completed"]
|
||||
review_outcome: Literal[
|
||||
"refresh_with_evidence",
|
||||
"archive",
|
||||
"supersede",
|
||||
]
|
||||
governance_event_id: str
|
||||
dispatch_id: str
|
||||
audit_dispatch_id: str | None = None
|
||||
stale_ratio_recheck_dispatch_id: str | None = None
|
||||
workflow_stage: str
|
||||
owner: str
|
||||
owner_approved: bool
|
||||
dry_run: bool
|
||||
writes_km: bool
|
||||
writes_governance_audit: bool
|
||||
stale_ratio_snapshot: KnowledgeReviewDraftStaleRatioSnapshot | None = None
|
||||
dry_run_plan_fingerprint: str | None = None
|
||||
next_action: str = "stale_ratio_recheck"
|
||||
generated_at: datetime
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Endpoint 3: summary
|
||||
# =============================================================================
|
||||
|
||||
@@ -29,7 +29,7 @@ from __future__ import annotations
|
||||
from prometheus_client import Histogram
|
||||
|
||||
# Buckets 對齊 NIM 實測分佈(2-27s),並覆蓋三段 timeout 30/20/15s 邊界
|
||||
# 低端(0.5-5s):快速路徑(Ollama 188 本地)
|
||||
# 低端(0.5-5s):快速路徑(Ollama provider pool)
|
||||
# 中端(5-20s):NIM + Gemini fallback
|
||||
# 高端(20-60s):超時 / 慢速 Provider
|
||||
_AGENT_STEP_BUCKETS = [0.5, 1.0, 2.0, 5.0, 10.0, 15.0, 20.0, 30.0, 45.0, 60.0]
|
||||
|
||||
@@ -39,14 +39,15 @@ import hashlib
|
||||
import json
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
import structlog
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from src.core.redis_client import get_redis
|
||||
from src.db.awooop_models import (
|
||||
AwoooPActiveRevision,
|
||||
AwoooPMcpGatewayAudit,
|
||||
@@ -277,7 +278,7 @@ class McpGateway:
|
||||
self, ctx: GatewayContext, gate_result: GateCheckResult
|
||||
) -> tuple[AwoooPMcpToolRegistry, AwoooPMcpGrant]:
|
||||
"""Gate 3:tool 在白名單 + grant 有效(未到期、未撤銷)"""
|
||||
now = datetime.now(timezone.utc)
|
||||
now = datetime.now(UTC)
|
||||
|
||||
# 查 tool registry
|
||||
tool_result = await self._db.execute(
|
||||
@@ -359,14 +360,9 @@ class McpGateway:
|
||||
raise GateApprovalError("write/admin 操作需要 run_id(approval 追蹤用)")
|
||||
|
||||
try:
|
||||
import aioredis
|
||||
|
||||
from src.core.config import settings
|
||||
|
||||
redis = aioredis.from_url(settings.REDIS_URL)
|
||||
redis = get_redis()
|
||||
approval_key = f"mcp_approval:{ctx.project_id}:{ctx.agent_id}:{ctx.tool_name}:{ctx.run_id}"
|
||||
approved = await redis.get(approval_key)
|
||||
await redis.aclose()
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"mcp_gate5_redis_error",
|
||||
@@ -392,10 +388,7 @@ class McpGateway:
|
||||
parameters: dict[str, Any],
|
||||
) -> MCPToolResult:
|
||||
"""呼叫底層 MCP provider 執行工具"""
|
||||
registry = get_provider_registry()
|
||||
provider = registry.get(ctx.tool_name) or registry.get(
|
||||
tool_row.tool_name if tool_row else ctx.tool_name
|
||||
)
|
||||
provider = await self._resolve_provider(ctx, tool_row)
|
||||
|
||||
# 找不到 provider → 回傳 shadow no-op
|
||||
if provider is None:
|
||||
@@ -411,14 +404,57 @@ class McpGateway:
|
||||
)
|
||||
|
||||
audit_params = dict(parameters)
|
||||
existing_audit = (
|
||||
parameters.get("_mcp_audit")
|
||||
if isinstance(parameters, dict) and isinstance(parameters.get("_mcp_audit"), dict)
|
||||
else {}
|
||||
)
|
||||
audit_params["_mcp_audit"] = {
|
||||
"project_id": ctx.project_id,
|
||||
"agent_id": ctx.agent_id,
|
||||
"run_id": str(ctx.run_id) if ctx.run_id else None,
|
||||
"trace_id": ctx.trace_id,
|
||||
"incident_id": existing_audit.get("incident_id") or ctx.trace_id,
|
||||
"session_id": existing_audit.get("session_id"),
|
||||
"flywheel_node": existing_audit.get("flywheel_node"),
|
||||
"agent_role": existing_audit.get("agent_role") or ctx.agent_id,
|
||||
"gateway_path": "awooop_mcp_gateway",
|
||||
}
|
||||
return await provider.execute(ctx.tool_name, audit_params)
|
||||
|
||||
async def _resolve_provider(
|
||||
self,
|
||||
ctx: GatewayContext,
|
||||
tool_row: AwoooPMcpToolRegistry | None,
|
||||
):
|
||||
"""Find the provider that owns ctx.tool_name.
|
||||
|
||||
ProviderRegistry is keyed by provider name (`kubernetes`, `ssh_host`, ...),
|
||||
while GatewayContext intentionally uses the governed tool name
|
||||
(`kubectl_get`, `ssh_diagnose`, ...). Scan provider tool manifests as the
|
||||
compatibility bridge until registry exposes a first-class tool index.
|
||||
"""
|
||||
registry = get_provider_registry()
|
||||
direct = registry.get(ctx.tool_name)
|
||||
if direct is not None:
|
||||
return direct
|
||||
|
||||
lookup_name = tool_row.tool_name if tool_row else ctx.tool_name
|
||||
for provider in registry.all():
|
||||
try:
|
||||
tools = await provider.list_tools()
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"mcp_gateway_provider_manifest_skipped",
|
||||
provider=getattr(provider, "name", None),
|
||||
tool_name=lookup_name,
|
||||
error=str(exc),
|
||||
)
|
||||
continue
|
||||
if any(tool.name == lookup_name for tool in tools):
|
||||
return provider
|
||||
return None
|
||||
|
||||
# ── Audit log ─────────────────────────────────────────────────────────────
|
||||
|
||||
async def _write_audit(
|
||||
@@ -446,6 +482,15 @@ class McpGateway:
|
||||
json.dumps(result.output, sort_keys=True, default=str).encode()
|
||||
).hexdigest()
|
||||
|
||||
gate_payload = {
|
||||
**gate_result.as_dict(),
|
||||
"schema_version": "awooop_mcp_gateway_audit_v1",
|
||||
"gateway_path": "awooop_mcp_gateway",
|
||||
"policy_enforced": True,
|
||||
"is_shadow": ctx.is_shadow,
|
||||
"required_scope": ctx.required_scope,
|
||||
}
|
||||
|
||||
audit = AwoooPMcpGatewayAudit(
|
||||
project_id=ctx.project_id,
|
||||
run_id=ctx.run_id,
|
||||
@@ -455,16 +500,15 @@ class McpGateway:
|
||||
tool_name=ctx.tool_name,
|
||||
input_hash=input_hash,
|
||||
output_hash=output_hash,
|
||||
gate_result=gate_result.as_dict(),
|
||||
gate_result=gate_payload,
|
||||
result_status=result_status,
|
||||
block_gate=block_gate,
|
||||
block_reason=block_reason,
|
||||
latency_ms=latency_ms,
|
||||
)
|
||||
|
||||
if tool_row is not None:
|
||||
self._db.add(audit)
|
||||
await self._db.flush()
|
||||
self._db.add(audit)
|
||||
await self._db.flush()
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"mcp_gateway_audit_write_failed",
|
||||
|
||||
@@ -14,6 +14,7 @@ from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
@@ -29,7 +30,9 @@ class MCPTool:
|
||||
name: str
|
||||
description: str
|
||||
input_schema: dict[str, Any]
|
||||
server_name: str
|
||||
# 2026-05-06 Codex: 部分舊 provider 的 list_tools() 尚未傳 server_name。
|
||||
# 先給 DTO 預設值,registry 會以 provider.name 補正,避免啟動登記直接 crash。
|
||||
server_name: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -41,12 +44,21 @@ class MCPToolResult:
|
||||
"""
|
||||
|
||||
success: bool
|
||||
execution_id: str
|
||||
execution_id: str = ""
|
||||
output: Any | None = None
|
||||
# 2026-05-06 Codex: 舊 provider 曾使用 data=... 作為成功輸出欄位。
|
||||
# 保留 alias,避免 provider 成功路徑因 dataclass 參數不相容而 crash。
|
||||
data: Any | None = None
|
||||
error: str | None = None
|
||||
duration: float = 0.0
|
||||
timestamp: datetime = field(default_factory=now_taipei)
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if not self.execution_id:
|
||||
self.execution_id = f"mcp-{uuid4()}"
|
||||
if self.output is None and self.data is not None:
|
||||
self.output = self.data
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"success": self.success,
|
||||
|
||||
@@ -24,6 +24,7 @@ from typing import Any
|
||||
import httpx
|
||||
|
||||
from src.core.config import settings # P0-13: K8s namespace 由 settings.AWOOOI_K8S_NAMESPACE 提供
|
||||
from src.services.mcp_audit_context import with_mcp_audit_context
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -518,6 +519,13 @@ class MCPBridge:
|
||||
raise ValueError(f"Unknown MCP Server: {server_name}")
|
||||
|
||||
server = self._servers[server_name]
|
||||
parameters = with_mcp_audit_context(
|
||||
parameters,
|
||||
session_id=f"mcp_bridge:{execution_id}",
|
||||
flywheel_node="govern",
|
||||
agent_role="mcp_bridge",
|
||||
gateway_path="legacy_mcp_bridge",
|
||||
)
|
||||
result = await self._execute_tool(server, tool_name, parameters)
|
||||
|
||||
# ========================================
|
||||
|
||||
@@ -41,6 +41,7 @@ SSH 連線:
|
||||
@see docs/superpowers/specs/2026-04-10-infra-rebuild-sprint-abc-design.md §MCP-2a
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
@@ -51,6 +52,7 @@ import structlog
|
||||
from src.plugins.mcp.interfaces import MCPTool, MCPToolProvider, MCPToolResult
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
_asyncssh_logger_configured = False
|
||||
|
||||
# =============================================================================
|
||||
# 安全常數
|
||||
@@ -58,10 +60,17 @@ logger = structlog.get_logger(__name__)
|
||||
|
||||
SSH_KEY_PATH = "/run/secrets/ssh_mcp_key"
|
||||
SSH_USER = "wooo"
|
||||
SSH_PORT = 22
|
||||
DEFAULT_HOST_USERS = {
|
||||
# AI/Web host is operated by the ollama account in the current topology.
|
||||
"192.168.0.188": "ollama",
|
||||
}
|
||||
SHORT_HOST_MAP = {
|
||||
"110": "192.168.0.110",
|
||||
"120": "192.168.0.120",
|
||||
"121": "192.168.0.121",
|
||||
"188": "192.168.0.188",
|
||||
}
|
||||
DIAG_TIMEOUT = 10 # 診斷類超時(秒)
|
||||
OP_TIMEOUT = 60 # 操作類超時(秒)
|
||||
|
||||
@@ -104,6 +113,47 @@ def _validate_param(key: str, value: str) -> str:
|
||||
# tail / port / lines 由呼叫方 int() 轉換,不需字串白名單
|
||||
return value
|
||||
|
||||
|
||||
def _normalize_ssh_host(value: str) -> str:
    """Reduce a host label to the bare host name/IP handed to asyncssh.

    Labels such as ``192.168.0.110:9100`` carry an exporter port, not the SSH
    port, so any numeric ``:port`` suffix is discarded here.

    Processing order:

    1. Trim whitespace (``None``/empty becomes ``""``).
    2. Drop a leading ``ssh://`` scheme.
    3. Drop everything up to and including the last ``@`` (user info).
    4. ``[addr]...`` bracket form: return ``addr`` immediately.
    5. Exactly one ``:`` followed only by digits: drop the port.
    6. Expand short aliases through ``SHORT_HOST_MAP``.
    """
    candidate = (value or "").strip()
    candidate = candidate.removeprefix("ssh://")
    # rpartition leaves the string untouched when there is no "@".
    candidate = candidate.rpartition("@")[2]

    if candidate.startswith("["):
        closing = candidate.find("]")
        if closing != -1:
            # Bracketed literal: return as-is, no alias lookup.
            return candidate[1:closing]

    name, colon, port = candidate.partition(":")
    # Only a single colon is treated as host:port; more colons are left alone
    # (e.g. an unbracketed IPv6 literal).
    if colon and ":" not in port and port.isdigit():
        candidate = name

    return SHORT_HOST_MAP.get(candidate, candidate)
|
||||
|
||||
|
||||
def _quiet_asyncssh_info_logs() -> None:
    """Raise the ``asyncssh`` logger to WARNING, once per process.

    Per the original author's note, asyncssh can emit an INFO record that
    formats an exit status with ``%d`` while the server supplied a string,
    producing noisy ``TypeError: %d format`` tracebacks even though the tool
    result is still usable. Suppressing INFO-level records avoids that noise.

    The module-level flag ``_asyncssh_logger_configured`` makes repeated calls
    no-ops, so a level changed elsewhere afterwards is not overwritten.
    """
    global _asyncssh_logger_configured
    if not _asyncssh_logger_configured:
        logging.getLogger("asyncssh").setLevel(logging.WARNING)
        _asyncssh_logger_configured = True
|
||||
|
||||
# 群組 A(只讀)
|
||||
GROUP_A_TOOLS = {
|
||||
"ssh_diagnose",
|
||||
@@ -198,6 +248,10 @@ class SSHProvider(MCPToolProvider):
|
||||
),
|
||||
input_schema={"type": "object", "properties": {
|
||||
"host": {"type": "string", "description": "Target host IP"},
|
||||
"container_name": {
|
||||
"type": "string",
|
||||
"description": "Optional Docker container name for container-focused diagnostics",
|
||||
},
|
||||
}, "required": ["host"]},
|
||||
server_name=self.name,
|
||||
),
|
||||
@@ -375,7 +429,7 @@ class SSHProvider(MCPToolProvider):
|
||||
error=f"Unknown tool: {tool_name}",
|
||||
)
|
||||
|
||||
host = parameters.get("host", "")
|
||||
host = _normalize_ssh_host(str(parameters.get("host", "")))
|
||||
|
||||
# 守衛 2: 允許的 host
|
||||
if host not in self._allowed_hosts():
|
||||
@@ -500,12 +554,23 @@ class SSHProvider(MCPToolProvider):
|
||||
# 所有接受用戶字串的工具,必須先通過 _validate_param() 白名單驗證
|
||||
if tool_name == "ssh_diagnose":
|
||||
# 2026-04-27 Claude Sonnet 4.6: 主機告警自動診斷 — 只讀,不修改任何狀態
|
||||
return (
|
||||
command = (
|
||||
"echo '=== CPU TOP ===' && ps aux --sort=-%cpu | head -15 && "
|
||||
"echo '=== MEMORY ===' && free -h && "
|
||||
"echo '=== DISK ===' && df -h && "
|
||||
"echo '=== LOAD ===' && uptime"
|
||||
)
|
||||
container_name = params.get("container_name")
|
||||
if container_name:
|
||||
name = _validate_param("container_name", str(container_name))
|
||||
command = (
|
||||
f"{command} && "
|
||||
f"echo '=== DOCKER STATS {name} ===' && "
|
||||
f"docker stats --no-stream {name} 2>&1 && "
|
||||
f"echo '=== DOCKER INSPECT {name} ===' && "
|
||||
f"docker inspect {name} 2>&1 | head -80"
|
||||
)
|
||||
return command
|
||||
|
||||
if tool_name == "ssh_get_top_processes":
|
||||
return "ps aux --sort=-%cpu | head -15"
|
||||
@@ -604,7 +669,9 @@ class SSHProvider(MCPToolProvider):
|
||||
raise RuntimeError(
|
||||
"asyncssh is not installed. "
|
||||
"Add 'asyncssh' to pyproject.toml dependencies."
|
||||
)
|
||||
) from None
|
||||
|
||||
_quiet_asyncssh_info_logs()
|
||||
|
||||
import os
|
||||
if not os.path.exists(SSH_KEY_PATH):
|
||||
@@ -625,11 +692,13 @@ class SSHProvider(MCPToolProvider):
|
||||
|
||||
async with asyncssh.connect(
|
||||
host,
|
||||
port=SSH_PORT,
|
||||
username=username or SSH_USER,
|
||||
client_keys=[SSH_KEY_PATH],
|
||||
known_hosts=known_hosts_path, # None = 跳過驗證(內網),或指定文件路徑
|
||||
connect_timeout=timeout,
|
||||
config=None, # 禁止讀取使用者 ssh config,避免 Port 字串污染 asyncssh
|
||||
connect_timeout=float(timeout),
|
||||
) as conn:
|
||||
# Bug 根因:asyncssh 模組沒有頂層 run();應呼叫 conn.run()(2026-04-24 Claude Sonnet 4.6)
|
||||
result = await conn.run(cmd, timeout=timeout, check=False)
|
||||
result = await conn.run(cmd, timeout=float(timeout), check=False)
|
||||
return (result.stdout or ""), (result.stderr or "")
|
||||
|
||||
@@ -106,6 +106,8 @@ def _record_to_request(record: ApprovalRecord) -> ApprovalRequest:
|
||||
# B4 fix 2026-04-24 ogt + Claude Sonnet 4.6: 補回 DB 欄位(人工審核路徑讀回必要)
|
||||
incident_id=getattr(record, "incident_id", None),
|
||||
matched_playbook_id=getattr(record, "matched_playbook_id", None),
|
||||
telegram_message_id=getattr(record, "telegram_message_id", None),
|
||||
telegram_chat_id=getattr(record, "telegram_chat_id", None),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -18,7 +18,14 @@ import structlog
|
||||
from sqlalchemy import text
|
||||
|
||||
from src.db.base import get_db_context
|
||||
from src.models.drift import DriftInterpretation, DriftIntent, DriftItem, DriftLevel, DriftReport, DriftStatus
|
||||
from src.models.drift import (
|
||||
DriftIntent,
|
||||
DriftInterpretation,
|
||||
DriftItem,
|
||||
DriftLevel,
|
||||
DriftReport,
|
||||
DriftStatus,
|
||||
)
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
@@ -167,6 +174,40 @@ class DriftReportRepository:
|
||||
{"report_id": report_id, "narrative": narrative},
|
||||
)
|
||||
|
||||
async def get_repeat_state(
    self,
    report: DriftReport,
    *,
    include_values: bool = True,
) -> dict:
    """Return stable fingerprint repeat state for a drift report.

    Loads up to 200 ``drift_reports`` rows created in the last 24 hours for
    ``report.namespace`` (ordered by ``scanned_at`` descending) and passes
    them, with ``report``, to ``build_drift_repeat_state``.

    Args:
        report: The drift report whose repeat state is requested.
        include_values: Forwarded unchanged to ``build_drift_repeat_state``.

    Returns:
        Whatever ``build_drift_repeat_state`` returns for the report and the
        fetched rows.
    """
    # Imported lazily, as in the original.
    from src.services.drift_repeat_state import build_drift_repeat_state

    recent_reports_sql = text("""
        SELECT
            report_id,
            namespace,
            status,
            scanned_at,
            created_at,
            items
        FROM drift_reports
        WHERE namespace = :namespace
          AND created_at > now() - interval '24 hours'
        ORDER BY scanned_at DESC
        LIMIT 200
    """)
    bind_params = {"namespace": report.namespace}

    async with get_db_context() as db:
        result = await db.execute(recent_reports_sql, bind_params)
        # Materialise plain dicts before handing the rows to the helper.
        rows = [dict(mapping) for mapping in result.mappings().all()]
        return build_drift_repeat_state(
            report,
            rows,
            include_values=include_values,
        )
|
||||
|
||||
|
||||
_drift_repo: DriftReportRepository | None = None
|
||||
|
||||
|
||||
@@ -356,6 +356,75 @@ async def list_pending(
|
||||
return list(result.scalars().all())
|
||||
|
||||
|
||||
async def list_pending_by_executor(
    executor_type: str,
    *,
    limit: int = 50,
) -> list[GovernanceRemediationDispatch]:
    """List pending dispatches for one executor, ordered by ``dispatched_at`` ASC.

    Intended for workers (e.g. Hermes) that consume their own work items; the
    query lives in the repository layer so jobs do not scatter table names and
    status conditions.

    Args:
        executor_type: Value of ``dispatch.executor_type``, for example
            ``hermes_kb_growth_healthcheck``.
        limit: Maximum number of rows fetched in this round, so a backlog
            cannot overwhelm the worker in one go.

    Returns:
        Pending dispatches, oldest first.
    """
    dispatch = GovernanceRemediationDispatch
    stmt = (
        select(dispatch)
        .where(dispatch.dispatch_status == "pending")
        .where(dispatch.executor_type == executor_type)
        .order_by(dispatch.dispatched_at.asc())
        .limit(limit)
    )
    async with get_db_context() as db:
        result = await db.execute(stmt)
        return list(result.scalars().all())
|
||||
|
||||
|
||||
async def update_decision_context(
    dispatch_id: str,
    decision_context: dict[str, Any],
) -> GovernanceRemediationDispatch:
    """Replace a dispatch's ``decision_context`` on its existing row.

    Only the read-model context of the dispatch work item is updated, so the
    same row keeps its audit trail. Per the original docstring, this does not
    modify the immutable ``ai_governance_events`` and does not mean the
    governance event has been resolved.

    Args:
        dispatch_id: ``governance_remediation_dispatch.id``.
        decision_context: The new JSONB context.

    Returns:
        The updated ``GovernanceRemediationDispatch`` ORM object.

    Raises:
        DispatchNotFound: No row exists for ``dispatch_id``.
    """
    lookup = select(GovernanceRemediationDispatch).where(
        GovernanceRemediationDispatch.id == dispatch_id
    )
    async with get_db_context() as db:
        row = (await db.execute(lookup)).scalar_one_or_none()
        if row is None:
            raise DispatchNotFound(f"dispatch_id={dispatch_id!r} 不存在")

        row.decision_context = decision_context
        # Flush the change, then reload the row before returning it.
        await db.flush()
        await db.refresh(row)

        logger.info(
            "dispatch_decision_context_updated",
            dispatch_id=dispatch_id,
            event_id=row.governance_event_id,
            executor_type=row.executor_type,
        )
        return row
|
||||
|
||||
|
||||
async def list_by_event(
|
||||
event_id: str,
|
||||
) -> list[GovernanceRemediationDispatch]:
|
||||
|
||||
@@ -19,7 +19,12 @@ from sqlalchemy import select
|
||||
|
||||
from src.db.base import get_db_context
|
||||
from src.db.models import IncidentRecord
|
||||
from src.models.incident import Incident, IncidentFrequencyStats, IncidentStatus, Severity
|
||||
from src.models.incident import (
|
||||
Incident,
|
||||
IncidentFrequencyStats,
|
||||
IncidentStatus,
|
||||
Severity,
|
||||
)
|
||||
from src.repositories.interfaces import IIncidentRepository
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
@@ -41,8 +46,8 @@ def _record_to_incident(record: IncidentRecord) -> Incident:
|
||||
|
||||
return Incident(
|
||||
incident_id=record.incident_id,
|
||||
status=IncidentStatus(record.status),
|
||||
severity=Severity(record.severity),
|
||||
status=IncidentStatus(_normalize_status(record.status)),
|
||||
severity=Severity(_normalize_severity(record.severity)),
|
||||
signals=record.signals or [],
|
||||
affected_services=record.affected_services or [],
|
||||
proposal_ids=record.proposal_ids or [],
|
||||
@@ -93,6 +98,36 @@ def _incident_to_record_data(incident: Incident) -> dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
def _normalize_status(value: str | IncidentStatus) -> str:
    """Map a stored status onto the string form used by ``IncidentStatus``.

    Resolution order:

    1. An ``IncidentStatus`` member returns its ``.value``.
    2. A string equal to a member *name* returns that member's ``.value``.
    3. Otherwise the string is stripped and lower-cased; ``"open"`` is
       translated to ``IncidentStatus.INVESTIGATING``'s value.
    4. Anything else is returned in its stripped, lower-cased form.
    """
    if isinstance(value, IncidentStatus):
        return value.value

    text_value = str(value)
    # Name lookup is exact (no stripping or case folding), as in the original.
    member = IncidentStatus.__members__.get(text_value)
    if member is not None:
        return member.value

    lowered = text_value.strip().lower()
    return IncidentStatus.INVESTIGATING.value if lowered == "open" else lowered
|
||||
|
||||
|
||||
def _normalize_severity(value: str | Severity) -> str:
    """Map a stored severity onto the string form used by ``Severity``.

    A ``Severity`` member, or a string equal to a member *name*, returns that
    member's ``.value``. Otherwise the stripped, lower-cased text is looked up
    in a table of legacy labels (``critical``, ``high``, ``warning``,
    ``medium``, ``info``, ``low``, ``none``). Text not in the table is
    returned unchanged, without stripping or case folding.
    """
    if isinstance(value, Severity):
        return value.value

    text_value = str(value)
    member = Severity.__members__.get(text_value)
    if member is not None:
        return member.value

    # Legacy free-text labels and the P-level each one corresponds to.
    legacy_levels = {
        "critical": Severity.P0,
        "high": Severity.P1,
        "warning": Severity.P2,
        "medium": Severity.P2,
        "info": Severity.P3,
        "low": Severity.P3,
        "none": Severity.P3,
    }
    legacy = legacy_levels.get(text_value.strip().lower())
    if legacy is None:
        return text_value
    return legacy.value
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# IncidentDBRepository
|
||||
# =============================================================================
|
||||
@@ -136,8 +171,8 @@ class IncidentDBRepository(IIncidentRepository):
|
||||
async def get_active(self) -> list[Incident]:
|
||||
"""取得所有活躍的 Incident"""
|
||||
active_statuses = [
|
||||
IncidentStatus.INVESTIGATING.value,
|
||||
IncidentStatus.MITIGATING.value,
|
||||
IncidentStatus.INVESTIGATING,
|
||||
IncidentStatus.MITIGATING,
|
||||
]
|
||||
async with get_db_context() as db:
|
||||
result = await db.execute(
|
||||
|
||||
@@ -190,7 +190,7 @@ class KnowledgeDBRepository:
|
||||
count_query = count_query.where(KnowledgeEntryRecord.status == status)
|
||||
if tags:
|
||||
for tag in tags:
|
||||
tag_filter = KnowledgeEntryRecord.tags.op('@>')(f'["{tag}"]')
|
||||
tag_filter = _json_string_array_has_tag(tag)
|
||||
query = query.where(tag_filter)
|
||||
count_query = count_query.where(tag_filter)
|
||||
if q:
|
||||
@@ -347,3 +347,18 @@ class KnowledgeDBRepository:
|
||||
created_at=record.created_at,
|
||||
updated_at=record.updated_at,
|
||||
)
|
||||
|
||||
|
||||
def _json_string_array_has_tag(tag: str):
    """Build a tag filter that works for both JSON and JSONB columns.

    Per the original note, production ``knowledge_entries.tags`` is currently
    a JSON column, which does not support ``json @> text``. The column is
    therefore cast to text and matched against the tag wrapped in double
    quotes, so a tag fragment is not mistaken for a complete tag.

    LIKE wildcards (``%``, ``_``) and the escape character itself are escaped
    with a backslash before the pattern is built.

    NOTE(review): ``ilike`` matches case-insensitively — confirm that is
    intended for tags. A tag that itself contains ``"`` presumably will not
    match its JSON-escaped stored form; verify against real data.
    """
    # Single-pass equivalent of escaping "\" first, then "%" and "_".
    like_escapes = str.maketrans({"\\": "\\\\", "%": "\\%", "_": "\\_"})
    escaped = tag.translate(like_escapes)
    tags_as_text = KnowledgeEntryRecord.tags.cast(String)
    return tags_as_text.ilike(f'%"{escaped}"%', escape="\\")
|
||||
|
||||
@@ -60,13 +60,17 @@ class MetricsDBRepository(IMetricsRepository):
|
||||
cutoff = datetime.now(UTC) - timedelta(hours=hours)
|
||||
|
||||
# Query: 統計 executed vs total (approved + executed + execution_failed)
|
||||
# 2026-05-06 ogt + Codex:
|
||||
# approval_records.status 目前實際寫入的是大寫 enum
|
||||
# (APPROVED / EXECUTION_SUCCESS / EXECUTION_FAILED)。舊查詢只看
|
||||
# lowercase executed,導致 AI Success 在報表層永遠趨近 0。
|
||||
query = text("""
|
||||
SELECT
|
||||
COUNT(CASE WHEN status = 'executed' THEN 1 END) as executed_count,
|
||||
COUNT(CASE WHEN UPPER(status::text) = 'EXECUTION_SUCCESS' THEN 1 END) as executed_count,
|
||||
COUNT(*) as total_count
|
||||
FROM approval_records
|
||||
WHERE created_at >= :cutoff
|
||||
AND status IN ('approved', 'executed', 'execution_failed')
|
||||
AND UPPER(status::text) IN ('APPROVED', 'EXECUTION_SUCCESS', 'EXECUTION_FAILED')
|
||||
""")
|
||||
|
||||
result = await session.execute(query, {"cutoff": cutoff})
|
||||
@@ -127,11 +131,11 @@ class MetricsDBRepository(IMetricsRepository):
|
||||
trend_query = text("""
|
||||
SELECT
|
||||
date_trunc('hour', created_at) as hour_bucket,
|
||||
COUNT(CASE WHEN status = 'executed' THEN 1 END) * 100.0 /
|
||||
COUNT(CASE WHEN UPPER(status::text) = 'EXECUTION_SUCCESS' THEN 1 END) * 100.0 /
|
||||
NULLIF(COUNT(*), 0) as hourly_rate
|
||||
FROM approval_records
|
||||
WHERE created_at >= :cutoff
|
||||
AND status IN ('approved', 'executed', 'execution_failed')
|
||||
AND UPPER(status::text) IN ('APPROVED', 'EXECUTION_SUCCESS', 'EXECUTION_FAILED')
|
||||
GROUP BY hour_bucket
|
||||
ORDER BY hour_bucket DESC
|
||||
LIMIT :limit
|
||||
|
||||
@@ -19,10 +19,11 @@ router = APIRouter()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ==================== Ollama Config ====================
|
||||
# 2026-05-03 ogt: ADR-110 GCP-A Primary — 改從 settings 讀取,不再硬編碼 111
|
||||
def _get_ollama_base_url() -> str:
|
||||
from src.core.config import get_settings
|
||||
return get_settings().OLLAMA_URL
|
||||
# 2026-05-19 Codex: agent thinking stream follows GCP-A → GCP-B → 111.
|
||||
def _get_ollama_endpoints():
    """Return the Ollama endpoints resolved for the ``"interactive"`` order.

    The resolver is imported lazily inside the function, as in the original.
    """
    from src.services.ollama_endpoint_resolver import resolve_ollama_order

    endpoints = resolve_ollama_order("interactive")
    return endpoints
|
||||
OLLAMA_MODEL = "llama3.2:latest" # 可根據實際部署調整
|
||||
OLLAMA_TIMEOUT = 120.0 # 串流超時
|
||||
|
||||
@@ -104,7 +105,7 @@ async def get_agent_thinking(
|
||||
) -> StreamingResponse:
|
||||
"""
|
||||
OpenClaw 思考軌跡 (SSE 串流)
|
||||
Phase 1.2: 真實串接 Ollama at 192.168.0.188:11434
|
||||
Phase 1.2: 真實串接設定中的 Ollama provider pool
|
||||
"""
|
||||
|
||||
async def generate_thinking_stream():
|
||||
@@ -112,66 +113,82 @@ async def get_agent_thinking(
|
||||
# 1. 開始思考
|
||||
yield f"data: {json.dumps({'type': 'thinking', 'content': '正在連接 AI 模型...'}, ensure_ascii=False)}\n\n"
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=OLLAMA_TIMEOUT) as client:
|
||||
# 2. 發送請求到 Ollama
|
||||
yield f"data: {json.dumps({'type': 'thinking', 'content': f'模型: {model}'}, ensure_ascii=False)}\n\n"
|
||||
last_error = ""
|
||||
async with httpx.AsyncClient(timeout=OLLAMA_TIMEOUT) as client:
|
||||
# 2. 發送請求到 Ollama
|
||||
yield f"data: {json.dumps({'type': 'thinking', 'content': f'模型: {model}'}, ensure_ascii=False)}\n\n"
|
||||
|
||||
async with client.stream(
|
||||
"POST",
|
||||
f"{_get_ollama_base_url()}/api/generate",
|
||||
json={
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"stream": True,
|
||||
},
|
||||
) as response:
|
||||
if response.status_code != 200:
|
||||
yield f"data: {json.dumps({'type': 'error', 'content': f'Ollama 錯誤: HTTP {response.status_code}'}, ensure_ascii=False)}\n\n"
|
||||
yield "data: [DONE]\n\n"
|
||||
return
|
||||
|
||||
yield f"data: {json.dumps({'type': 'thinking', 'content': '開始接收 AI 回應...'}, ensure_ascii=False)}\n\n"
|
||||
|
||||
# 3. 串流讀取 Ollama 回應
|
||||
buffer = ""
|
||||
async for line in response.aiter_lines():
|
||||
if not line:
|
||||
for endpoint in _get_ollama_endpoints():
|
||||
if not endpoint.url:
|
||||
continue
|
||||
try:
|
||||
async with client.stream(
|
||||
"POST",
|
||||
f"{endpoint.url}/api/generate",
|
||||
json={
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"stream": True,
|
||||
},
|
||||
) as response:
|
||||
if response.status_code != 200:
|
||||
last_error = f"HTTP {response.status_code}"
|
||||
logger.warning(
|
||||
"agent_thinking_ollama_http_error",
|
||||
provider=endpoint.provider_name,
|
||||
status=response.status_code,
|
||||
)
|
||||
continue
|
||||
|
||||
try:
|
||||
chunk = json.loads(line)
|
||||
token = chunk.get("response", "")
|
||||
done = chunk.get("done", False)
|
||||
yield f"data: {json.dumps({'type': 'thinking', 'content': '開始接收 AI 回應...'}, ensure_ascii=False)}\n\n"
|
||||
|
||||
if token:
|
||||
# 累積 token,每 10 字符或遇到標點符號時發送
|
||||
buffer += token
|
||||
if len(buffer) >= 10 or any(p in buffer for p in "。!?,、\n"):
|
||||
yield f"data: {json.dumps({'type': 'thinking', 'content': buffer}, ensure_ascii=False)}\n\n"
|
||||
buffer = ""
|
||||
# 3. 串流讀取 Ollama 回應
|
||||
buffer = ""
|
||||
async for line in response.aiter_lines():
|
||||
if not line:
|
||||
continue
|
||||
|
||||
if done:
|
||||
# 發送剩餘 buffer
|
||||
if buffer:
|
||||
yield f"data: {json.dumps({'type': 'thinking', 'content': buffer}, ensure_ascii=False)}\n\n"
|
||||
# 發送完成訊息
|
||||
yield f"data: {json.dumps({'type': 'result', 'content': '分析完成'}, ensure_ascii=False)}\n\n"
|
||||
break
|
||||
try:
|
||||
chunk = json.loads(line)
|
||||
token = chunk.get("response", "")
|
||||
done = chunk.get("done", False)
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"JSON 解析失敗: {line[:100]}... - {e}")
|
||||
continue
|
||||
if token:
|
||||
# 累積 token,每 10 字符或遇到標點符號時發送
|
||||
buffer += token
|
||||
if len(buffer) >= 10 or any(p in buffer for p in "。!?,、\n"):
|
||||
yield f"data: {json.dumps({'type': 'thinking', 'content': buffer}, ensure_ascii=False)}\n\n"
|
||||
buffer = ""
|
||||
|
||||
except httpx.ConnectError as e:
|
||||
logger.error(f"無法連接 Ollama: {e}")
|
||||
yield f"data: {json.dumps({'type': 'error', 'content': f'無法連接 Ollama ({_get_ollama_base_url()})'}, ensure_ascii=False)}\n\n"
|
||||
except httpx.TimeoutException as e:
|
||||
logger.error(f"Ollama 超時: {e}")
|
||||
yield f"data: {json.dumps({'type': 'error', 'content': '請求超時'}, ensure_ascii=False)}\n\n"
|
||||
except Exception as e:
|
||||
logger.error(f"未知錯誤: {e}")
|
||||
yield f"data: {json.dumps({'type': 'error', 'content': f'未知錯誤: {str(e)}'}, ensure_ascii=False)}\n\n"
|
||||
if done:
|
||||
# 發送剩餘 buffer
|
||||
if buffer:
|
||||
yield f"data: {json.dumps({'type': 'thinking', 'content': buffer}, ensure_ascii=False)}\n\n"
|
||||
# 發送完成訊息
|
||||
yield f"data: {json.dumps({'type': 'result', 'content': '分析完成'}, ensure_ascii=False)}\n\n"
|
||||
yield "data: [DONE]\n\n"
|
||||
return
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"JSON 解析失敗: {line[:100]}... - {e}")
|
||||
continue
|
||||
except (httpx.ConnectError, httpx.TimeoutException) as e:
|
||||
last_error = type(e).__name__
|
||||
logger.error(
|
||||
"agent_thinking_ollama_endpoint_failed",
|
||||
provider=endpoint.provider_name,
|
||||
error=str(e),
|
||||
)
|
||||
except Exception as e:
|
||||
last_error = str(e)
|
||||
logger.error(
|
||||
"agent_thinking_unknown_error",
|
||||
provider=endpoint.provider_name,
|
||||
error=str(e),
|
||||
)
|
||||
|
||||
error_content = f"Ollama 全端點不可用: {last_error or 'unknown'}"
|
||||
yield f"data: {json.dumps({'type': 'error', 'content': error_content}, ensure_ascii=False)}\n\n"
|
||||
|
||||
# 4. 結束標記
|
||||
yield "data: [DONE]\n\n"
|
||||
|
||||
606
apps/api/src/services/adr100_remediation_service.py
Normal file
606
apps/api/src/services/adr100_remediation_service.py
Normal file
@@ -0,0 +1,606 @@
|
||||
"""
|
||||
ADR-100 Remediation Service
|
||||
===========================
|
||||
Safe operator entrypoints for verification remediation work items.
|
||||
|
||||
T25: remediation queue items are now actionable without mutating incident state:
|
||||
- preview: show the selected guardrail path
|
||||
- dry-run: collect read-only current state and validate supported executor routing
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Any, Literal, Protocol
|
||||
|
||||
import structlog
|
||||
|
||||
from src.models.incident import Incident
|
||||
from src.repositories.incident_repository import IncidentDBRepository
|
||||
from src.services.adr100_slo_status_service import (
|
||||
Adr100SloStatusService,
|
||||
get_adr100_slo_status_service,
|
||||
)
|
||||
from src.services.auto_repair_service import AutoRepairService
|
||||
from src.services.post_execution_verifier import (
|
||||
PostExecutionVerifier,
|
||||
_assess_recovery,
|
||||
_build_prometheus_query,
|
||||
get_post_execution_verifier,
|
||||
)
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
RemediationMode = Literal["auto", "reverify", "replay"]
|
||||
|
||||
_READY_STATUSES = {"ready_for_replay", "ready_for_reverify"}
|
||||
|
||||
|
||||
class RemediationNotFoundError(LookupError):
    """Raised when a requested ADR-100 remediation work item is absent from the current read model."""
|
||||
|
||||
|
||||
class _IncidentRepository(Protocol):
    """Structural type for the incident lookup this service depends on."""

    async def get_by_id(self, incident_id: str) -> Incident | None:
        """Return the incident for ``incident_id``, or ``None`` if there is none."""
        ...
|
||||
|
||||
|
||||
class Adr100RemediationService:
|
||||
"""Read-only remediation preview and dry-run service."""
|
||||
|
||||
def __init__(
    self,
    *,
    slo_service: Adr100SloStatusService | None = None,
    incident_repository: _IncidentRepository | None = None,
    auto_repair_service: AutoRepairService | None = None,
    verifier: PostExecutionVerifier | None = None,
    timeline_service: Any | None = None,
    alert_operation_log_repository: Any | None = None,
    record_history: bool = True,
) -> None:
    """Wire up collaborators, falling back to defaults for falsy arguments.

    Args:
        slo_service: SLO status service; defaults to
            ``get_adr100_slo_status_service()``.
        incident_repository: Incident lookup; defaults to a new
            ``IncidentDBRepository``.
        auto_repair_service: Defaults to a new ``AutoRepairService``.
        verifier: Defaults to ``get_post_execution_verifier()``.
        timeline_service: Stored as given (may be ``None``).
        alert_operation_log_repository: Stored as given (may be ``None``).
        record_history: Whether dry-run history recording is enabled.
    """
    # Defaults are built in the same order as the parameters appear.
    self._slo_service = slo_service if slo_service else get_adr100_slo_status_service()
    self._incident_repository = (
        incident_repository if incident_repository else IncidentDBRepository()
    )
    self._auto_repair_service = (
        auto_repair_service if auto_repair_service else AutoRepairService()
    )
    self._verifier = verifier if verifier else get_post_execution_verifier()

    # Optional collaborators and flags are stored untouched.
    self._timeline_service = timeline_service
    self._alert_operation_log_repository = alert_operation_log_repository
    self._record_history_enabled = record_history
|
||||
|
||||
async def preview(self, work_item_id: str, mode: RemediationMode = "auto") -> dict[str, Any]:
    """Describe the safe execution plan for one remediation queue item.

    Args:
        work_item_id: Identifier looked up via ``_find_work_item``.
        mode: Requested mode; the effective mode comes from ``_select_mode``.

    Returns:
        An ``adr100_remediation_preview_v1`` payload. ``allowed`` is true only
        when every entry from ``_base_checks`` has a truthy ``"passed"``.

    Raises:
        RemediationNotFoundError: Propagated from ``_find_work_item``.
    """
    item = await self._find_work_item(work_item_id)
    selected_mode = _select_mode(item, mode)
    checks = _base_checks(item)
    allowed = all(entry["passed"] for entry in checks)

    response: dict[str, Any] = {
        "schema_version": "adr100_remediation_preview_v1",
        "work_item_id": item.get("work_item_id"),
        "incident_id": item.get("incident_id"),
        "auto_repair_id": item.get("auto_repair_id"),
        "mode": selected_mode,
        "allowed": allowed,
        # The payload itself declares that a preview writes nothing.
        "safety_level": "read_only",
        "writes_incident_state": False,
        "writes_auto_repair_result": False,
        "checks": checks,
    }
    response["plan"] = _plan_for_item(item, selected_mode)
    response["source"] = "adr100.verification_coverage.remediation_queue"
    return response
|
||||
|
||||
async def dry_run(self, work_item_id: str, mode: RemediationMode = "auto") -> dict[str, Any]:
    """Run a remediation dry-run for one queue item.

    Flow:

    1. Resolve the work item, the effective mode and the base checks.
    2. Load the incident and append an ``incident_loaded`` check.
    3. If the incident is missing or any check failed, build the blocked
       payload, attach the history-recording result, and return it.
    4. Otherwise dispatch to the replay or reverify dry-run.

    Raises:
        RemediationNotFoundError: Propagated from ``_find_work_item``.
    """
    item = await self._find_work_item(work_item_id)
    selected_mode = _select_mode(item, mode)
    checks = _base_checks(item)

    incident = await self._load_incident(item)
    incident_check = {
        "name": "incident_loaded",
        "passed": incident is not None,
        "detail": item.get("incident_id") or "missing incident_id",
    }
    checks.append(incident_check)

    blocked = incident is None or not all(entry["passed"] for entry in checks)
    if blocked:
        payload = _dry_run_blocked_payload(item, selected_mode, checks)
        payload["history"] = await self._record_dry_run_history(item, payload)
        return payload

    # Any mode other than "replay" falls through to reverify.
    if selected_mode != "replay":
        return await self._dry_run_reverify(item, incident, checks)
    return await self._dry_run_replay(item, incident, checks)
|
||||
|
||||
async def history(
    self,
    *,
    limit: int = 50,
    incident_id: str | None = None,
    work_item_id: str | None = None,
) -> dict[str, Any]:
    """Return dry-run history entries written by this remediation service.

    Args:
        limit: Maximum items returned; clamped to the range 1..200.
        incident_id: Optional filter passed to the repository query.
        work_item_id: Optional filter applied to each row's context.

    Returns:
        An ``adr100_remediation_history_v1`` payload with the matching items
        and a per-work-item summary.
    """
    safe_limit = max(1, min(limit, 200))
    # Over-fetch (4x, at least 50, at most 200 per event type) because rows
    # from other writers are filtered out below.
    fetch_limit = min(max(safe_limit * 4, 50), 200)

    repo = self._alert_operation_log_repository
    if repo is None:
        from src.repositories.alert_operation_log_repository import (
            get_alert_operation_log_repository,
        )

        repo = get_alert_operation_log_repository()

    rows: list[Any] = []
    for event_type in ("PRE_FLIGHT_PASSED", "PRE_FLIGHT_FAILED"):
        try:
            batch, _total = await repo.list_recent(
                limit=fetch_limit,
                event_type=event_type,
                incident_id=incident_id,
            )
            rows.extend(batch)
        except Exception as exc:
            # Best effort: a failed fetch for one event type is logged and
            # the other event type is still attempted.
            logger.warning(
                "adr100_remediation_history_fetch_failed",
                event_type=event_type,
                incident_id=incident_id,
                error=str(exc),
            )

    rows.sort(key=_record_created_at, reverse=True)

    items: list[dict[str, Any]] = []
    for row in rows:
        context = getattr(row, "context", None) or {}
        is_dry_run_record = (
            context.get("schema_version") == "adr100_remediation_dry_run_history_v1"
        )
        if not is_dry_run_record:
            continue
        if work_item_id and context.get("work_item_id") != work_item_id:
            continue
        items.append(_history_item(row, context))
        if len(items) >= safe_limit:
            break

    filters = {
        "incident_id": incident_id,
        "work_item_id": work_item_id,
    }
    return {
        "schema_version": "adr100_remediation_history_v1",
        "total": len(items),
        "limit": safe_limit,
        "filters": filters,
        "items": items,
        "by_work_item": _summarize_history_by_work_item(items),
    }
|
||||
|
||||
async def _find_work_item(self, work_item_id: str) -> dict[str, Any]:
    """Look up one remediation queue item in the current SLO report.

    Reads ``verification_coverage.remediation_queue.items`` from the report
    returned by the SLO service; missing or falsy levels are treated as empty.

    Returns:
        A shallow copy of the first item whose ``work_item_id`` matches.

    Raises:
        RemediationNotFoundError: No item in the queue matches.
    """
    report = await self._slo_service.fetch_report()
    coverage = report.get("verification_coverage") or {}
    queue = coverage.get("remediation_queue") or {}
    queue_items = queue.get("items") or []

    match = next(
        (entry for entry in queue_items if entry.get("work_item_id") == work_item_id),
        None,
    )
    if match is None:
        raise RemediationNotFoundError(work_item_id)
    return dict(match)
|
||||
|
||||
async def _load_incident(self, item: dict[str, Any]) -> Incident | None:
    """Fetch the incident referenced by a work item.

    Returns ``None`` without querying the repository when the item has no
    (or a falsy) ``incident_id``; otherwise returns the repository's
    ``get_by_id`` result for the stringified id.
    """
    incident_id = str(item.get("incident_id") or "")
    if incident_id:
        return await self._incident_repository.get_by_id(incident_id)
    return None
|
||||
|
||||
async def _dry_run_reverify(
    self,
    item: dict[str, Any],
    incident: Incident,
    checks: list[dict[str, Any]],
) -> dict[str, Any]:
    """Build the reverify dry-run payload for one work item.

    Collects the incident's current state, runs ``_assess_recovery`` with no
    pre-state, assembles the result payload with the PromQL preview and a
    fixed MCP route description, then attaches the history-recording result.
    """
    post_state = await self._collect_current_state(incident)
    playbook = item.get('playbook_id') or 'unknown'
    action_taken = f"dry_run_reverify:{playbook}"
    result = _assess_recovery(None, post_state, action_taken)

    extra = {
        "promql": _promql_for_incident(incident),
        "mcp_route": {
            "agent_id": "post_execution_verifier",
            "required_scope": "read",
            "is_shadow": True,
            "flywheel_node": "verify",
        },
    }
    payload = _dry_run_result_payload(
        item=item,
        mode="reverify",
        checks=checks,
        post_state=post_state,
        verification_result_preview=result,
        extra=extra,
    )
    payload["history"] = await self._record_dry_run_history(item, payload)
    return payload
|
||||
|
||||
async def _dry_run_replay(
    self,
    item: dict[str, Any],
    incident: Incident,
    checks: list[dict[str, Any]],
) -> dict[str, Any]:
    """Build the replay dry-run payload for one work item.

    Asks the auto-repair service to preview a read-only SSH MCP route for the
    incident's diagnostic command and appends a ``supported_executor_route``
    check to ``checks`` (the caller's list is mutated). It then collects the
    current state, runs ``_assess_recovery`` with no pre-state, and assembles
    the payload including the history-recording result.
    """
    diagnostic_command = _diagnostic_command_for_incident(incident)
    route = self._auto_repair_service.preview_read_only_ssh_mcp_route(
        incident,
        diagnostic_command,
    )
    route_check = {
        "name": "supported_executor_route",
        "passed": route is not None,
        "detail": "mcp:ssh_diagnose" if route else "missing host/container route",
    }
    checks.append(route_check)

    post_state = await self._collect_current_state(incident)
    playbook = item.get('playbook_id') or 'unknown'
    action_taken = f"dry_run_replay:{playbook}"
    result = _assess_recovery(None, post_state, action_taken)

    extra = {
        "diagnostic_command_preview": diagnostic_command,
        "mcp_route": route,
        "promql": _promql_for_incident(incident),
    }
    payload = _dry_run_result_payload(
        item=item,
        mode="replay",
        checks=checks,
        post_state=post_state,
        verification_result_preview=result,
        extra=extra,
    )
    payload["history"] = await self._record_dry_run_history(item, payload)
    return payload
|
||||
|
||||
async def _collect_current_state(self, incident: Incident) -> dict[str, Any]:
    """Collect the verifier's post-state for ``incident`` on a best-effort basis.

    The collection is capped at 12 seconds. A timeout or any other exception
    is logged as a warning and an empty dict is returned instead of raising.
    """
    try:
        return await asyncio.wait_for(
            self._verifier._collect_post_state(incident),
            timeout=12.0,
        )
    except asyncio.TimeoutError:
        logger.warning(
            "adr100_remediation_dry_run_timeout",
            incident_id=incident.incident_id,
        )
    except Exception as exc:
        # Deliberately broad: a dry run must never fail because state
        # collection failed.
        logger.warning(
            "adr100_remediation_dry_run_collect_failed",
            incident_id=incident.incident_id,
            error=str(exc),
        )
    # Both failure paths fall through to the same empty result.
    return {}
|
||||
|
||||
async def _record_dry_run_history(
    self,
    item: dict[str, Any],
    payload: dict[str, Any],
) -> dict[str, Any]:
    """Persist a dry-run result to the alert-operation log and the timeline.

    Returns a dict with ``recorded``, ``alert_operation_id`` and
    ``timeline_event_id``. When history recording is disabled, or the item
    has no incident id, a short ``{"recorded": False, "reason": ...}`` dict
    is returned instead. The two writes are independent and best effort: a
    failure in either is logged as a warning and does not prevent the other.
    """
    if not self._record_history_enabled:
        return {"recorded": False, "reason": "disabled"}

    incident_id = str(item.get("incident_id") or "")
    if incident_id == "":
        return {"recorded": False, "reason": "missing_incident_id"}

    context = _history_context(item, payload)
    allowed = bool(payload.get("allowed"))
    alert_operation_id = None
    timeline_event_id = None

    # --- 1. alert operation log -------------------------------------------
    try:
        repo = self._alert_operation_log_repository
        if repo is None:
            # Lazy import: only needed when no repository was injected.
            from src.repositories.alert_operation_log_repository import (
                get_alert_operation_log_repository,
            )

            repo = get_alert_operation_log_repository()
        event_name = "PRE_FLIGHT_PASSED" if allowed else "PRE_FLIGHT_FAILED"
        record = await repo.append(
            event_name,
            incident_id=incident_id,
            auto_repair_id=str(item.get("auto_repair_id") or "") or None,
            actor="adr100_remediation_service",
            action_detail=f"adr100_remediation_dry_run:{payload.get('mode')}"[:200],
            success=allowed,
            context=context,
        )
        if record is not None:
            alert_operation_id = getattr(record, "id", None)
    except Exception as exc:
        logger.warning(
            "adr100_remediation_alert_operation_history_failed",
            incident_id=incident_id,
            error=str(exc),
        )

    # --- 2. timeline event ------------------------------------------------
    try:
        timeline = self._timeline_service
        if timeline is None:
            # Lazy import: only needed when no timeline service was injected.
            from src.services.approval_db import get_timeline_service

            timeline = get_timeline_service()
        event = await timeline.add_event(
            event_type="verifier",
            status=_timeline_status(payload),
            title="ADR-100 remediation dry-run",
            description=_history_description(context),
            actor="adr100_remediation_service",
            actor_role=str(payload.get("mode") or "dry_run"),
            incident_id=incident_id,
        )
        if event:
            timeline_event_id = event.get("id")
    except Exception as exc:
        logger.warning(
            "adr100_remediation_timeline_history_failed",
            incident_id=incident_id,
            error=str(exc),
        )

    # "recorded" is true as soon as either sink produced a truthy id.
    return {
        "recorded": bool(alert_operation_id or timeline_event_id),
        "alert_operation_id": alert_operation_id,
        "timeline_event_id": timeline_event_id,
    }
|
||||
|
||||
|
||||
def _select_mode(item: dict[str, Any], requested: RemediationMode) -> Literal["reverify", "replay"]:
    """Resolve the dry-run mode for a queue item.

    An explicit ``"reverify"`` or ``"replay"`` request wins. Otherwise the
    item selects ``"reverify"`` when its status is ``ready_for_reverify`` or
    its action is ``reverify_with_promql_template``; everything else falls
    back to ``"replay"``.
    """
    if requested in ("reverify", "replay"):
        return requested

    item_wants_reverify = (
        item.get("remediation_status") == "ready_for_reverify"
        or item.get("remediation_action") == "reverify_with_promql_template"
    )
    return "reverify" if item_wants_reverify else "replay"
|
||||
|
||||
|
||||
def _base_checks(item: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the three pre-flight checks every dry run starts with.

    Each check is a dict with ``name``, ``passed`` and ``detail``:
    ``queue_item_ready`` (status is one of ``_READY_STATUSES``),
    ``read_only_guardrail`` (action is one of the two supported read-only
    actions) and ``no_state_mutation`` (always passes).
    """
    status = str(item.get("remediation_status") or "unknown")
    action = str(item.get("remediation_action") or "unknown")
    read_only_actions = {
        "replay_with_supported_executor",
        "reverify_with_promql_template",
    }

    queue_check = {
        "name": "queue_item_ready",
        "passed": status in _READY_STATUSES,
        "detail": status,
    }
    guardrail_check = {
        "name": "read_only_guardrail",
        "passed": action in read_only_actions,
        "detail": action,
    }
    mutation_check = {
        "name": "no_state_mutation",
        "passed": True,
        "detail": "dry_run_does_not_update_incident_or_auto_repair_rows",
    }
    return [queue_check, guardrail_check, mutation_check]
|
||||
|
||||
|
||||
def _plan_for_item(item: dict[str, Any], mode: str) -> dict[str, Any]:
    """Describe the read-only plan a dry run in ``mode`` would follow.

    ``"reverify"`` yields the verifier plan; any other mode yields the
    executor plan, which additionally carries the item's
    ``remediation_action`` as ``target_action``. Both plans declare an empty
    ``writes`` list.
    """
    reverify = mode == "reverify"
    plan: dict[str, Any] = {
        "step": (
            "collect_current_state_and_assess"
            if reverify
            else "validate_supported_executor_route_then_collect_current_state"
        ),
        "agent_id": "post_execution_verifier" if reverify else "auto_repair_executor",
        "required_scope": "read",
        "writes": [],
    }
    if not reverify:
        plan["target_action"] = item.get("remediation_action")
    return plan
|
||||
|
||||
|
||||
def _dry_run_blocked_payload(
    item: dict[str, Any],
    mode: str,
    checks: list[dict[str, Any]],
) -> dict[str, Any]:
    """Build the payload for a dry run that was blocked before executing.

    The payload uses the ``adr100_remediation_dry_run_v1`` schema with
    ``allowed`` and ``executed`` both ``False``, a ``"blocked"`` verification
    preview and an empty post-state summary.
    """
    payload: dict[str, Any] = {"schema_version": "adr100_remediation_dry_run_v1"}
    # Identity fields are copied straight from the queue item.
    for key in ("work_item_id", "incident_id", "auto_repair_id"):
        payload[key] = item.get(key)
    payload.update(
        mode=mode,
        allowed=False,
        executed=False,
        safety_level="read_only",
        writes_incident_state=False,
        writes_auto_repair_result=False,
        checks=checks,
        verification_result_preview="blocked",
        post_state_summary={},
    )
    return payload
|
||||
|
||||
|
||||
def _dry_run_result_payload(
    *,
    item: dict[str, Any],
    mode: str,
    checks: list[dict[str, Any]],
    post_state: dict[str, Any],
    verification_result_preview: str,
    extra: dict[str, Any],
) -> dict[str, Any]:
    """Build the payload for a dry run that was actually carried out.

    ``allowed`` is true only when every check's ``passed`` value is truthy.
    ``post_state`` is reduced to a summary rather than embedded. Entries in
    ``extra`` are merged last, so they override any core key of the same
    name.
    """
    payload: dict[str, Any] = {"schema_version": "adr100_remediation_dry_run_v1"}
    for key in ("work_item_id", "incident_id", "auto_repair_id"):
        payload[key] = item.get(key)
    payload["mode"] = mode
    payload["allowed"] = all(check["passed"] for check in checks)
    payload["executed"] = True
    payload["safety_level"] = "read_only"
    payload["writes_incident_state"] = False
    payload["writes_auto_repair_result"] = False
    payload["checks"] = checks
    payload["verification_result_preview"] = verification_result_preview
    payload["post_state_summary"] = _summarize_post_state(post_state)
    payload.update(extra)
    return payload
|
||||
|
||||
|
||||
def _summarize_post_state(post_state: dict[str, Any]) -> dict[str, Any]:
    """Reduce a collected post-state mapping to a small summary.

    Returns ``tool_count`` (number of keys), ``tools`` (the first eight keys
    in sorted order) and ``has_state`` (whether the mapping is non-empty).
    A ``None`` post-state is summarised like an empty one instead of raising
    ``AttributeError``.
    """
    state = post_state or {}
    keys = sorted(state)
    return {
        "tool_count": len(keys),
        # Cap the listing so the summary stays small.
        "tools": keys[:8],
        "has_state": bool(state),
    }
|
||||
|
||||
|
||||
def _history_context(item: dict[str, Any], payload: dict[str, Any]) -> dict[str, Any]:
    """Assemble the context dict stored alongside a dry-run history record.

    Identity fields come from the queue ``item`` and outcome fields from the
    dry-run ``payload``; keys missing from either are stored as ``None``.
    """
    item_keys = ("work_item_id", "auto_repair_id", "playbook_id", "alertname")
    payload_keys = (
        "mode",
        "allowed",
        "executed",
        "safety_level",
        "writes_incident_state",
        "writes_auto_repair_result",
        "verification_result_preview",
        "post_state_summary",
        "mcp_route",
        "checks",
    )
    context: dict[str, Any] = {
        "schema_version": "adr100_remediation_dry_run_history_v1",
    }
    context.update((key, item.get(key)) for key in item_keys)
    context.update((key, payload.get(key)) for key in payload_keys)
    return context
|
||||
|
||||
|
||||
def _timeline_status(payload: dict[str, Any]) -> str:
    """Map a dry-run payload to a timeline status.

    ``"success"`` requires the payload to be allowed and its verification
    preview to equal ``"success"``; every other combination is ``"warning"``.
    """
    allowed = bool(payload.get("allowed"))
    verified = allowed and payload.get("verification_result_preview") == "success"
    return "success" if verified else "warning"
|
||||
|
||||
|
||||
def _history_description(context: dict[str, Any]) -> str:
    """Render a one-line description of a dry run for the timeline.

    The line is built from space-separated ``key=value`` pairs and truncated
    to 500 characters. A missing route falls back to
    ``unknown_agent/current_state`` and a missing summary to ``tools=0``.
    """
    summary = context.get("post_state_summary") or {}
    route = context.get("mcp_route") or {}
    agent = route.get("agent_id") or "unknown_agent"
    tool = route.get("tool_name") or "current_state"

    parts = [
        f"mode={context.get('mode')}",
        f"preview={context.get('verification_result_preview')}",
        f"tools={summary.get('tool_count', 0)}",
        f"route={agent}/{tool}",
        f"writes_incident={context.get('writes_incident_state')}",
        f"writes_auto_repair={context.get('writes_auto_repair_result')}",
    ]
    return " ".join(parts)[:500]
|
||||
|
||||
|
||||
def _record_created_at(record: Any) -> str:
    """Return the record's ``created_at`` as a string.

    Values exposing ``isoformat`` are rendered through it; anything else is
    passed through ``str``, with missing or falsy values becoming ``""``.
    """
    created = getattr(record, "created_at", None)
    return created.isoformat() if hasattr(created, "isoformat") else str(created or "")
|
||||
|
||||
|
||||
def _history_item(record: Any, context: dict[str, Any]) -> dict[str, Any]:
    """Flatten a stored history record and its context into one dict.

    Record attributes are read with ``getattr`` so partially populated
    records are tolerated. Route and post-state details are lifted out of
    their nested dicts in ``context`` into top-level keys.
    """
    route = context.get("mcp_route") or {}
    state_summary = context.get("post_state_summary") or {}

    # Fields taken from the record itself.
    entry: dict[str, Any] = {
        "id": str(getattr(record, "id", "")),
        "incident_id": getattr(record, "incident_id", None),
        # Fall back to the context when the record carries no auto_repair_id.
        "auto_repair_id": getattr(record, "auto_repair_id", None)
        or context.get("auto_repair_id"),
        "event_type": str(getattr(record, "event_type", "")),
        "actor": getattr(record, "actor", None),
        "success": getattr(record, "success", None),
        "created_at": _record_created_at(record),
    }

    # Fields copied verbatim from the context.
    for key in (
        "work_item_id",
        "playbook_id",
        "alertname",
        "mode",
        "allowed",
        "executed",
        "safety_level",
        "verification_result_preview",
    ):
        entry[key] = context.get(key)

    # Fields lifted out of the nested summary and route dicts.
    entry["tool_count"] = state_summary.get("tool_count", 0)
    entry["tools"] = state_summary.get("tools") or []
    entry["agent_id"] = route.get("agent_id")
    entry["tool_name"] = route.get("tool_name") or "current_state"
    entry["required_scope"] = route.get("required_scope")

    entry["writes_incident_state"] = context.get("writes_incident_state")
    entry["writes_auto_repair_result"] = context.get("writes_auto_repair_result")
    entry["checks"] = context.get("checks") or []
    return entry
|
||||
|
||||
|
||||
def _summarize_history_by_work_item(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Group history items and report a count plus "latest" fields per group.

    Items are keyed by ``work_item_id``, falling back to ``incident_id`` and
    then ``id``. The ``latest_*`` fields are taken from the first item seen
    for each key.
    NOTE(review): this presumably relies on ``items`` arriving newest-first;
    confirm against the caller.
    """
    groups: dict[str, dict[str, Any]] = {}
    for entry in items:
        group_key = str(
            entry.get("work_item_id") or entry.get("incident_id") or entry.get("id")
        )
        bucket = groups.get(group_key)
        if bucket is None:
            bucket = groups[group_key] = {
                "work_item_id": entry.get("work_item_id"),
                "incident_id": entry.get("incident_id"),
                "count": 0,
                "latest_at": entry.get("created_at"),
                "latest_event_type": entry.get("event_type"),
                "latest_success": entry.get("success"),
                "latest_preview": entry.get("verification_result_preview"),
                "latest_mode": entry.get("mode"),
                "latest_agent_id": entry.get("agent_id"),
                "latest_tool_name": entry.get("tool_name"),
                "required_scope": entry.get("required_scope"),
            }
        bucket["count"] += 1
    return list(groups.values())
|
||||
|
||||
|
||||
def _diagnostic_command_for_incident(incident: Incident) -> str:
    """Build the SSH diagnostic command preview for ``incident``.

    The host comes from the ``host`` label, then ``instance``; when neither
    is set the literal placeholder ``{host}`` is used. If a container label
    (``container_name`` or ``container``) is present, ``docker stats`` is
    scoped to that container.

    Label values originate from alert data, so they are shell-quoted before
    being interpolated. For ordinary host and container names the quoting is
    a no-op and the command text is unchanged.
    """
    import shlex  # local import keeps this block self-contained

    labels = _labels_for_incident(incident)
    host = str(labels.get("host") or labels.get("instance") or "")
    container = str(labels.get("container_name") or labels.get("container") or "")

    remote_command = "uptime; docker stats --no-stream"
    if container:
        remote_command = f"{remote_command} {shlex.quote(container)}"

    # The placeholder is emitted verbatim; real host values are quoted.
    target = shlex.quote(host) if host else "{host}"
    # Quoting the whole remote command reproduces the single-quoted form and
    # correctly nests any quoting that was applied to the container name.
    return f"ssh {target} {shlex.quote(remote_command)}"
|
||||
|
||||
|
||||
def _promql_for_incident(incident: Incident) -> str:
    """Build the PromQL query for ``incident`` from its first signal.

    The alert name is the ``alertname`` label when set, otherwise the first
    signal's ``alert_name`` attribute; an incident without signals yields an
    empty alert name.
    """
    labels = _labels_for_incident(incident)
    if not incident.signals:
        return _build_prometheus_query("", labels)

    first_signal = incident.signals[0]
    alertname = labels.get("alertname") or getattr(first_signal, "alert_name", "")
    return _build_prometheus_query(alertname, labels)
|
||||
|
||||
|
||||
def _labels_for_incident(incident: Incident) -> dict[str, Any]:
    """Return the labels of the incident's first signal.

    Yields an empty dict when the incident has no signals or the first
    signal's labels are falsy.
    """
    if not incident.signals:
        return {}
    first_signal = incident.signals[0]
    return first_signal.labels or {}
|
||||
|
||||
|
||||
# Module-level singleton; created lazily by the getter below.
_service: Adr100RemediationService | None = None


def get_adr100_remediation_service() -> Adr100RemediationService:
    """Return the shared ADR-100 remediation service, creating it on first use."""
    global _service
    instance = _service
    if instance is None:
        instance = _service = Adr100RemediationService()
    return instance
|
||||
|
||||
|
||||
def set_adr100_remediation_service(service: Adr100RemediationService | None) -> None:
    """Replace the module-level ADR-100 remediation service (intended for tests).

    Passing ``None`` clears the stored instance.
    """
    global _service
    _service = service
|
||||
354
apps/api/src/services/adr100_slo_metrics_service.py
Normal file
354
apps/api/src/services/adr100_slo_metrics_service.py
Normal file
@@ -0,0 +1,354 @@
|
||||
"""
|
||||
ADR-100 SLO metrics emitter.
|
||||
|
||||
Prometheus recording rules for the AI flywheel SLOs expect a small set of
|
||||
counter-like metrics. The source of truth already lives in PostgreSQL, so this
|
||||
read-side emitter exposes DB totals on /metrics without changing runtime write
|
||||
paths or introducing another state store.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from time import time
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from src.db.base import get_db_context
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AutomationOperationSample:
    """One labelled sample of the automation-operation metrics."""

    # Value of the ``outcome`` label.
    outcome: str
    # Value of the ``operation_type`` label.
    operation_type: str
    # Sample value: number of rows in this outcome/operation_type group.
    count: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class VerificationSample:
    """One labelled sample of the post-execution verification metrics."""

    # Value of the ``outcome`` label.
    outcome: str
    # Sample value: number of rows with this outcome.
    count: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Adr100SloMetricsSnapshot:
    """Point-in-time set of values rendered as ADR-100 SLO metrics.

    Every field has a default, so an empty snapshot can be created without
    arguments.
    """

    # Labelled automation-operation samples: all-time and last 24 hours.
    automation_operations: list[AutomationOperationSample] = field(default_factory=list)
    automation_operations_24h: list[AutomationOperationSample] = field(default_factory=list)

    # Labelled verification samples: all-time and last 24 hours.
    post_execution_verifications: list[VerificationSample] = field(default_factory=list)
    post_execution_verifications_24h: list[VerificationSample] = field(default_factory=list)

    # Scalar totals.
    knowledge_entries_total: int = 0
    knowledge_entries_created_24h: int = 0
    high_confidence_total: int = 0
    high_confidence_success_total: int = 0

    # Epoch seconds captured when the snapshot object is created.
    emitted_at: float = field(default_factory=time)
|
||||
|
||||
|
||||
class Adr100SloMetricsService:
    """Build ADR-100 Prometheus samples from production DB state."""

    async def to_prometheus_lines(self) -> str:
        """Fetch a fresh snapshot and render it as Prometheus exposition text."""
        snapshot = await self.fetch_snapshot()
        return render_adr100_slo_metrics(snapshot)

    async def fetch_snapshot(self) -> Adr100SloMetricsSnapshot:
        """Run the read-only SLO queries and pack the results into a snapshot.

        All queries are executed sequentially on one DB context. ``NULL``
        counts are coerced to ``0``.
        """
        async with get_db_context() as db:
            automation_rows = (
                await db.execute(text(_AUTOMATION_OPERATION_SQL))
            ).fetchall()
            automation_24h_rows = (
                await db.execute(text(_AUTOMATION_OPERATION_24H_SQL))
            ).fetchall()
            verification_rows = (
                await db.execute(text(_POST_EXECUTION_VERIFICATION_SQL))
            ).fetchall()
            verification_24h_rows = (
                await db.execute(text(_POST_EXECUTION_VERIFICATION_24H_SQL))
            ).fetchall()
            knowledge_total = await self._scalar_count(
                db,
                "SELECT count(*) FROM knowledge_entries",
            )
            knowledge_created_24h = await self._scalar_count(
                db,
                """
                SELECT count(*)
                FROM knowledge_entries
                WHERE created_at >= NOW() - INTERVAL '24 hours'
                """,
            )
            confidence_row = (
                await db.execute(text(_HIGH_CONFIDENCE_APPROVAL_SQL))
            ).one()

        return Adr100SloMetricsSnapshot(
            automation_operations=self._automation_samples(automation_rows),
            automation_operations_24h=self._automation_samples(automation_24h_rows),
            post_execution_verifications=self._verification_samples(verification_rows),
            post_execution_verifications_24h=self._verification_samples(
                verification_24h_rows
            ),
            knowledge_entries_total=knowledge_total,
            knowledge_entries_created_24h=knowledge_created_24h,
            high_confidence_total=int(confidence_row.high_confidence_total or 0),
            high_confidence_success_total=int(
                confidence_row.high_confidence_success_total or 0
            ),
        )

    @staticmethod
    async def _scalar_count(db, sql: str) -> int:
        """Execute a single-value count query and return it as an int (``NULL`` -> 0)."""
        return int((await db.execute(text(sql))).scalar() or 0)

    @staticmethod
    def _automation_samples(rows: list) -> list[AutomationOperationSample]:
        """Convert ``(outcome, operation_type, count)`` rows into samples."""
        return [
            AutomationOperationSample(
                outcome=str(row.outcome),
                operation_type=str(row.operation_type),
                count=int(row.count or 0),
            )
            for row in rows
        ]

    @staticmethod
    def _verification_samples(rows: list) -> list[VerificationSample]:
        """Convert ``(outcome, count)`` rows into samples."""
        return [
            VerificationSample(
                outcome=str(row.outcome),
                count=int(row.count or 0),
            )
            for row in rows
        ]
|
||||
|
||||
|
||||
def _render_metric_family(
    name: str,
    help_text: str,
    metric_type: str,
    samples: list,
    label_names: tuple[str, ...],
) -> list[str]:
    """Render HELP/TYPE plus one line per sample for a labelled metric family.

    Each sample must expose one attribute per entry in ``label_names`` and a
    ``count`` attribute. When ``samples`` is empty, a single zero-valued
    placeholder series with every label set to ``"none"`` is emitted.
    """
    lines = [f"# HELP {name} {help_text}", f"# TYPE {name} {metric_type}"]
    if not samples:
        placeholder = ",".join(f'{label}="none"' for label in label_names)
        lines.append(f"{name}{{{placeholder}}} 0")
        return lines
    for sample in samples:
        labels = ",".join(
            f'{label}="{_escape_label(getattr(sample, label))}"'
            for label in label_names
        )
        lines.append(f"{name}{{{labels}}} {sample.count}")
    return lines


def render_adr100_slo_metrics(snapshot: Adr100SloMetricsSnapshot) -> str:
    """Render ADR-100 SLO metrics in Prometheus text exposition format.

    The output starts and ends with a newline because the first and last
    joined elements are empty strings. The four labelled families come
    first, followed by the scalar families and the emitter timestamp
    (rounded to whole seconds).
    """
    operation_labels = ("outcome", "operation_type")
    verification_labels = ("outcome",)

    lines: list[str] = [""]
    lines += _render_metric_family(
        "automation_operation_log_total",
        "DB-derived AI automation operation count for ADR-100 SLOs",
        "counter",
        snapshot.automation_operations,
        operation_labels,
    )
    lines += _render_metric_family(
        "automation_operation_created_24h",
        "DB-derived AI automation operation count created in the last 24 hours for ADR-100 SLO dashboards",
        "gauge",
        snapshot.automation_operations_24h,
        operation_labels,
    )
    lines += _render_metric_family(
        "post_execution_verification_total",
        "DB-derived post execution verification result count for ADR-100 SLOs",
        "counter",
        snapshot.post_execution_verifications,
        verification_labels,
    )
    lines += _render_metric_family(
        "post_execution_verification_created_24h",
        "DB-derived post execution verification result count created in the last 24 hours for ADR-100 SLO dashboards",
        "gauge",
        snapshot.post_execution_verifications_24h,
        verification_labels,
    )

    # Unlabelled scalar families.
    lines.extend([
        "# HELP knowledge_entries_total DB-derived knowledge entry count for ADR-100 SLOs",
        "# TYPE knowledge_entries_total counter",
        f"knowledge_entries_total {snapshot.knowledge_entries_total}",
        "# HELP knowledge_entries_created_24h DB-derived knowledge entries created in the last 24 hours for ADR-100 SLOs",
        "# TYPE knowledge_entries_created_24h gauge",
        f"knowledge_entries_created_24h {snapshot.knowledge_entries_created_24h}",
        "# HELP approval_records_high_confidence_total DB-derived high confidence approval decisions for ADR-100 SLOs",
        "# TYPE approval_records_high_confidence_total counter",
        f"approval_records_high_confidence_total {snapshot.high_confidence_total}",
        "# HELP approval_records_high_confidence_success_total DB-derived high confidence approval decisions with successful verification for ADR-100 SLOs",
        "# TYPE approval_records_high_confidence_success_total counter",
        (
            "approval_records_high_confidence_success_total "
            f"{snapshot.high_confidence_success_total}"
        ),
        "# HELP adr100_slo_emitter_last_success_timestamp Last successful ADR-100 DB metrics emission timestamp",
        "# TYPE adr100_slo_emitter_last_success_timestamp gauge",
        f"adr100_slo_emitter_last_success_timestamp {snapshot.emitted_at:.0f}",
        "",
    ])
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _escape_label(value: str) -> str:
    """Escape a label value for the Prometheus text exposition format.

    Backslash, newline and double quote become ``\\\\``, ``\\n`` and ``\\"``
    respectively, in a single pass over the string.
    """
    escapes = str.maketrans({"\\": "\\\\", "\n": "\\n", '"': '\\"'})
    return value.translate(escapes)
|
||||
|
||||
|
||||
# All-time automation operation counts grouped by (outcome, operation_type).
# Rows come from two sources:
#   * automation_operation_log, restricted to five operation types. The
#     outcome is the row's status when it is not 'success'; 'human_required'
#     when the actor is 'approval_execution' and input.requested_by does not
#     start with "auto" (case-insensitive); otherwise 'auto_executed'.
#   * auto_repair_executions, always reported as 'auto_repair_executed' with
#     outcome 'auto_executed' or 'failed' depending on the success flag.
# NOTE(review): the pattern is written with a doubled percent sign ('auto%%');
# confirm how the driver passes it through. Two LIKE wildcards in a row match
# the same strings as one.
_AUTOMATION_OPERATION_SQL = """
WITH automation_scope AS (
SELECT
CASE
WHEN status <> 'success' THEN status
WHEN actor = 'approval_execution'
AND COALESCE(input->>'requested_by', '') NOT ILIKE 'auto%%'
THEN 'human_required'
ELSE 'auto_executed'
END AS outcome,
operation_type
FROM automation_operation_log
WHERE operation_type IN (
'playbook_executed',
'remediation_executed',
'remediation_verified',
'remediation_rolled_back',
'self_correction_attempted'
)
UNION ALL
SELECT
CASE WHEN success THEN 'auto_executed' ELSE 'failed' END AS outcome,
'auto_repair_executed' AS operation_type
FROM auto_repair_executions
)
SELECT
outcome,
operation_type,
count(*) AS count
FROM automation_scope
GROUP BY outcome, operation_type
ORDER BY outcome, operation_type
"""


# Same grouping as _AUTOMATION_OPERATION_SQL, limited in both source tables to
# rows whose created_at falls within the last 24 hours.
_AUTOMATION_OPERATION_24H_SQL = """
WITH automation_scope AS (
SELECT
CASE
WHEN status <> 'success' THEN status
WHEN actor = 'approval_execution'
AND COALESCE(input->>'requested_by', '') NOT ILIKE 'auto%%'
THEN 'human_required'
ELSE 'auto_executed'
END AS outcome,
operation_type
FROM automation_operation_log
WHERE operation_type IN (
'playbook_executed',
'remediation_executed',
'remediation_verified',
'remediation_rolled_back',
'self_correction_attempted'
)
AND created_at >= NOW() - INTERVAL '24 hours'
UNION ALL
SELECT
CASE WHEN success THEN 'auto_executed' ELSE 'failed' END AS outcome,
'auto_repair_executed' AS operation_type
FROM auto_repair_executions
WHERE created_at >= NOW() - INTERVAL '24 hours'
)
SELECT
outcome,
operation_type,
count(*) AS count
FROM automation_scope
GROUP BY outcome, operation_type
ORDER BY outcome, operation_type
"""


# All-time count of incident_evidence rows per non-NULL verification_result.
_POST_EXECUTION_VERIFICATION_SQL = """
SELECT verification_result AS outcome, count(*) AS count
FROM incident_evidence
WHERE verification_result IS NOT NULL
GROUP BY verification_result
ORDER BY verification_result
"""


# Same as above, limited to evidence whose collected_at is within the last
# 24 hours.
_POST_EXECUTION_VERIFICATION_24H_SQL = """
SELECT verification_result AS outcome, count(*) AS count
FROM incident_evidence
WHERE verification_result IS NOT NULL
AND collected_at >= NOW() - INTERVAL '24 hours'
GROUP BY verification_result
ORDER BY verification_result
"""


# High-confidence approval totals. Confidence is the first available of:
# extra_metadata.confidence_score, extra_metadata.confidence (each used only
# when it looks like a plain non-negative decimal), composite_score, then 0.
# A record counts as high confidence at >= 0.8, and as a success when any
# incident_evidence row for the same incident has verification_result
# 'success'.
_HIGH_CONFIDENCE_APPROVAL_SQL = """
WITH approval_confidence AS (
SELECT
id,
incident_id,
COALESCE(
CASE
WHEN extra_metadata->>'confidence_score' ~ '^[0-9]+(\\.[0-9]+)?$'
THEN (extra_metadata->>'confidence_score')::numeric
ELSE NULL
END,
CASE
WHEN extra_metadata->>'confidence' ~ '^[0-9]+(\\.[0-9]+)?$'
THEN (extra_metadata->>'confidence')::numeric
ELSE NULL
END,
composite_score,
0
) AS confidence
FROM approval_records
)
SELECT
count(*) FILTER (WHERE confidence >= 0.8) AS high_confidence_total,
count(*) FILTER (
WHERE confidence >= 0.8
AND EXISTS (
SELECT 1
FROM incident_evidence ev
WHERE ev.incident_id = approval_confidence.incident_id
AND ev.verification_result = 'success'
)
) AS high_confidence_success_total
FROM approval_confidence
"""
|
||||
|
||||
|
||||
# Module-level singleton; created lazily by the getter below.
_adr100_slo_metrics_service: Adr100SloMetricsService | None = None


def get_adr100_slo_metrics_service() -> Adr100SloMetricsService:
    """Return the shared ADR-100 SLO metrics service, creating it on first use."""
    global _adr100_slo_metrics_service
    instance = _adr100_slo_metrics_service
    if instance is None:
        instance = _adr100_slo_metrics_service = Adr100SloMetricsService()
    return instance
|
||||
743
apps/api/src/services/adr100_slo_status_service.py
Normal file
743
apps/api/src/services/adr100_slo_status_service.py
Normal file
@@ -0,0 +1,743 @@
|
||||
"""
|
||||
Read-only ADR-100 SLO status snapshot.
|
||||
|
||||
GovernanceAgent.check_slo_compliance() can emit governance alerts when an SLO is
|
||||
violated. This service is intentionally read-only so dashboards can show the
|
||||
same Prometheus-backed state without producing Telegram/DB side effects.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
import structlog
|
||||
from sqlalchemy import text
|
||||
|
||||
from src.core.config import settings
|
||||
from src.db.base import get_db_context
|
||||
from src.utils.timezone import now_taipei_iso
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Adr100SloDefinition:
    """Static description of one ADR-100 SLO and how to evaluate it."""

    # Identifier of the SLO.
    name: str
    # PromQL expression that yields the SLO's current value.
    query: str
    # Target value and hard limit for the SLO.
    target: float
    hard_red_line: float
    # Comparison direction; every definition in this module uses "above".
    direction: str
    # Unit label and evaluation window label, e.g. "percent" / "5m".
    unit: str
    window: str

    # Optional PromQL expression whose result, multiplied by
    # ``denominator_window_seconds``, gives the sample count that is compared
    # against ``minimum_events``.
    denominator_query: str | None = None
    denominator_window_seconds: int = 0
    minimum_events: float = 1.0
|
||||
|
||||
|
||||
# The SLOs evaluated by Adr100SloStatusService, in reporting order. The three
# ratio SLOs carry a denominator query so they can be skipped when too few
# events occurred in the window; km_growth_rate is a plain count with no
# denominator.
ADR100_SLO_DEFINITIONS: tuple[Adr100SloDefinition, ...] = (
    # Ratio over all automation operations in the last 5 minutes.
    Adr100SloDefinition(
        name="autonomy_rate",
        query="sli:autonomy_rate:5m",
        target=0.80,
        hard_red_line=0.70,
        direction="above",
        unit="percent",
        window="5m",
        denominator_query="sum(rate(automation_operation_log_total[5m]))",
        denominator_window_seconds=300,
    ),
    # Ratio over auto-executed operations in the last 5 minutes.
    Adr100SloDefinition(
        name="decision_accuracy",
        query="sli:decision_accuracy:5m",
        target=0.90,
        hard_red_line=0.85,
        direction="above",
        unit="percent",
        window="5m",
        denominator_query='sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))',
        denominator_window_seconds=300,
    ),
    # Ratio over high-confidence approval records in the last hour.
    Adr100SloDefinition(
        name="confidence_calibration",
        query="sli:confidence_calibration:1h",
        target=0.80,
        hard_red_line=0.70,
        direction="above",
        unit="percent",
        window="1h",
        denominator_query="sum(rate(approval_records_high_confidence_total[1h]))",
        denominator_window_seconds=3600,
    ),
    # Count of knowledge entries created in 24 hours; prefers the DB-derived
    # gauge and falls back to the recording rule.
    Adr100SloDefinition(
        name="km_growth_rate",
        query="max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)",
        target=20.0,
        hard_red_line=5.0,
        direction="above",
        unit="count",
        window="24h",
    ),
)
|
||||
|
||||
|
||||
class Adr100SloStatusService:
    """Fetch ADR-100 SLO status from Prometheus without writing governance events."""

    async def fetch_report(self) -> dict[str, Any]:
        """Evaluate every ADR-100 SLO and return the combined status report.

        Metrics are queried one after another over a single HTTP client with
        a 5 second timeout. ``overall_compliance`` is the share of evaluable
        metrics whose status is ``"ok"``, or ``None`` when nothing is
        evaluable. Verification coverage is read from PostgreSQL.
        """
        # The setting may be absent, explicitly None, or a non-str URL object;
        # normalise all three so ``rstrip`` cannot fail.
        configured_url = getattr(settings, "PROMETHEUS_URL", None)
        if configured_url is None:
            configured_url = "http://prometheus.observability.svc:9090"
        prom_url = str(configured_url).rstrip("/")
        metrics: list[dict[str, Any]] = []

        async with httpx.AsyncClient(timeout=5.0) as client:
            for definition in ADR100_SLO_DEFINITIONS:
                metrics.append(await self._fetch_metric(client, prom_url, definition))

        evaluable = [metric for metric in metrics if metric.get("evaluable")]
        ok_count = sum(1 for metric in evaluable if metric.get("status") == "ok")
        overall_compliance = (ok_count / len(evaluable)) if evaluable else None
        verification_coverage = await self._fetch_verification_coverage()
        overall_status = _overall_status(metrics, evaluable, verification_coverage)

        return {
            "schema_version": "adr100_slo_status_v1",
            "source": "prometheus+postgresql",
            "evaluated_at": now_taipei_iso(),
            "overall_status": overall_status,
            "overall_compliance": overall_compliance,
            "evaluable_count": len(evaluable),
            "metric_count": len(metrics),
            "metrics": metrics,
            "verification_coverage": verification_coverage,
        }

    async def _fetch_metric(
        self,
        client: httpx.AsyncClient,
        prom_url: str,
        definition: Adr100SloDefinition,
    ) -> dict[str, Any]:
        """Query one SLO and return its metric payload.

        When the definition has a denominator query it is evaluated first.
        A failed denominator query yields ``no_data``; a sample count below
        ``minimum_events`` yields ``skipped_low_volume``. The value query is
        only run after both gates pass.
        """
        denominator_value: float | None = None
        sample_count: float | None = None

        if definition.denominator_query:
            denominator_result = await _query_prometheus_value(
                client,
                prom_url,
                definition.denominator_query,
            )
            if denominator_result["status"] != "ok":
                return _metric_payload(
                    definition,
                    value=None,
                    status="no_data",
                    reason=denominator_result["reason"],
                    denominator_value=None,
                    sample_count=None,
                )

            denominator_value = float(denominator_result["value"])
            # The denominator is multiplied by the window length to estimate
            # the number of events in the window.
            sample_count = denominator_value * definition.denominator_window_seconds
            if sample_count < definition.minimum_events:
                return _metric_payload(
                    definition,
                    value=None,
                    status="skipped_low_volume",
                    reason="denominator_below_minimum_events",
                    denominator_value=denominator_value,
                    sample_count=sample_count,
                )

        value_result = await _query_prometheus_value(client, prom_url, definition.query)
        if value_result["status"] != "ok":
            # A NaN/Inf value is reported as low volume rather than missing data.
            status = (
                "skipped_low_volume"
                if value_result["reason"] == "prometheus_nan_or_inf"
                else "no_data"
            )
            return _metric_payload(
                definition,
                value=None,
                status=status,
                reason=value_result["reason"],
                denominator_value=denominator_value,
                sample_count=sample_count,
            )

        value = float(value_result["value"])
        status = _classify_status(value, definition)
        return _metric_payload(
            definition,
            value=value,
            status=status,
            reason=None,
            denominator_value=denominator_value,
            # Without a denominator, the value itself is reported as the
            # sample count.
            sample_count=sample_count if sample_count is not None else value,
        )

    async def _fetch_verification_coverage(self) -> dict[str, Any]:
        """Summarize whether recent auto-repair executions have verifier evidence.

        Any database error is logged as a warning and reported as a payload
        with ``status="error"`` and zeroed counters instead of raising.
        """
        try:
            async with get_db_context() as db:
                summary_row = (
                    await db.execute(text(_VERIFICATION_COVERAGE_SQL))
                ).mappings().one()
                recent_rows = (
                    await db.execute(text(_VERIFICATION_COVERAGE_RECENT_SQL))
                ).mappings().all()
                recent_non_success_rows = (
                    await db.execute(text(_VERIFICATION_COVERAGE_NON_SUCCESS_SQL))
                ).mappings().all()
        except Exception as exc:
            logger.warning("adr100_verification_coverage_query_error", error=str(exc))
            return {
                "schema_version": "adr100_verification_coverage_v1",
                "source": "postgresql",
                "window": "24h",
                "status": "error",
                "reason": "postgresql_query_error",
                "evaluable": False,
                "total_auto": 0,
                "successful_auto": 0,
                "verified_auto": 0,
                "verified_success": 0,
                "verified_non_success": 0,
                "unverified_auto": 0,
                "coverage_rate": None,
                "verification_success_rate": None,
                "last_auto_at": None,
                "last_verified_auto_at": None,
                "last_verification_evidence_at": None,
                "latest_auto_age_seconds": None,
                "last_verified_auto_age_seconds": None,
                "recent_unverified": [],
                "recent_non_success": [],
                "non_success_breakdown": {
                    "by_verification_result": [],
                    "by_failure_class": [],
                },
                "remediation_queue": _remediation_queue_payload([]),
            }

        return _build_verification_coverage_payload(
            summary_row,
            recent_rows,
            recent_non_success_rows,
        )
|
||||
|
||||
|
||||
_VERIFICATION_COVERAGE_SQL = """
|
||||
WITH recent_auto AS (
|
||||
SELECT id, incident_id, success, created_at
|
||||
FROM auto_repair_executions
|
||||
WHERE created_at >= NOW() - INTERVAL '24 hours'
|
||||
),
|
||||
per_auto AS (
|
||||
SELECT
|
||||
are.id,
|
||||
are.incident_id,
|
||||
are.success,
|
||||
are.created_at,
|
||||
latest.verification_result,
|
||||
latest.collected_at AS verification_collected_at,
|
||||
latest.self_healing_score
|
||||
FROM recent_auto are
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT ev.verification_result, ev.collected_at, ev.self_healing_score
|
||||
FROM incident_evidence ev
|
||||
WHERE ev.incident_id = are.incident_id
|
||||
AND ev.verification_result IS NOT NULL
|
||||
ORDER BY ev.collected_at DESC
|
||||
LIMIT 1
|
||||
) latest ON TRUE
|
||||
)
|
||||
SELECT
|
||||
count(*)::int AS total_auto,
|
||||
count(*) FILTER (WHERE success)::int AS successful_auto,
|
||||
count(*) FILTER (WHERE verification_result IS NOT NULL)::int AS verified_auto,
|
||||
count(*) FILTER (WHERE verification_result = 'success')::int AS verified_success,
|
||||
count(*) FILTER (WHERE verification_result IN ('degraded','failed','timeout'))::int AS verified_non_success,
|
||||
count(*) FILTER (WHERE verification_result IS NULL)::int AS unverified_auto,
|
||||
max(created_at) AS last_auto_at,
|
||||
max(created_at) FILTER (WHERE verification_result IS NOT NULL) AS last_verified_auto_at,
|
||||
max(verification_collected_at) AS last_verification_evidence_at,
|
||||
EXTRACT(EPOCH FROM (NOW() - max(created_at)))::int AS latest_auto_age_seconds,
|
||||
EXTRACT(EPOCH FROM (NOW() - (max(created_at) FILTER (WHERE verification_result IS NOT NULL))))::int
|
||||
AS last_verified_auto_age_seconds
|
||||
FROM per_auto
|
||||
"""
|
||||
|
||||
|
||||
_VERIFICATION_COVERAGE_RECENT_SQL = """
|
||||
WITH recent_auto AS (
|
||||
SELECT id, incident_id, success, created_at
|
||||
FROM auto_repair_executions
|
||||
WHERE created_at >= NOW() - INTERVAL '24 hours'
|
||||
),
|
||||
per_auto AS (
|
||||
SELECT
|
||||
are.id,
|
||||
are.incident_id,
|
||||
are.success,
|
||||
are.created_at,
|
||||
latest.verification_result
|
||||
FROM recent_auto are
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT ev.verification_result
|
||||
FROM incident_evidence ev
|
||||
WHERE ev.incident_id = are.incident_id
|
||||
AND ev.verification_result IS NOT NULL
|
||||
ORDER BY ev.collected_at DESC
|
||||
LIMIT 1
|
||||
) latest ON TRUE
|
||||
)
|
||||
SELECT id, incident_id, success, created_at
|
||||
FROM per_auto
|
||||
WHERE verification_result IS NULL
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 5
|
||||
"""
|
||||
|
||||
|
||||
_VERIFICATION_COVERAGE_NON_SUCCESS_SQL = """
|
||||
WITH recent_auto AS (
|
||||
SELECT
|
||||
id,
|
||||
incident_id,
|
||||
success,
|
||||
playbook_id,
|
||||
playbook_name,
|
||||
triggered_by,
|
||||
risk_level,
|
||||
error_message,
|
||||
created_at
|
||||
FROM auto_repair_executions
|
||||
WHERE created_at >= NOW() - INTERVAL '24 hours'
|
||||
),
|
||||
per_auto AS (
|
||||
SELECT
|
||||
are.id AS auto_repair_id,
|
||||
are.incident_id,
|
||||
are.success AS auto_success,
|
||||
are.playbook_id,
|
||||
are.playbook_name,
|
||||
are.triggered_by,
|
||||
are.risk_level,
|
||||
left(coalesce(are.error_message, ''), 240) AS auto_error,
|
||||
are.created_at AS auto_created_at,
|
||||
latest.verification_result,
|
||||
latest.collected_at AS verification_collected_at,
|
||||
left(coalesce(latest.post_execution_state::text, ''), 700) AS post_state_text,
|
||||
left(coalesce(latest.evidence_summary, ''), 300) AS evidence_summary
|
||||
FROM recent_auto are
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
ev.verification_result,
|
||||
ev.collected_at,
|
||||
ev.post_execution_state,
|
||||
ev.evidence_summary
|
||||
FROM incident_evidence ev
|
||||
WHERE ev.incident_id = are.incident_id
|
||||
AND ev.verification_result IS NOT NULL
|
||||
ORDER BY ev.collected_at DESC
|
||||
LIMIT 1
|
||||
) latest ON TRUE
|
||||
)
|
||||
SELECT
|
||||
p.*,
|
||||
i.status::text AS incident_status,
|
||||
i.severity::text AS incident_severity,
|
||||
i.alert_category,
|
||||
i.alertname
|
||||
FROM per_auto p
|
||||
LEFT JOIN incidents i ON i.incident_id = p.incident_id
|
||||
WHERE p.verification_result IS NOT NULL
|
||||
AND p.verification_result <> 'success'
|
||||
ORDER BY p.auto_created_at DESC
|
||||
LIMIT 8
|
||||
"""
|
||||
|
||||
|
||||
async def _query_prometheus_value(
    client: httpx.AsyncClient,
    prom_url: str,
    query: str,
) -> dict[str, Any]:
    """Run an instant query against Prometheus and return one numeric value.

    Args:
        client: HTTP client used for the GET request.
        prom_url: Base URL; ``/api/v1/query`` is appended to it.
        query: PromQL expression sent as the ``query`` parameter.

    Returns:
        A dict whose ``status`` is one of:

        * ``"ok"`` - ``value`` holds the finite float result.
        * ``"no_data"`` - the query succeeded but returned no samples.
        * ``"skipped"`` - the sample was NaN or infinite (``raw_value`` is
          included for diagnostics).
        * ``"error"`` - Prometheus reported a failure, or the request or
          response parsing raised.
    """
    try:
        response = await client.get(
            f"{prom_url}/api/v1/query",
            params={"query": query},
        )
        data = response.json()
        if data.get("status") != "success":
            return {"status": "error", "reason": "prometheus_query_failed"}

        payload = data.get("data", {})
        results = payload.get("result", [])
        if not results:
            return {
                "status": "no_data",
                "reason": "prometheus_empty_result_metric_not_emitted",
            }

        if payload.get("resultType") == "scalar":
            # Scalar results are a bare [timestamp, "value"] pair rather than
            # a list of series, so the sample sits at index 1.
            raw_value = results[1]
        else:
            # Vector results: use the first series' [timestamp, "value"] pair.
            raw_value = results[0]["value"][1]

        value = float(raw_value)
        if not math.isfinite(value):
            return {
                "status": "skipped",
                "reason": "prometheus_nan_or_inf",
                "raw_value": raw_value,
            }
        return {"status": "ok", "value": value}
    except Exception as exc:
        # Deliberate catch-all: a failing metric query must not break the
        # caller, it is reported as an error result instead.
        logger.warning("adr100_slo_prometheus_query_error", query=query, error=str(exc))
        return {"status": "error", "reason": "prometheus_query_error"}
|
||||
|
||||
|
||||
def _metric_payload(
    definition: Adr100SloDefinition,
    *,
    value: float | None,
    status: str,
    reason: str | None,
    denominator_value: float | None,
    sample_count: float | None,
) -> dict[str, Any]:
    """Build the per-metric result dict for one SLO definition.

    Static fields are copied from ``definition``; the measured fields come
    from the keyword arguments. ``evaluable`` is true only when ``status`` is
    ``ok``, ``warning`` or ``violated``.
    """
    is_evaluable = status in {"ok", "warning", "violated"}
    return dict(
        name=definition.name,
        query=definition.query,
        value=value,
        target=definition.target,
        hard_red_line=definition.hard_red_line,
        direction=definition.direction,
        unit=definition.unit,
        window=definition.window,
        status=status,
        evaluable=is_evaluable,
        reason=reason,
        denominator_query=definition.denominator_query,
        denominator_value=denominator_value,
        sample_count=sample_count,
    )
|
||||
|
||||
|
||||
def _classify_status(value: float, definition: Adr100SloDefinition) -> str:
    """Grade ``value`` against the definition's red line and target.

    With ``direction == "above"`` lower values are worse; for any other
    direction higher values are worse. Crossing ``hard_red_line`` yields
    ``"violated"``, crossing only ``target`` yields ``"warning"``, otherwise
    the result is ``"ok"``. A value exactly on a threshold does not cross it.
    """
    higher_is_better = definition.direction == "above"

    def crosses(threshold: float) -> bool:
        # Strict comparison in the "worse" direction.
        return value < threshold if higher_is_better else value > threshold

    if crosses(definition.hard_red_line):
        return "violated"
    if crosses(definition.target):
        return "warning"
    return "ok"
|
||||
|
||||
|
||||
def _build_verification_coverage_payload(
    summary_row: Any,
    recent_unverified_rows: Any,
    recent_non_success_rows: Any = (),
) -> dict[str, Any]:
    """Assemble the ``adr100_verification_coverage_v1`` payload from query rows.

    Args:
        summary_row: Mapping holding the aggregate counters and timestamps.
        recent_unverified_rows: Iterable of mappings reported under
            ``recent_unverified``.
        recent_non_success_rows: Iterable of mappings turned into findings
            under ``recent_non_success`` and into the remediation queue.

    Returns:
        The coverage payload. ``status`` is ``skipped_low_volume`` when there
        were no executions, ``warning`` when any execution is unverified or
        has a non-success verification, and ``ok`` otherwise.
    """
    summary = dict(summary_row)

    def counter(key: str) -> int:
        # Missing or NULL counters are reported as 0.
        return int(summary.get(key) or 0)

    total_auto = counter("total_auto")
    verified_auto = counter("verified_auto")
    verified_success = counter("verified_success")
    verified_non_success = counter("verified_non_success")
    unverified_auto = counter("unverified_auto")

    # The first matching condition wins; a backlog outranks non-success results.
    if total_auto == 0:
        status, reason, evaluable = (
            "skipped_low_volume",
            "no_auto_repair_executions_24h",
            False,
        )
    elif unverified_auto > 0:
        status, reason, evaluable = "warning", "verification_backlog_present", True
    elif verified_non_success > 0:
        status, reason, evaluable = "warning", "non_success_verification_present", True
    else:
        status, reason, evaluable = "ok", None, True

    coverage_rate = None
    if total_auto:
        coverage_rate = verified_auto / total_auto
    verification_success_rate = None
    if verified_auto:
        verification_success_rate = verified_success / verified_auto

    recent_non_success = []
    for raw in recent_non_success_rows:
        recent_non_success.append(_non_success_finding_payload(dict(raw)))
    remediation_queue = _remediation_queue_payload(recent_non_success)

    recent_unverified = []
    for raw in recent_unverified_rows:
        item = dict(raw)
        recent_unverified.append(
            {
                "id": str(item.get("id")),
                "incident_id": str(item.get("incident_id")),
                "success": bool(item.get("success")),
                "created_at": _iso(item.get("created_at")),
            }
        )

    non_success_breakdown = {
        "by_verification_result": _count_breakdown(
            finding["verification_result"] for finding in recent_non_success
        ),
        "by_failure_class": _count_breakdown(
            finding["failure_class"] for finding in recent_non_success
        ),
        "by_remediation_status": _count_breakdown(
            work_item["remediation_status"] for work_item in remediation_queue["items"]
        ),
    }

    return {
        "schema_version": "adr100_verification_coverage_v1",
        "source": "postgresql",
        "window": "24h",
        "status": status,
        "reason": reason,
        "evaluable": evaluable,
        "total_auto": total_auto,
        "successful_auto": counter("successful_auto"),
        "verified_auto": verified_auto,
        "verified_success": verified_success,
        "verified_non_success": verified_non_success,
        "unverified_auto": unverified_auto,
        "coverage_rate": coverage_rate,
        "verification_success_rate": verification_success_rate,
        "last_auto_at": _iso(summary.get("last_auto_at")),
        "last_verified_auto_at": _iso(summary.get("last_verified_auto_at")),
        "last_verification_evidence_at": _iso(summary.get("last_verification_evidence_at")),
        "latest_auto_age_seconds": _int_or_none(summary.get("latest_auto_age_seconds")),
        "last_verified_auto_age_seconds": _int_or_none(
            summary.get("last_verified_auto_age_seconds")
        ),
        "recent_unverified": recent_unverified,
        "recent_non_success": recent_non_success,
        "non_success_breakdown": non_success_breakdown,
        "remediation_queue": remediation_queue,
    }
|
||||
|
||||
|
||||
def _non_success_finding_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Turn one non-success verification row into a dashboard finding.

    The row is classified with ``_classify_non_success_failure``; the
    resulting class selects the ``next_step`` and the ``remediation_*``
    fields. Identifiers are stringified, and missing incident status,
    severity and verification result become ``"unknown"``.
    """
    failure_class = _classify_non_success_failure(row)
    remediation = _remediation_for_failure_class(failure_class)

    def text_or_unknown(key: str) -> str:
        return str(row.get(key) or "unknown")

    finding: dict[str, Any] = {
        "auto_repair_id": str(row.get("auto_repair_id")),
        "incident_id": str(row.get("incident_id")),
        "incident_status": text_or_unknown("incident_status"),
        "incident_severity": text_or_unknown("incident_severity"),
        "alert_category": row.get("alert_category"),
        "alertname": row.get("alertname"),
        "auto_success": bool(row.get("auto_success")),
        "playbook_id": row.get("playbook_id"),
        "playbook_name": row.get("playbook_name"),
        "triggered_by": row.get("triggered_by"),
        "risk_level": row.get("risk_level"),
        "verification_result": text_or_unknown("verification_result"),
    }
    # Classification and the remediation metadata derived from it.
    finding.update(
        {
            "failure_class": failure_class,
            "next_step": _next_step_for_failure_class(failure_class),
            "remediation_status": remediation["status"],
            "remediation_action": remediation["action"],
            "remediation_owner": remediation["owner"],
            "remediation_reason": remediation["reason"],
        }
    )
    # Shortened free-text excerpts and ISO timestamps.
    finding.update(
        {
            "auto_error_excerpt": _short_text(row.get("auto_error"), 180),
            "evidence_excerpt": _short_text(row.get("evidence_summary"), 180),
            "auto_created_at": _iso(row.get("auto_created_at")),
            "verification_collected_at": _iso(row.get("verification_collected_at")),
        }
    )
    return finding
|
||||
|
||||
|
||||
def _classify_non_success_failure(row: dict[str, Any]) -> str:
    """Pick a failure class for a non-success verification row.

    Known markers are searched case-insensitively in the row's
    ``auto_error``, ``post_state_text`` and ``evidence_summary`` text, in a
    fixed priority order. With no marker present, a falsy ``auto_success``
    means the repair itself failed; otherwise the class follows
    ``verification_result`` and defaults to ``verification_degraded``.
    """
    haystack = " ".join(
        str(row.get(field) or "")
        for field in ("auto_error", "post_state_text", "evidence_summary")
    ).lower()

    # Ordered: the first marker found decides the class.
    marker_classes = (
        ("unsupported scheme", "unsupported_action_scheme"),
        ("missing_query_parameter", "verifier_missing_promql"),
        ("empty_pod_name", "verifier_target_missing_pod"),
    )
    for marker, failure_class in marker_classes:
        if marker in haystack:
            return failure_class

    if not row.get("auto_success"):
        return "auto_repair_execution_failed"

    outcome = str(row.get("verification_result") or "").lower()
    if outcome in {"failed", "timeout"}:
        return f"verification_{outcome}"
    return "verification_degraded"
|
||||
|
||||
|
||||
def _remediation_for_failure_class(failure_class: str) -> dict[str, str]:
    """Map a non-success verification class to a read-only remediation work item.

    This is dashboard triage metadata only. It does not auto-close incidents,
    replay repairs, or approve write actions.

    Returns:
        A new dict with ``status``, ``action``, ``owner`` and ``reason``.
        Classes not listed below get the degraded-evidence manual review.
    """
    hard_failure = ("manual_review", "escalate_verification_failure",
                    "sre_operator", "verifier_returned_hard_failure")
    # failure class -> (status, action, owner, reason)
    table = {
        "unsupported_action_scheme": (
            "ready_for_replay",
            "replay_with_supported_executor",
            "auto_repair_executor",
            "executor_gateway_available_after_t23",
        ),
        "verifier_missing_promql": (
            "ready_for_reverify",
            "reverify_with_promql_template",
            "post_execution_verifier",
            "promql_template_available_after_t23",
        ),
        "verifier_target_missing_pod": (
            "needs_target_mapping",
            "map_target_and_reverify",
            "post_execution_verifier",
            "verifier_target_missing",
        ),
        "auto_repair_execution_failed": (
            "needs_playbook_ticket",
            "create_playbook_ticket",
            "solver_or_operator",
            "execution_failed_after_route_normalization",
        ),
        "verification_failed": hard_failure,
        "verification_timeout": hard_failure,
    }
    fallback = (
        "manual_review",
        "inspect_degraded_evidence",
        "sre_operator",
        "degraded_evidence_requires_human_context",
    )
    status, action, owner, reason = table.get(failure_class, fallback)
    return {"status": status, "action": action, "owner": owner, "reason": reason}
|
||||
|
||||
|
||||
def _next_step_for_failure_class(failure_class: str) -> str:
    """Return the suggested next-step label for a failure class.

    Classes without a dedicated entry get ``review_degraded_verification``.
    """
    next_steps = {
        "unsupported_action_scheme": "normalize_playbook_executor",
        "verifier_missing_promql": "add_verifier_query_template",
        "verifier_target_missing_pod": "map_verifier_target",
        "auto_repair_execution_failed": "review_auto_repair_execution",
        "verification_failed": "escalate_verification_failure",
        "verification_timeout": "escalate_verification_failure",
    }
    return next_steps.get(failure_class, "review_degraded_verification")
|
||||
|
||||
|
||||
def _remediation_queue_payload(recent_non_success: list[dict[str, Any]]) -> dict[str, Any]:
    """Project non-success findings into the ``adr100_remediation_queue_v1`` payload.

    Each finding becomes one work item keyed by incident and auto-repair id.
    The payload also carries totals, how many items are ready for automated
    replay or re-verification versus needing a human, and breakdowns by
    remediation status and action.
    """
    ai_ready_statuses = {"ready_for_replay", "ready_for_reverify"}
    human_statuses = {
        "needs_target_mapping",
        "needs_playbook_ticket",
        "manual_review",
    }

    def to_work_item(finding: dict[str, Any]) -> dict[str, Any]:
        incident_id = finding.get("incident_id")
        auto_repair_id = finding.get("auto_repair_id")
        return {
            "work_item_id": f"verification:{incident_id}:{auto_repair_id}",
            "incident_id": incident_id,
            "auto_repair_id": auto_repair_id,
            "alertname": finding.get("alertname"),
            "playbook_id": finding.get("playbook_id"),
            "failure_class": finding.get("failure_class"),
            "verification_result": finding.get("verification_result"),
            "remediation_status": finding.get("remediation_status"),
            "remediation_action": finding.get("remediation_action"),
            "remediation_owner": finding.get("remediation_owner"),
            "remediation_reason": finding.get("remediation_reason"),
            "source": "adr100_verification_coverage",
            "auto_created_at": finding.get("auto_created_at"),
            "verification_collected_at": finding.get("verification_collected_at"),
        }

    items: list[dict[str, Any]] = [to_work_item(finding) for finding in recent_non_success]
    statuses = [work_item.get("remediation_status") for work_item in items]
    actions = [work_item.get("remediation_action") for work_item in items]

    ready_for_ai = len([status for status in statuses if status in ai_ready_statuses])
    needs_human = len([status for status in statuses if status in human_statuses])

    return {
        "schema_version": "adr100_remediation_queue_v1",
        "source": "recent_non_success_read_model",
        "total": len(items),
        "ready_for_ai": ready_for_ai,
        "needs_human": needs_human,
        "items": items,
        "by_status": _count_breakdown(statuses),
        "by_action": _count_breakdown(actions),
    }
|
||||
|
||||
|
||||
def _count_breakdown(values: Any) -> list[dict[str, Any]]:
    """Count occurrences of each value and return them as name/count dicts.

    Values are stringified; falsy values are counted under ``"unknown"``.
    Entries are ordered by descending count, ties broken by name.
    """
    tally: dict[str, int] = {}
    for raw in values:
        label = str(raw or "unknown")
        if label in tally:
            tally[label] += 1
        else:
            tally[label] = 1

    ordered_labels = sorted(tally, key=lambda label: (-tally[label], label))
    return [{"name": label, "count": tally[label]} for label in ordered_labels]
|
||||
|
||||
|
||||
def _short_text(value: Any, limit: int) -> str | None:
    """Collapse whitespace in ``value`` and cut it to at most ``limit`` characters.

    Returns ``None`` for ``None`` and for values that are empty once
    whitespace is collapsed.
    """
    if value is None:
        return None
    # split()/join folds every whitespace run into one space and trims ends.
    collapsed = " ".join(str(value).split())
    return collapsed[:limit] if collapsed else None
|
||||
|
||||
|
||||
def _iso(value: Any) -> str | None:
    """Return ``value.isoformat()``, or ``None`` if ``value`` has no such method."""
    if not hasattr(value, "isoformat"):
        return None
    return value.isoformat()
|
||||
|
||||
|
||||
def _int_or_none(value: Any) -> int | None:
    """Convert ``value`` with ``int()``, passing ``None`` through unchanged."""
    if value is None:
        return None
    return int(value)
|
||||
|
||||
|
||||
def _overall_status(
    metrics: list[dict[str, Any]],
    evaluable: list[dict[str, Any]],
    verification_coverage: dict[str, Any] | None = None,
) -> str:
    """Reduce per-metric statuses and coverage status to one overall status.

    Precedence, highest first: any violated metric; a violated or warning
    verification coverage; any warning metric. After that, a non-empty
    ``evaluable`` gives ``partial`` when some metric was skipped for low
    volume and ``ok`` otherwise. With nothing evaluable the result is
    ``no_data`` if any metric reported it, else ``skipped_low_volume``.
    """
    observed = {metric.get("status") for metric in metrics}

    if "violated" in observed:
        return "violated"

    coverage_status = (
        verification_coverage.get("status") if verification_coverage else None
    )
    if coverage_status in {"violated", "warning"}:
        return str(coverage_status)

    if "warning" in observed:
        return "warning"

    if evaluable:
        return "partial" if "skipped_low_volume" in observed else "ok"

    return "no_data" if "no_data" in observed else "skipped_low_volume"
|
||||
|
||||
|
||||
# Process-wide service instance, created on first request.
_adr100_slo_status_service: Adr100SloStatusService | None = None


def get_adr100_slo_status_service() -> Adr100SloStatusService:
    """Return the shared ``Adr100SloStatusService``, creating it on first use."""
    global _adr100_slo_status_service
    service = _adr100_slo_status_service
    if service is None:
        service = Adr100SloStatusService()
        _adr100_slo_status_service = service
    return service
|
||||
@@ -27,7 +27,7 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import dataclasses
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
@@ -63,11 +63,25 @@ if TYPE_CHECKING:
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
def _agent_debate_global_timeout_seconds() -> float:
    """Return the full Phase 2 debate timeout in seconds.

    GCP Ollama incident analysis can legitimately take longer than the old
    90s guard, so the timeout is an explicit deployment knob read from
    ``AGENT_DEBATE_GLOBAL_TIMEOUT_SEC`` (default 420.0).

    Returns:
        The configured timeout, never below 90.0. Values that cannot be
        parsed as a float, or that parse to NaN or infinity, fall back to the
        default so the guard always stays a real, finite timeout.
    """
    import math  # function-scope import: only this helper needs it

    default_timeout = 420.0
    minimum_timeout = 90.0

    raw = os.environ.get("AGENT_DEBATE_GLOBAL_TIMEOUT_SEC", "420.0")
    try:
        timeout = float(raw)
    except (TypeError, ValueError):
        timeout = default_timeout

    # float() accepts "nan" and "inf"; max() would pass both through, which
    # yields either a NaN timeout or no effective timeout at all.
    if not math.isfinite(timeout):
        timeout = default_timeout

    return max(timeout, minimum_timeout)
|
||||
|
||||
|
||||
# 全局超時(所有 Agent 加起來)
|
||||
# 2026-04-16 Claude Sonnet 4.6: deepseek-r1:14b 實測 2.2-27.3s avg 10.6s
|
||||
# 原 30s 對 3 個序列 Agent 每個只剩 10s → 頻繁 timeout → confidence=20%
|
||||
# 調整: 每 Agent 25s, 3個序列+1組並行 = 最差 75s + buffer = 90s
|
||||
GLOBAL_TIMEOUT_SEC = 90.0
|
||||
# 2026-05-06 Codex: configurable for GCP-A/GCP-B/111 Ollama-first incident
|
||||
# diagnosis. The old 90s guard was cutting off valid deep diagnosis runs.
|
||||
GLOBAL_TIMEOUT_SEC = _agent_debate_global_timeout_seconds()
|
||||
|
||||
# 2026-04-16 ogt + Claude Sonnet 4.6: 移除 _PER_AGENT_TIMEOUT_SEC
|
||||
# LLM 必須等到完整回應,不得人工截斷。降級只在真正異常(連線失敗、模型崩潰)觸發。
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
"""
|
||||
Ollama Provider - Phase 24 ADR-052
|
||||
====================================
|
||||
本地 LLM 推理 (192.168.0.188 VMware VM, CPU-only)
|
||||
本地 / 私有 LLM 推理 Provider。
|
||||
|
||||
搬移自: openclaw.py _call_ollama (L349-409)
|
||||
特性: 免費、隱私安全 (local)、但 CPU 慢 (~97s/30tokens for qwen2.5:7b)
|
||||
特性: 免費、隱私安全 (local)、可依 ADR-110 指向 GCP-A/GCP-B/111。
|
||||
|
||||
2026-04-02 ogt: Phase 24-A 從 openclaw.py 抽出
|
||||
"""
|
||||
@@ -29,6 +29,62 @@ from src.services.model_registry import get_model_registry
|
||||
logger = structlog.get_logger(__name__)
|
||||
settings = get_settings()
|
||||
|
||||
_GCP_LIGHTWEIGHT_MODELS = {
|
||||
"gemma3:4b",
|
||||
}
|
||||
|
||||
|
||||
def _normalized_url(value: str | None) -> str:
    """Return ``value`` without trailing slashes; ``None`` becomes ``""``."""
    return (value or "").rstrip("/")


def _is_gcp_alert_lane(endpoint_url: str) -> bool:
    """Return true for the CPU-only GCP-A/B synchronous alert lane.

    The endpoint is compared, ignoring trailing slashes, against
    ``settings.OLLAMA_URL`` and ``settings.OLLAMA_SECONDARY_URL``.

    An empty endpoint is never part of the lane. Without this guard, an
    unset (empty) URL setting would make an empty endpoint compare equal and
    be treated as the GCP lane.
    """
    endpoint = _normalized_url(endpoint_url)
    if not endpoint:
        return False
    lane_urls = {
        _normalized_url(getattr(settings, "OLLAMA_URL", "")),
        _normalized_url(getattr(settings, "OLLAMA_SECONDARY_URL", "")),
    }
    return endpoint in lane_urls
|
||||
|
||||
|
||||
def _resolve_model_for_endpoint(
    *,
    requested_model: str,
    endpoint_url: str,
    context: dict | None,
) -> str:
    """
    Keep non-diagnosis calls from polluting the GCP diagnosis lane.

    GCP-A/B are allowed to run the deep incident diagnosis model because the
    alert goal is correctness and resolution, not the fastest Telegram card.
    Accidental non-diagnosis workloads still fall back to the lightweight health
    model so embedding/Hermes/background calls cannot occupy the same lane.

    Returns the stripped ``requested_model`` unless all of these hold: the
    endpoint is the GCP alert lane, the context does not set
    ``allow_gcp_heavy_model``, the task type is not a diagnosis type, and the
    model is not in ``_GCP_LIGHTWEIGHT_MODELS``. In that case
    ``settings.OLLAMA_HEALTH_CHECK_MODEL`` (default ``gemma3:4b``) is returned
    and a warning is logged.
    """
    model_name = requested_model.strip()
    ctx = context or {}
    heavy_allowed = bool(ctx.get("allow_gcp_heavy_model"))
    task_type = str(ctx.get("task_type") or ctx.get("intent_hint") or "").lower()
    deep_diagnosis = task_type in {"diagnose", "alert_deep", "incident_diagnosis"}

    # Endpoints outside the GCP lane are never coerced.
    if not _is_gcp_alert_lane(endpoint_url):
        return model_name

    # On the GCP lane, explicit opt-in, diagnosis work and already-light
    # models keep the requested model.
    if heavy_allowed or deep_diagnosis or model_name in _GCP_LIGHTWEIGHT_MODELS:
        return model_name

    configured = str(getattr(settings, "OLLAMA_HEALTH_CHECK_MODEL", "gemma3:4b")).strip()
    fallback_model = configured or "gemma3:4b"
    logger.warning(
        "ollama_gcp_non_diagnosis_model_coerced",
        endpoint=endpoint_url,
        requested_model=model_name,
        safe_model=fallback_model,
        task_type=task_type,
    )
    return fallback_model
|
||||
|
||||
|
||||
class OllamaProvider:
|
||||
"""
|
||||
@@ -77,11 +133,17 @@ class OllamaProvider:
|
||||
client = await self._get_client()
|
||||
|
||||
registry = get_model_registry()
|
||||
model_name = registry.get_model("ollama", "rca")
|
||||
endpoint_url = self._endpoint_url()
|
||||
requested_model = str((context or {}).get("ollama_model") or registry.get_model("ollama", "rca")).strip()
|
||||
model_name = _resolve_model_for_endpoint(
|
||||
requested_model=requested_model,
|
||||
endpoint_url=endpoint_url,
|
||||
context=context,
|
||||
)
|
||||
options = registry.get_provider_options("ollama")
|
||||
|
||||
# P0 2026-04-04 Claude Code: per-task timeout(Option C 分情境)
|
||||
# FORCE_LOCAL/diagnose → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS (200s,實測 ~173s)
|
||||
# FORCE_LOCAL/diagnose → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS
|
||||
# 其他 → OPENCLAW_TIMEOUT(既有設定)
|
||||
task_type = (context or {}).get("task_type", "")
|
||||
if task_type in ("diagnose", "force_local"):
|
||||
@@ -89,7 +151,6 @@ class OllamaProvider:
|
||||
else:
|
||||
read_timeout = float(settings.OPENCLAW_TIMEOUT)
|
||||
|
||||
endpoint_url = self._endpoint_url()
|
||||
response = await client.post(
|
||||
f"{endpoint_url}/api/generate",
|
||||
json={
|
||||
@@ -112,7 +173,13 @@ class OllamaProvider:
|
||||
tokens = data.get("eval_count", 0) + data.get("prompt_eval_count", 0)
|
||||
latency = (time.perf_counter() - start) * 1000
|
||||
|
||||
logger.info("ollama_provider_success", response_length=len(result), tokens=tokens, latency_ms=round(latency, 1))
|
||||
logger.info(
|
||||
"ollama_provider_success",
|
||||
response_length=len(result),
|
||||
tokens=tokens,
|
||||
latency_ms=round(latency, 1),
|
||||
model=model_name,
|
||||
)
|
||||
return AIResult(
|
||||
raw_response=result,
|
||||
success=True,
|
||||
@@ -158,7 +225,7 @@ class OllamaProvider:
|
||||
total_tokens = 0
|
||||
messages: list[dict] = [{"role": "user", "content": prompt}]
|
||||
registry = get_model_registry()
|
||||
model_name = registry.get_model("ollama", "rca")
|
||||
model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama", "rca")).strip()
|
||||
options = registry.get_provider_options("ollama")
|
||||
task_type = (context or {}).get("task_type", "")
|
||||
if task_type in ("diagnose", "force_local"):
|
||||
@@ -268,33 +335,27 @@ class OllamaProvider:
|
||||
self._http_client = None
|
||||
|
||||
|
||||
# 2026-04-26 Wave5 B1-fix by Claude Engineer-A4 — OLLAMA_188 provider 註冊
|
||||
class Ollama188Provider(OllamaProvider):
|
||||
# 2026-05-06 Codex — 188 不再作為 Ollama Provider;本地備援統一命名為 ollama_local。
|
||||
class OllamaLocalProvider(OllamaProvider):
|
||||
"""
|
||||
Ollama 188 CPU-only 備援 Provider
|
||||
Ollama Local fallback Provider
|
||||
|
||||
繼承 OllamaProvider,但使用 OLLAMA_FALLBACK_URL(192.168.0.188:11434)
|
||||
作為推理端點,模型預設 OLLAMA_HEALTH_CHECK_MODEL(qwen2.5:7b-instruct)。
|
||||
|
||||
B1 修復:原本 _init_registry 未登錄此 provider,導致
|
||||
executor.execute() 遇到 "ollama_188" → not_registered → 跳過,
|
||||
188 從未被打到。此類別補全登錄鏈路。
|
||||
|
||||
2026-04-26 Wave5 B1-fix by Claude Engineer-A4
|
||||
使用 OLLAMA_FALLBACK_URL 作為本地最後防線端點。
|
||||
ADR-110 目前設定為 110 nginx proxy → 111 Ollama;188 不得再作為 Ollama provider。
|
||||
"""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "ollama_188"
|
||||
return "ollama_local"
|
||||
|
||||
@property
|
||||
def is_enabled(self) -> bool:
|
||||
import os
|
||||
# 優先查 ENABLE_OLLAMA_188;若未設定(預設 true)則看 OLLAMA_FALLBACK_URL 是否有值
|
||||
env_override = os.getenv("ENABLE_OLLAMA_188", "true").lower() == "true"
|
||||
# 優先查 ENABLE_OLLAMA_LOCAL;若未設定(預設 true)則看 OLLAMA_FALLBACK_URL 是否有值。
|
||||
env_override = os.getenv("ENABLE_OLLAMA_LOCAL", "true").lower() == "true"
|
||||
if not env_override:
|
||||
return False
|
||||
# OLLAMA_FALLBACK_URL 空字串 → 未設定 188 節點 → 停用
|
||||
# OLLAMA_FALLBACK_URL 空字串 → 未設定本地節點 → 停用。
|
||||
return bool(getattr(settings, "OLLAMA_FALLBACK_URL", ""))
|
||||
|
||||
def _endpoint_url(self) -> str:
|
||||
@@ -319,18 +380,18 @@ class Ollama188Provider(OllamaProvider):
|
||||
client = await self._get_client()
|
||||
|
||||
registry = get_model_registry()
|
||||
# 嘗試取 ollama_188 專屬設定,fallback 到 ollama 預設
|
||||
# 嘗試取本地 fallback 專屬設定,fallback 到 ollama 預設。
|
||||
try:
|
||||
model_name = registry.get_model("ollama_188", "rca")
|
||||
model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama_local", "rca")).strip()
|
||||
except Exception:
|
||||
model_name = getattr(settings, "OLLAMA_HEALTH_CHECK_MODEL", "qwen2.5:7b-instruct")
|
||||
model_name = str((context or {}).get("ollama_model") or getattr(settings, "OLLAMA_HEALTH_CHECK_MODEL", "qwen2.5:7b-instruct")).strip()
|
||||
|
||||
try:
|
||||
options = registry.get_provider_options("ollama_188")
|
||||
options = registry.get_provider_options("ollama_local")
|
||||
except Exception:
|
||||
options = registry.get_provider_options("ollama")
|
||||
|
||||
# CPU-only 備援:固定使用較長 timeout(CPU 推理慢)
|
||||
# 本地備援:固定使用較長 timeout,避免 111 模型載入時被過早判死。
|
||||
task_type = (context or {}).get("task_type", "")
|
||||
if task_type in ("diagnose", "force_local"):
|
||||
read_timeout = float(getattr(settings, "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS", 200))
|
||||
@@ -359,11 +420,12 @@ class Ollama188Provider(OllamaProvider):
|
||||
latency = (time.perf_counter() - start) * 1000
|
||||
|
||||
logger.info(
|
||||
"ollama_188_provider_success",
|
||||
"ollama_local_provider_success",
|
||||
response_length=len(result),
|
||||
tokens=tokens,
|
||||
latency_ms=round(latency, 1),
|
||||
endpoint=fallback_url,
|
||||
model=model_name,
|
||||
)
|
||||
return AIResult(
|
||||
raw_response=result,
|
||||
@@ -375,12 +437,12 @@ class Ollama188Provider(OllamaProvider):
|
||||
|
||||
except httpx.TimeoutException as e:
|
||||
latency = (time.perf_counter() - start) * 1000
|
||||
logger.warning("ollama_188_provider_timeout", error=str(e), latency_ms=round(latency, 1))
|
||||
logger.warning("ollama_local_provider_timeout", error=str(e), latency_ms=round(latency, 1))
|
||||
return AIResult(raw_response="", success=False, provider=self.name, latency_ms=latency, error=f"Timeout: {e}")
|
||||
|
||||
except Exception as e:
|
||||
latency = (time.perf_counter() - start) * 1000
|
||||
logger.warning("ollama_188_provider_failed", error=str(e), latency_ms=round(latency, 1))
|
||||
logger.warning("ollama_local_provider_failed", error=str(e), latency_ms=round(latency, 1))
|
||||
return AIResult(raw_response="", success=False, provider=self.name, latency_ms=latency, error=str(e))
|
||||
|
||||
async def health_check(self) -> bool:
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user