Compare commits

...

521 Commits

Author SHA1 Message Date
Your Name
0028993851 docs(awooop): record t62 recurrence dry run [skip ci] 2026-05-18 21:51:48 +08:00
AWOOOI CD
5c934de83d chore(cd): deploy d1ebcda [skip ci] 2026-05-18 21:47:10 +08:00
Your Name
d1ebcdac10 feat(awooop): preview recurrence repair work items
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m11s
CD Pipeline / build-and-deploy (push) Successful in 3m33s
CD Pipeline / post-deploy-checks (push) Successful in 1m32s
2026-05-18 21:42:20 +08:00
Your Name
51660ecbb1 docs(awooop): record t61 recurrence work items [skip ci] 2026-05-18 20:41:18 +08:00
AWOOOI CD
bc99683432 chore(cd): deploy b506145 [skip ci] 2026-05-18 20:35:43 +08:00
Your Name
b50614528e feat(awooop): surface recurrence repair work items
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m20s
CD Pipeline / build-and-deploy (push) Successful in 3m31s
CD Pipeline / post-deploy-checks (push) Successful in 1m32s
2026-05-18 20:30:43 +08:00
Your Name
bbf5105fb4 docs(awooop): record t60 recurrence repair evidence [skip ci] 2026-05-18 20:17:20 +08:00
AWOOOI CD
d321f44e49 chore(cd): deploy 4b8f946 [skip ci] 2026-05-18 20:11:19 +08:00
Your Name
4b8f946699 fix(awooop): preserve recurrence repair fields
All checks were successful
Code Review / ai-code-review (push) Successful in 21s
CD Pipeline / tests (push) Successful in 1m20s
CD Pipeline / build-and-deploy (push) Successful in 3m37s
CD Pipeline / post-deploy-checks (push) Successful in 1m38s
2026-05-18 20:06:20 +08:00
AWOOOI CD
e36c9b1800 chore(cd): deploy 7fa0673 [skip ci] 2026-05-18 19:55:42 +08:00
Your Name
7fa06731da feat(awooop): link recurring alerts to repair work
All checks were successful
Code Review / ai-code-review (push) Successful in 9s
CD Pipeline / tests (push) Successful in 1m21s
CD Pipeline / build-and-deploy (push) Successful in 4m2s
CD Pipeline / post-deploy-checks (push) Successful in 1m45s
2026-05-18 19:50:12 +08:00
Your Name
4ec116c012 docs(awooop): record t59 recurring alert links [skip ci] 2026-05-18 19:33:18 +08:00
AWOOOI CD
41ed3c0421 chore(cd): deploy 94f8c68 [skip ci] 2026-05-18 11:28:56 +00:00
Your Name
94f8c68b77 feat(awooop): show recurring alert links
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m6s
CD Pipeline / build-and-deploy (push) Successful in 3m55s
CD Pipeline / post-deploy-checks (push) Successful in 1m57s
2026-05-18 19:23:37 +08:00
Your Name
d709e25d69 docs(awooop): record t58 source dossier coverage [skip ci] 2026-05-18 19:13:05 +08:00
AWOOOI CD
ba1e7997ad chore(cd): deploy 213523c [skip ci] 2026-05-18 11:06:39 +00:00
Your Name
213523c77d feat(awooop): surface source dossier coverage
All checks were successful
Code Review / ai-code-review (push) Successful in 9s
CD Pipeline / tests (push) Successful in 1m8s
CD Pipeline / build-and-deploy (push) Successful in 3m52s
CD Pipeline / post-deploy-checks (push) Successful in 1m34s
2026-05-18 19:01:28 +08:00
Your Name
fbde48438b docs(awooop): record t57 callback evidence search [skip ci] 2026-05-18 16:36:40 +08:00
AWOOOI CD
17d3c161e4 chore(cd): deploy 28c2b36 [skip ci] 2026-05-18 16:30:31 +08:00
Your Name
28c2b365b3 fix(awooop): type callback reply project filter
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m7s
CD Pipeline / build-and-deploy (push) Successful in 3m28s
CD Pipeline / post-deploy-checks (push) Successful in 1m37s
2026-05-18 16:25:45 +08:00
AWOOOI CD
31f778d60b chore(cd): deploy 08a75f4 [skip ci] 2026-05-18 16:22:07 +08:00
Your Name
08a75f4b5a feat(awooop): search callback reply evidence
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m8s
CD Pipeline / build-and-deploy (push) Successful in 3m44s
CD Pipeline / post-deploy-checks (push) Successful in 1m31s
2026-05-18 16:17:05 +08:00
Your Name
e4e1244c0f docs(awooop): record t56 callback filter rollout [skip ci] 2026-05-18 16:04:11 +08:00
AWOOOI CD
aff2a57db7 chore(cd): deploy f3494e0 [skip ci] 2026-05-18 07:59:24 +00:00
Your Name
f3494e0bfb feat(awooop): filter runs by callback reply state
All checks were successful
Code Review / ai-code-review (push) Successful in 14s
CD Pipeline / tests (push) Successful in 1m9s
CD Pipeline / build-and-deploy (push) Successful in 3m53s
CD Pipeline / post-deploy-checks (push) Successful in 1m40s
2026-05-18 15:54:21 +08:00
Your Name
e81e3f7b8a docs(awooop): record t55 callback list evidence [skip ci] 2026-05-18 15:43:21 +08:00
AWOOOI CD
32d4d1ea8b chore(cd): deploy 0e3c63e [skip ci] 2026-05-18 15:38:35 +08:00
Your Name
0e3c63ec15 fix(awooop): preserve callback summary in run list response
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m8s
CD Pipeline / build-and-deploy (push) Successful in 3m44s
CD Pipeline / post-deploy-checks (push) Successful in 1m24s
2026-05-18 15:33:40 +08:00
AWOOOI CD
be551ac761 chore(cd): deploy 20d62ee [skip ci] 2026-05-18 15:29:42 +08:00
Your Name
20d62ee0cf feat(awooop): surface callback replies on run list
All checks were successful
Code Review / ai-code-review (push) Successful in 14s
CD Pipeline / tests (push) Successful in 1m25s
CD Pipeline / build-and-deploy (push) Successful in 3m35s
CD Pipeline / post-deploy-checks (push) Successful in 1m50s
2026-05-18 15:24:39 +08:00
Your Name
584bd4b31b docs(awooop): record t54 callback timeline evidence [skip ci] 2026-05-18 15:03:55 +08:00
AWOOOI CD
f35527c7ed chore(cd): deploy 1a16e08 [skip ci] 2026-05-18 14:59:23 +08:00
Your Name
1a16e083e7 feat(awooop): show callback reply states in timeline
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m8s
CD Pipeline / build-and-deploy (push) Successful in 3m24s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-18 14:54:49 +08:00
Your Name
ed37000eba docs(awooop): record t53 callback reply evidence [skip ci] 2026-05-18 14:48:54 +08:00
AWOOOI CD
82e33f6a17 chore(cd): deploy c972302 [skip ci] 2026-05-18 06:45:36 +00:00
Your Name
c97230252a feat(telegram): record callback reply evidence
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m13s
CD Pipeline / build-and-deploy (push) Successful in 3m21s
CD Pipeline / post-deploy-checks (push) Successful in 1m19s
2026-05-18 14:40:47 +08:00
Your Name
e9e6cda06e docs(awooop): record t51 t52 evidence [skip ci] 2026-05-18 14:31:57 +08:00
AWOOOI CD
10965af845 chore(cd): deploy 8ca875e [skip ci] 2026-05-18 06:28:11 +00:00
Your Name
8ca875e6ad fix(web): keep navigation shell before hydration
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m17s
CD Pipeline / build-and-deploy (push) Successful in 3m53s
CD Pipeline / post-deploy-checks (push) Successful in 1m18s
2026-05-18 14:22:55 +08:00
AWOOOI CD
ea96bb0971 chore(cd): deploy 1ee0740 [skip ci] 2026-05-18 14:17:08 +08:00
Your Name
1ee0740b13 fix(telegram): harden detail history html fallback
All checks were successful
Code Review / ai-code-review (push) Successful in 26s
CD Pipeline / tests (push) Successful in 1m15s
CD Pipeline / build-and-deploy (push) Successful in 3m43s
CD Pipeline / post-deploy-checks (push) Successful in 2m1s
2026-05-18 14:12:08 +08:00
Your Name
79038a6efb docs(awooop): record t50 mcp run evidence [skip ci] 2026-05-18 14:04:30 +08:00
AWOOOI CD
5d36638c79 chore(cd): deploy 9d02ab8 [skip ci] 2026-05-18 14:00:09 +08:00
Your Name
9d02ab8080 feat(awooop): surface mcp investigation evidence
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m6s
CD Pipeline / build-and-deploy (push) Successful in 3m30s
CD Pipeline / post-deploy-checks (push) Successful in 2m12s
2026-05-18 13:55:27 +08:00
Your Name
b9597d8d70 docs(awooop): record t49 host mcp evidence [skip ci] 2026-05-18 12:36:09 +08:00
AWOOOI CD
749b210997 chore(cd): deploy 5cb10a6 [skip ci] 2026-05-18 12:29:41 +08:00
Your Name
5cb10a6d2d fix(mcp): enrich host log evidence params
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 2m29s
CD Pipeline / build-and-deploy (push) Successful in 4m15s
CD Pipeline / post-deploy-checks (push) Successful in 1m41s
2026-05-18 12:23:39 +08:00
AWOOOI CD
0e7fe211de chore(cd): deploy 64c7044 [skip ci] 2026-05-18 04:19:18 +00:00
Your Name
64c7044282 fix(mcp): balance host alert tool suggestions
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m17s
CD Pipeline / build-and-deploy (push) Successful in 3m41s
CD Pipeline / post-deploy-checks (push) Successful in 1m43s
2026-05-18 12:14:21 +08:00
AWOOOI CD
989390f7ce chore(cd): deploy 98a10cb [skip ci] 2026-05-18 12:08:19 +08:00
Your Name
98a10cbc7b fix(awooop): initialize mcp runtime for signal worker
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m14s
CD Pipeline / build-and-deploy (push) Successful in 3m24s
CD Pipeline / post-deploy-checks (push) Successful in 1m30s
2026-05-18 12:03:35 +08:00
AWOOOI CD
df7d957310 chore(cd): deploy a023c53 [skip ci] 2026-05-18 11:54:16 +08:00
Your Name
a023c535db fix(awooop): bridge signal worker observations
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / tests (push) Successful in 1m4s
CD Pipeline / build-and-deploy (push) Successful in 3m23s
CD Pipeline / post-deploy-checks (push) Successful in 1m22s
2026-05-18 11:49:33 +08:00
Your Name
161e337e77 docs(awooop): record t48 verified auto-repair gate 2026-05-18 11:24:00 +08:00
AWOOOI CD
c4c1e22587 chore(cd): deploy 3f7bf24 [skip ci] 2026-05-18 11:12:06 +08:00
Your Name
3f7bf24b23 fix(ci): make secret base64 helper runner-portable
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
2026-05-18 11:06:37 +08:00
Your Name
1a2b04f5cf fix(awooop): persist signal metadata and auto-repair prestate
Some checks failed
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m14s
CD Pipeline / build-and-deploy (push) Failing after 3m33s
CD Pipeline / post-deploy-checks (push) Has been skipped
2026-05-18 10:59:54 +08:00
Your Name
5c240744eb docs(awooop): record t47 production verification 2026-05-18 10:34:59 +08:00
AWOOOI CD
9f64739544 chore(cd): deploy 5d10c8f [skip ci] 2026-05-18 10:32:01 +08:00
Your Name
5d10c8fbfe fix(awooop): parallelize quality summary truth-chain fetch
All checks were successful
Code Review / ai-code-review (push) Successful in 22s
CD Pipeline / tests (push) Successful in 1m7s
CD Pipeline / build-and-deploy (push) Successful in 3m20s
CD Pipeline / post-deploy-checks (push) Successful in 1m14s
2026-05-18 10:27:32 +08:00
Your Name
168241e3c5 docs(awooop): record t46 production verification 2026-05-18 10:23:41 +08:00
AWOOOI CD
fd0888b092 chore(cd): deploy daf672a [skip ci] 2026-05-18 10:17:19 +08:00
Your Name
daf672aa1e feat(awooop): show automation claim on work items
Some checks failed
Code Review / ai-code-review (push) Failing after 1s
CD Pipeline / tests (push) Successful in 1m2s
CD Pipeline / build-and-deploy (push) Successful in 3m25s
CD Pipeline / post-deploy-checks (push) Successful in 1m14s
2026-05-18 10:12:51 +08:00
Your Name
fd5ea0cf94 docs(telegram): record t45 production verification 2026-05-18 09:57:35 +08:00
AWOOOI CD
8bacb65a75 chore(cd): deploy 0dd4b48 [skip ci] 2026-05-18 09:52:39 +08:00
Your Name
0dd4b486c5 fix(telegram): keep info callbacks nonfatal
All checks were successful
Code Review / ai-code-review (push) Successful in 19s
CD Pipeline / tests (push) Successful in 1m10s
CD Pipeline / build-and-deploy (push) Successful in 3m41s
CD Pipeline / post-deploy-checks (push) Successful in 1m21s
2026-05-18 09:47:40 +08:00
Your Name
ae18751d17 docs(ci): record secret guard verification 2026-05-18 09:42:09 +08:00
Your Name
986d1a937d fix(ci): run secret surface guard with node
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
2026-05-18 09:41:09 +08:00
Your Name
9f2974f4c5 fix(ci): guard gitea workflow secret surfaces
Some checks failed
Code Review / ai-code-review (push) Failing after 10s
2026-05-18 09:39:13 +08:00
Your Name
e8b507be54 docs(awooop): record legacy mcp production verification 2026-05-18 09:26:59 +08:00
AWOOOI CD
13d6aa41d8 chore(cd): deploy 902593f [skip ci] 2026-05-18 01:22:24 +00:00
Your Name
902593f775 feat(awooop): surface legacy mcp evidence in run detail
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m10s
CD Pipeline / build-and-deploy (push) Successful in 4m7s
CD Pipeline / post-deploy-checks (push) Successful in 1m57s
2026-05-18 09:16:59 +08:00
Your Name
bc701b8fd3 docs(ops): record momo telegram log hygiene 2026-05-18 09:06:14 +08:00
Your Name
756fe92601 fix(ops): converge openclaw compose project
All checks were successful
Ansible Lint / lint (push) Successful in 35s
2026-05-18 08:53:55 +08:00
Your Name
41a7ec93d6 docs(ci): record ansible lint recovery 2026-05-18 04:44:46 +08:00
Your Name
dca1eb642f fix(ansible): clear lint baseline debt
All checks were successful
Ansible Lint / lint (push) Successful in 28s
2026-05-18 04:17:39 +08:00
Your Name
ec18dec0d3 chore(ci): trigger ansible lint with runner label fix
Some checks failed
Ansible Lint / lint (push) Failing after 38s
2026-05-18 03:57:06 +08:00
Your Name
8a7a332190 fix(ci): align ansible lint runner label
All checks were successful
Code Review / ai-code-review (push) Successful in 15s
2026-05-18 03:29:59 +08:00
Your Name
24f4324ae9 fix(ops): align openclaw systemd project and redact token
Some checks failed
Ansible Lint / lint (push) Has been cancelled
2026-05-18 02:48:33 +08:00
Your Name
6b60f6b086 docs(awooop): record t38 production verification 2026-05-18 00:43:26 +08:00
AWOOOI CD
a42e40a68c chore(cd): deploy f0bb303 [skip ci] 2026-05-18 00:37:50 +08:00
Your Name
f0bb303655 fix(awooop): surface auto repair verification state
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m7s
CD Pipeline / build-and-deploy (push) Successful in 3m38s
CD Pipeline / post-deploy-checks (push) Successful in 1m29s
2026-05-18 00:32:50 +08:00
Your Name
40ec5055e1 docs(awooop): record t37 telegram callback closure 2026-05-18 00:12:08 +08:00
AWOOOI CD
68b20be2b4 chore(cd): deploy 9e1b15d [skip ci] 2026-05-18 00:09:05 +08:00
Your Name
9e1b15dabf fix(telegram): sync rejected polling callbacks
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m40s
CD Pipeline / build-and-deploy (push) Successful in 3m31s
CD Pipeline / post-deploy-checks (push) Successful in 1m36s
2026-05-18 00:03:52 +08:00
AWOOOI CD
06f64c6ddd chore(cd): deploy 913e1ab [skip ci] 2026-05-17 23:59:40 +08:00
Your Name
913e1abcfa fix(telegram): execute approved callbacks
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / tests (push) Successful in 1m10s
CD Pipeline / build-and-deploy (push) Successful in 3m39s
CD Pipeline / post-deploy-checks (push) Successful in 1m31s
2026-05-17 23:54:50 +08:00
Your Name
ba971e7a29 docs(awooop): record t36 incident header rollout 2026-05-17 23:46:36 +08:00
AWOOOI CD
bb4041579c chore(cd): deploy 69f2ec5 [skip ci] 2026-05-17 15:42:44 +00:00
Your Name
69f2ec5ec9 feat(awooop): add incident evidence headers
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m7s
CD Pipeline / build-and-deploy (push) Successful in 3m40s
CD Pipeline / post-deploy-checks (push) Successful in 1m28s
2026-05-17 23:37:53 +08:00
Your Name
a6699c41f8 docs(awooop): record t35 incident evidence rollout 2026-05-17 22:58:39 +08:00
AWOOOI CD
d4b2cf003f chore(cd): deploy 76c302a [skip ci] 2026-05-17 22:54:32 +08:00
Your Name
76c302ab5f feat(awooop): expose incident evidence links
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m1s
CD Pipeline / build-and-deploy (push) Successful in 3m26s
CD Pipeline / post-deploy-checks (push) Successful in 1m20s
2026-05-17 22:49:55 +08:00
Your Name
2d579cdf1e docs(awooop): record t34 incident deep link rollout 2026-05-17 22:39:42 +08:00
AWOOOI CD
6e9029273b chore(cd): deploy ef1e28b [skip ci] 2026-05-17 22:31:57 +08:00
Your Name
ef1e28b73a fix(telegram): keep url buttons out of callback assertions
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m5s
CD Pipeline / build-and-deploy (push) Successful in 3m45s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-17 22:26:51 +08:00
Your Name
6868a9a93d feat(awooop): link telegram alerts to incident runs
Some checks failed
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Failing after 1m58s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
2026-05-17 22:17:21 +08:00
Your Name
3aabceb234 docs(awooop): record t33 evidence filter rollout 2026-05-17 21:38:38 +08:00
AWOOOI CD
0d9cde51aa chore(cd): deploy a3f2b01 [skip ci] 2026-05-17 13:28:05 +00:00
Your Name
a3f2b010f8 fix(awooop): widen remediation filter context
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m16s
CD Pipeline / build-and-deploy (push) Successful in 3m45s
CD Pipeline / post-deploy-checks (push) Successful in 1m26s
2026-05-17 21:22:56 +08:00
AWOOOI CD
e6a62bb13b chore(cd): deploy 665e72b [skip ci] 2026-05-17 13:19:13 +00:00
Your Name
665e72ba33 feat(awooop): filter runs by remediation evidence
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m4s
CD Pipeline / build-and-deploy (push) Successful in 4m9s
CD Pipeline / post-deploy-checks (push) Successful in 1m26s
2026-05-17 21:13:54 +08:00
Your Name
171443ee94 docs(awooop): record t32 telegram evidence rollout 2026-05-17 21:07:10 +08:00
AWOOOI CD
5b8f324523 chore(cd): deploy cfaa4d0 [skip ci] 2026-05-17 21:04:11 +08:00
Your Name
cfaa4d0a4a feat(telegram): surface remediation evidence on alert cards
All checks were successful
Code Review / ai-code-review (push) Successful in 9s
CD Pipeline / tests (push) Successful in 1m5s
CD Pipeline / build-and-deploy (push) Successful in 3m23s
CD Pipeline / post-deploy-checks (push) Successful in 1m23s
2026-05-17 20:59:32 +08:00
Your Name
f02923b24a docs(awooop): record t31 list evidence rollout 2026-05-17 20:48:24 +08:00
AWOOOI CD
06489ef844 chore(cd): deploy 64fc19b [skip ci] 2026-05-17 20:40:49 +08:00
Your Name
64fc19b4d5 fix(awooop): align run list evidence table columns
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m18s
CD Pipeline / build-and-deploy (push) Successful in 3m21s
CD Pipeline / post-deploy-checks (push) Successful in 1m19s
2026-05-17 20:36:05 +08:00
AWOOOI CD
5f3f8fc253 chore(cd): deploy 0592402 [skip ci] 2026-05-17 20:31:24 +08:00
Your Name
0592402779 feat(awooop): surface remediation evidence in run lists
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / tests (push) Successful in 1m9s
CD Pipeline / build-and-deploy (push) Successful in 4m1s
CD Pipeline / post-deploy-checks (push) Successful in 1m54s
2026-05-17 20:26:03 +08:00
Your Name
27c2a3d980 docs(awooop): record t30 run timeline rollout
All checks were successful
E2E Health Check / e2e-health (push) Successful in 22s
2026-05-15 05:28:51 +08:00
AWOOOI CD
3ca3502147 chore(cd): deploy 5af7108 [skip ci] 2026-05-15 05:13:42 +08:00
Your Name
5af7108b18 fix(awooop): avoid run timeline hydration mismatch
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / tests (push) Successful in 1m13s
CD Pipeline / build-and-deploy (push) Successful in 3m20s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-15 05:09:08 +08:00
AWOOOI CD
befe503aa4 chore(cd): deploy 226f551 [skip ci] 2026-05-15 04:06:46 +08:00
Your Name
226f551e77 fix(awooop): sort mixed run timeline timestamps
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m2s
CD Pipeline / build-and-deploy (push) Successful in 3m25s
CD Pipeline / post-deploy-checks (push) Successful in 1m18s
2026-05-15 03:30:48 +08:00
AWOOOI CD
1db4ef093c chore(cd): deploy bc89940 [skip ci] 2026-05-15 02:37:24 +08:00
Your Name
bc89940564 feat(awooop): link remediation evidence to run timeline
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / tests (push) Successful in 1m12s
CD Pipeline / build-and-deploy (push) Successful in 4m14s
CD Pipeline / post-deploy-checks (push) Successful in 1m54s
2026-05-15 02:31:46 +08:00
Your Name
6ec424b15c docs(awooop): record t29 telegram history rollout
All checks were successful
E2E Health Check / e2e-health (push) Successful in 22s
2026-05-14 23:43:12 +08:00
AWOOOI CD
615fa23390 chore(cd): deploy 65001da [skip ci] 2026-05-14 23:38:26 +08:00
Your Name
65001da0d8 fix(telegram): preserve incident history html output
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m1s
CD Pipeline / build-and-deploy (push) Successful in 3m27s
CD Pipeline / post-deploy-checks (push) Successful in 1m29s
2026-05-14 23:33:43 +08:00
Your Name
f4a8390dc0 docs(frontend): record t28 incident timeline rollout 2026-05-14 23:18:20 +08:00
AWOOOI CD
7257aa3a9f chore(cd): deploy 475f2e4 [skip ci] 2026-05-14 23:14:01 +08:00
Your Name
475f2e452d feat(frontend): expand incident timeline event details
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / tests (push) Successful in 1m7s
CD Pipeline / build-and-deploy (push) Successful in 3m37s
CD Pipeline / post-deploy-checks (push) Successful in 1m23s
2026-05-14 23:09:12 +08:00
Your Name
d9d119ede2 docs(governance): record t27 remediation history rollout 2026-05-14 23:05:26 +08:00
AWOOOI CD
8d098f564d chore(cd): deploy 392cfb9 [skip ci] 2026-05-14 15:01:41 +00:00
Your Name
392cfb9025 feat(governance): surface remediation dry run history
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m1s
CD Pipeline / build-and-deploy (push) Successful in 3m45s
CD Pipeline / post-deploy-checks (push) Successful in 1m39s
2026-05-14 22:56:51 +08:00
Your Name
53cd7f9d66 docs(governance): record t26 dry run history rollout 2026-05-14 22:47:42 +08:00
AWOOOI CD
9870ed5e30 chore(cd): deploy 6aaaf87 [skip ci] 2026-05-14 14:43:33 +00:00
Your Name
6aaaf87ade feat(governance): persist remediation dry run history
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / tests (push) Successful in 1m4s
CD Pipeline / build-and-deploy (push) Successful in 3m44s
CD Pipeline / post-deploy-checks (push) Successful in 1m24s
2026-05-14 22:38:42 +08:00
Your Name
36cb9d6aeb docs(governance): record t25 remediation dry run rollout 2026-05-14 22:32:22 +08:00
AWOOOI CD
3749cc2ab5 chore(cd): deploy 04fdaee [skip ci] 2026-05-14 22:25:30 +08:00
Your Name
04fdaee83a feat(governance): add remediation dry run entrypoint
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m5s
CD Pipeline / build-and-deploy (push) Successful in 3m43s
CD Pipeline / post-deploy-checks (push) Successful in 1m33s
2026-05-14 22:20:34 +08:00
Your Name
102f92dfc3 docs(governance): record t24 remediation queue rollout 2026-05-14 22:06:36 +08:00
AWOOOI CD
cf173c49d8 chore(cd): deploy 44f7471 [skip ci] 2026-05-14 22:01:20 +08:00
Your Name
44f7471b21 fix(awooop): keep work items telemetry from blocking
All checks were successful
Code Review / ai-code-review (push) Successful in 9s
CD Pipeline / tests (push) Successful in 1m3s
CD Pipeline / build-and-deploy (push) Successful in 3m21s
CD Pipeline / post-deploy-checks (push) Successful in 1m28s
2026-05-14 21:56:54 +08:00
AWOOOI CD
224ae9e202 chore(cd): deploy aa63ae5 [skip ci] 2026-05-14 21:50:04 +08:00
Your Name
aa63ae5eca feat(governance): surface verification remediation queue
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m0s
CD Pipeline / build-and-deploy (push) Successful in 3m25s
CD Pipeline / post-deploy-checks (push) Successful in 1m16s
2026-05-14 21:45:33 +08:00
Your Name
f97127f704 docs(governance): record t23 auto repair gateway rollout 2026-05-14 21:24:55 +08:00
AWOOOI CD
33e4c9231e chore(cd): deploy 813d088 [skip ci] 2026-05-14 21:17:50 +08:00
Your Name
813d088339 feat(auto-repair): route ssh diagnostics through mcp gateway
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
run-migration / migrate (push) Successful in 9s
CD Pipeline / tests (push) Successful in 1m11s
CD Pipeline / build-and-deploy (push) Successful in 3m17s
CD Pipeline / post-deploy-checks (push) Successful in 1m16s
2026-05-14 21:13:05 +08:00
Your Name
0567135647 docs(governance): record t22 verifier breakdown rollout 2026-05-14 20:59:54 +08:00
AWOOOI CD
2582ad9425 chore(cd): deploy bad48de [skip ci] 2026-05-14 20:54:13 +08:00
Your Name
bad48dee04 feat(governance): explain verifier failures
All checks were successful
Code Review / ai-code-review (push) Successful in 14s
CD Pipeline / tests (push) Successful in 1m21s
CD Pipeline / build-and-deploy (push) Successful in 3m23s
CD Pipeline / post-deploy-checks (push) Successful in 1m18s
2026-05-14 20:49:20 +08:00
Your Name
dd269b195c docs(governance): record t21 verifier coverage rollout 2026-05-14 20:40:01 +08:00
AWOOOI CD
b1893395f0 chore(cd): deploy 485c58d [skip ci] 2026-05-14 20:33:59 +08:00
Your Name
485c58d085 feat(governance): surface verification coverage
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m9s
CD Pipeline / build-and-deploy (push) Successful in 3m49s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-14 20:28:53 +08:00
Your Name
bc1a11e373 docs(governance): record t20 slo state rollout 2026-05-14 20:07:05 +08:00
AWOOOI CD
e37cbe1910 chore(cd): deploy 809bc96 [skip ci] 2026-05-14 12:02:33 +00:00
Your Name
809bc9670b feat(governance): surface adr100 slo states
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m0s
CD Pipeline / build-and-deploy (push) Successful in 4m0s
CD Pipeline / post-deploy-checks (push) Successful in 1m55s
2026-05-14 19:57:32 +08:00
Your Name
6c16a7b162 docs(governance): record t19 km slo rollout 2026-05-14 19:48:37 +08:00
AWOOOI CD
7d3685ef58 chore(cd): deploy 21dcfbd [skip ci] 2026-05-14 19:43:39 +08:00
Your Name
21dcfbd991 fix(governance): collapse km slo fallback series
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
Deploy Alert Rules / Deploy Prometheus Alert Rules (push) Successful in 22s
CD Pipeline / tests (push) Successful in 1m6s
CD Pipeline / build-and-deploy (push) Successful in 5m17s
CD Pipeline / post-deploy-checks (push) Successful in 1m38s
2026-05-14 19:37:15 +08:00
Your Name
d2a4a17969 fix(governance): stabilize adr100 km growth slo
Some checks failed
Code Review / ai-code-review (push) Successful in 22s
Deploy Alert Rules / Deploy Prometheus Alert Rules (push) Successful in 25s
CD Pipeline / tests (push) Successful in 1m11s
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled
2026-05-14 19:33:52 +08:00
Your Name
cdb8bf6802 docs(governance): record adr100 slo emitter rollout 2026-05-14 19:22:39 +08:00
AWOOOI CD
80a056539c chore(cd): deploy b92c9e2 [skip ci] 2026-05-14 19:18:22 +08:00
Your Name
b92c9e285f fix(governance): scope adr100 automation metrics
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m16s
CD Pipeline / build-and-deploy (push) Successful in 3m31s
CD Pipeline / post-deploy-checks (push) Successful in 1m28s
2026-05-14 19:13:33 +08:00
AWOOOI CD
b677cb11de chore(cd): deploy 368386a [skip ci] 2026-05-14 19:09:38 +08:00
Your Name
368386abc0 fix(governance): skip non-finite slo values
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m4s
CD Pipeline / build-and-deploy (push) Successful in 3m18s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-14 19:05:16 +08:00
AWOOOI CD
d1b0ee7e96 chore(cd): deploy 13cf02b [skip ci] 2026-05-14 19:01:24 +08:00
Your Name
13cf02b740 feat(governance): emit adr100 slo metrics
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m0s
CD Pipeline / build-and-deploy (push) Successful in 3m21s
CD Pipeline / post-deploy-checks (push) Successful in 1m16s
2026-05-14 18:57:03 +08:00
Your Name
1670ff1960 docs(awooop): record t17b governance rollout 2026-05-14 18:47:39 +08:00
AWOOOI CD
9b32d3a9e7 chore(cd): deploy 6220f52 [skip ci] 2026-05-14 10:44:25 +00:00
Your Name
6220f52266 fix(governance): cast dispatch status filter
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m25s
CD Pipeline / build-and-deploy (push) Successful in 3m46s
CD Pipeline / post-deploy-checks (push) Successful in 1m16s
2026-05-14 18:39:11 +08:00
AWOOOI CD
5ef9240583 chore(cd): deploy 08d28dc [skip ci] 2026-05-14 18:35:32 +08:00
Your Name
08d28dc44b fix(governance): normalize event and dispatch queries
All checks were successful
Code Review / ai-code-review (push) Successful in 9s
CD Pipeline / tests (push) Successful in 1m0s
CD Pipeline / build-and-deploy (push) Successful in 3m18s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-14 18:31:11 +08:00
Your Name
6571260dd2 docs(awooop): record t17 production rollout 2026-05-14 18:17:45 +08:00
AWOOOI CD
687f37d837 chore(cd): deploy e8c4512 [skip ci] 2026-05-14 18:14:01 +08:00
Your Name
e8c4512a40 feat(awooop): surface automation work chain
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m33s
CD Pipeline / build-and-deploy (push) Successful in 4m3s
CD Pipeline / post-deploy-checks (push) Successful in 1m51s
2026-05-14 18:08:13 +08:00
Your Name
aa8b72043b docs(awooop): record t16 automation boundary 2026-05-14 01:15:45 +08:00
Your Name
b5288d4b7d docs(logbook): record t16 auto repair live fire 2026-05-14 01:14:12 +08:00
AWOOOI CD
a9b846c82a chore(cd): deploy 5604dd0 [skip ci] 2026-05-14 01:05:29 +08:00
Your Name
5604dd0256 fix(auto-repair): mark approval execution status
All checks were successful
Code Review / ai-code-review (push) Successful in 9s
CD Pipeline / tests (push) Successful in 1m10s
CD Pipeline / build-and-deploy (push) Successful in 3m27s
CD Pipeline / post-deploy-checks (push) Successful in 1m30s
2026-05-14 01:00:49 +08:00
AWOOOI CD
5361ad8f7e chore(cd): deploy 6f6d032 [skip ci] 2026-05-14 00:53:13 +08:00
Your Name
6f6d032ca9 fix(mcp): grant rollout verifier read tool
All checks were successful
Code Review / ai-code-review (push) Successful in 9s
run-migration / migrate (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m14s
CD Pipeline / build-and-deploy (push) Successful in 3m35s
CD Pipeline / post-deploy-checks (push) Successful in 1m37s
2026-05-14 00:48:23 +08:00
AWOOOI CD
a91c38675a chore(cd): deploy 5fb73a5 [skip ci] 2026-05-13 16:42:16 +00:00
Your Name
5fb73a5612 fix(verifier): recognize rollout success evidence
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m3s
CD Pipeline / build-and-deploy (push) Successful in 3m38s
CD Pipeline / post-deploy-checks (push) Successful in 1m30s
2026-05-14 00:37:32 +08:00
AWOOOI CD
c42b2dfe06 chore(cd): deploy b1ecb55 [skip ci] 2026-05-14 00:26:17 +08:00
Your Name
b1ecb55bd6 fix(verification): align playbook and mcp evidence for canary alerts
All checks were successful
Code Review / ai-code-review (push) Successful in 18s
CD Pipeline / tests (push) Successful in 1m2s
CD Pipeline / build-and-deploy (push) Successful in 3m31s
CD Pipeline / post-deploy-checks (push) Successful in 1m39s
2026-05-14 00:21:44 +08:00
AWOOOI CD
42d0d076d6 chore(cd): deploy d835b66 [skip ci] 2026-05-14 00:11:33 +08:00
Your Name
d835b666cf fix(alertmanager): keep auto repair moving on ai fallback
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m10s
CD Pipeline / build-and-deploy (push) Successful in 3m25s
CD Pipeline / post-deploy-checks (push) Successful in 1m30s
2026-05-14 00:06:49 +08:00
AWOOOI CD
39581ab824 chore(cd): deploy a0a0731 [skip ci] 2026-05-13 15:48:16 +00:00
Your Name
a0a0731cd6 fix(auto-repair): preserve exact playbook candidates
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 5m46s
CD Pipeline / build-and-deploy (push) Successful in 4m6s
CD Pipeline / post-deploy-checks (push) Successful in 1m28s
2026-05-13 23:38:19 +08:00
AWOOOI CD
5161a9dfd6 chore(cd): deploy 7a8cbb3 [skip ci] 2026-05-13 23:25:53 +08:00
Your Name
7a8cbb3241 fix(auto-repair): prefer exact playbooks and fail failed steps
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m3s
CD Pipeline / build-and-deploy (push) Successful in 3m31s
CD Pipeline / post-deploy-checks (push) Successful in 1m32s
2026-05-13 23:21:17 +08:00
AWOOOI CD
ae643552e9 chore(cd): deploy 8885c1b [skip ci] 2026-05-13 23:10:18 +08:00
Your Name
8885c1b49d fix(cd): rebuild API image when T16 seed script changes
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-13 23:05:00 +08:00
Your Name
4ee57b710d fix(ops): support API image path for T16 seed script
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-13 23:03:40 +08:00
AWOOOI CD
5a31702885 chore(cd): deploy dcaf16c [skip ci] 2026-05-13 23:01:16 +08:00
Your Name
dcaf16cecc fix(docker): preserve nested T16 ops script in build context
All checks were successful
CD Pipeline / tests (push) Successful in 1m12s
CD Pipeline / build-and-deploy (push) Successful in 3m24s
CD Pipeline / post-deploy-checks (push) Successful in 1m23s
2026-05-13 22:56:35 +08:00
AWOOOI CD
07ed014a83 chore(cd): deploy c5f4baf [skip ci] 2026-05-13 22:54:32 +08:00
Your Name
c5f4bafcaf fix(docker): include T16 seed script in API image
All checks were successful
CD Pipeline / tests (push) Successful in 1m18s
CD Pipeline / build-and-deploy (push) Successful in 3m19s
CD Pipeline / post-deploy-checks (push) Successful in 1m35s
2026-05-13 22:49:52 +08:00
AWOOOI CD
1277865343 chore(cd): deploy 7df94e9 [skip ci] 2026-05-13 22:44:15 +08:00
Your Name
7df94e9bef fix(k8s): fit auto repair canary resource floor
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m29s
CD Pipeline / build-and-deploy (push) Successful in 3m22s
CD Pipeline / post-deploy-checks (push) Successful in 1m59s
2026-05-13 22:39:09 +08:00
AWOOOI CD
8bb601eecd chore(cd): deploy 1778a69 [skip ci] 2026-05-13 22:35:22 +08:00
Your Name
1778a692e0 feat(awooop): add auto repair canary live-fire target
Some checks failed
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m11s
CD Pipeline / build-and-deploy (push) Failing after 6m52s
CD Pipeline / post-deploy-checks (push) Has been skipped
2026-05-13 22:30:20 +08:00
Your Name
0337b62349 docs(awooop): record event dossier rollout [skip ci] 2026-05-13 22:16:50 +08:00
AWOOOI CD
39e6ce747d chore(cd): deploy e947e60 [skip ci] 2026-05-13 22:12:55 +08:00
Your Name
e947e60d11 fix(awooop): type dossier run filter
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m3s
CD Pipeline / build-and-deploy (push) Successful in 3m47s
CD Pipeline / post-deploy-checks (push) Successful in 1m43s
2026-05-13 22:08:00 +08:00
AWOOOI CD
a21fc0f35a chore(cd): deploy 77aace7 [skip ci] 2026-05-13 22:04:10 +08:00
Your Name
77aace7515 feat(awooop): show inbound event dossiers
All checks were successful
Code Review / ai-code-review (push) Successful in 17s
CD Pipeline / tests (push) Successful in 1m19s
CD Pipeline / build-and-deploy (push) Successful in 3m30s
CD Pipeline / post-deploy-checks (push) Successful in 1m34s
2026-05-13 21:59:16 +08:00
Your Name
eb73591286 docs(awooop): record inbound envelope and agent boundary 2026-05-13 21:49:14 +08:00
AWOOOI CD
011085ce3d chore(cd): deploy a524e46 [skip ci] 2026-05-13 21:43:35 +08:00
Your Name
a524e468e4 fix(awooop): mark inbound-only truth chains received
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m19s
CD Pipeline / build-and-deploy (push) Successful in 3m24s
CD Pipeline / post-deploy-checks (push) Successful in 1m25s
2026-05-13 21:38:47 +08:00
AWOOOI CD
365d93f07e chore(cd): deploy 7950851 [skip ci] 2026-05-13 21:34:15 +08:00
Your Name
795085170a feat(awooop): persist inbound source envelopes
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m23s
CD Pipeline / build-and-deploy (push) Successful in 3m37s
CD Pipeline / post-deploy-checks (push) Successful in 1m34s
2026-05-13 21:29:04 +08:00
AWOOOI CD
c888444287 chore(cd): deploy ea320a2 [skip ci] 2026-05-13 21:19:22 +08:00
Your Name
ea320a2087 db(awooop): add inbound truth-chain envelope columns
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
run-migration / migrate (push) Successful in 9s
CD Pipeline / tests (push) Successful in 1m1s
CD Pipeline / build-and-deploy (push) Successful in 3m36s
CD Pipeline / post-deploy-checks (push) Successful in 1m32s
2026-05-13 21:14:43 +08:00
Your Name
ebf0f57272 docs(awooop): record alertmanager truth-chain mirror 2026-05-13 20:47:31 +08:00
AWOOOI CD
dc865cf53d chore(cd): deploy 8d7b938 [skip ci] 2026-05-13 20:41:39 +08:00
Your Name
8d7b938f78 fix(awooop): surface alert inbound by provider event
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m5s
CD Pipeline / build-and-deploy (push) Successful in 3m29s
CD Pipeline / post-deploy-checks (push) Successful in 1m19s
2026-05-13 20:37:02 +08:00
AWOOOI CD
453e22f80d chore(cd): deploy c6e4752 [skip ci] 2026-05-13 20:33:27 +08:00
Your Name
c6e47526a7 fix(awooop): use db-safe timestamps for alert mirrors
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m15s
CD Pipeline / build-and-deploy (push) Successful in 3m19s
CD Pipeline / post-deploy-checks (push) Successful in 1m26s
2026-05-13 20:28:49 +08:00
AWOOOI CD
9b7a91d828 chore(cd): deploy c2d01eb [skip ci] 2026-05-13 20:22:21 +08:00
Your Name
c2d01eb6f1 feat(awooop): mirror alertmanager events into truth chain
All checks were successful
Code Review / ai-code-review (push) Successful in 19s
CD Pipeline / tests (push) Successful in 2m10s
CD Pipeline / build-and-deploy (push) Successful in 3m22s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-13 20:16:42 +08:00
Your Name
21042ad0e7 docs(awooop): record 188 key rotation verification 2026-05-13 20:05:41 +08:00
AWOOOI CD
bcf2ed7841 chore(cd): deploy 6064e6d [skip ci] 2026-05-13 20:02:11 +08:00
Your Name
6064e6d03f fix(cd): disable unsafe 188 secret sync path
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-13 19:57:13 +08:00
Your Name
830dc0dcd0 fix(cd): keep 188 deploy key out of step env
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
2026-05-13 19:41:12 +08:00
Your Name
88dbcd912e docs(awooop): record t14c telegram flow progress 2026-05-13 19:38:55 +08:00
AWOOOI CD
2f5d812608 chore(cd): deploy 74c4767 [skip ci] 2026-05-13 11:34:47 +00:00
Your Name
74c47672da feat(telegram): show automation flow progress
All checks were successful
Code Review / ai-code-review (push) Successful in 22s
CD Pipeline / tests (push) Successful in 1m13s
CD Pipeline / build-and-deploy (push) Successful in 3m39s
CD Pipeline / post-deploy-checks (push) Successful in 1m31s
2026-05-13 19:29:51 +08:00
Your Name
872abea008 docs(awooop): record t14b auto approved evidence link 2026-05-13 19:24:01 +08:00
AWOOOI CD
edba52f401 chore(cd): deploy 596f2f6 [skip ci] 2026-05-13 19:19:24 +08:00
Your Name
596f2f6820 fix(awooop): link auto approved execution evidence
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m17s
CD Pipeline / build-and-deploy (push) Successful in 3m42s
CD Pipeline / post-deploy-checks (push) Successful in 1m21s
2026-05-13 19:14:17 +08:00
Your Name
c68cbd3139 docs(awooop): record t14a verification deployment 2026-05-13 19:05:57 +08:00
AWOOOI CD
9c9cf68063 chore(cd): deploy 3bad354 [skip ci] 2026-05-13 19:00:59 +08:00
Your Name
3bad354414 fix(cd): include ed25519 deploy host keyscan
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
2026-05-13 18:55:49 +08:00
Your Name
518a16e895 fix(awooop): persist auto repair verification fallback
Some checks failed
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m10s
CD Pipeline / build-and-deploy (push) Failing after 3m16s
CD Pipeline / post-deploy-checks (push) Has been skipped
2026-05-13 18:47:46 +08:00
Your Name
a28baa6197 docs(awooop): record t13 quality classification deployment 2026-05-13 17:34:46 +08:00
AWOOOI CD
2314badec5 chore(cd): deploy cecadb3 [skip ci] 2026-05-13 17:29:11 +08:00
Your Name
cecadb331b fix(awooop): exclude audit-only ops from repair quality
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m1s
CD Pipeline / build-and-deploy (push) Successful in 3m41s
CD Pipeline / post-deploy-checks (push) Successful in 1m32s
2026-05-13 17:24:29 +08:00
AWOOOI CD
55b28336e5 chore(cd): deploy 22beddc [skip ci] 2026-05-13 09:17:44 +00:00
Your Name
22beddc8a8 fix(awooop): classify no action audits correctly
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m11s
CD Pipeline / build-and-deploy (push) Successful in 3m39s
CD Pipeline / post-deploy-checks (push) Successful in 1m37s
2026-05-13 17:12:44 +08:00
Your Name
c1e2567b15 docs(awooop): record t12d quality overview deployment 2026-05-13 16:49:29 +08:00
AWOOOI CD
90156a7c1a chore(cd): deploy 356bfce [skip ci] 2026-05-13 16:38:53 +08:00
Your Name
356bfce2c8 fix(awooop): expose quality summary aggregate
All checks were successful
Code Review / ai-code-review (push) Successful in 16s
CD Pipeline / tests (push) Successful in 1m7s
CD Pipeline / build-and-deploy (push) Successful in 3m27s
CD Pipeline / post-deploy-checks (push) Successful in 1m32s
2026-05-13 16:34:20 +08:00
AWOOOI CD
94fc25dc39 chore(cd): deploy e420306 [skip ci] 2026-05-13 16:28:36 +08:00
Your Name
e4203060f3 feat(awooop): surface automation quality overview
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m15s
CD Pipeline / build-and-deploy (push) Successful in 3m25s
CD Pipeline / post-deploy-checks (push) Successful in 1m15s
2026-05-13 16:23:47 +08:00
Your Name
aafe7273e3 docs(awooop): record t12 quality summary deployment 2026-05-13 16:06:30 +08:00
AWOOOI CD
d339e3ebad chore(cd): deploy ae7c7cb [skip ci] 2026-05-13 16:01:50 +08:00
Your Name
ae7c7cbd23 feat(awooop): summarize automation quality
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m14s
CD Pipeline / build-and-deploy (push) Successful in 3m43s
CD Pipeline / post-deploy-checks (push) Successful in 1m29s
2026-05-13 15:56:42 +08:00
Your Name
c00e911b28 docs(awooop): record t12 automation quality deployment 2026-05-13 12:59:34 +08:00
AWOOOI CD
15ff939b1f chore(cd): deploy 0f08024 [skip ci] 2026-05-13 04:56:44 +00:00
Your Name
0f080240c6 feat(awooop): expose automation quality gate
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m13s
CD Pipeline / build-and-deploy (push) Successful in 3m33s
CD Pipeline / post-deploy-checks (push) Successful in 1m26s
2026-05-13 12:51:52 +08:00
Your Name
d886526f23 docs(awooop): record t12 outbound truth deployment 2026-05-13 12:35:54 +08:00
AWOOOI CD
d33856f874 chore(cd): deploy 04c7bb1 [skip ci] 2026-05-13 12:33:11 +08:00
Your Name
04c7bb1c97 fix(awooop): store outbound sent timestamp as naive utc
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m17s
CD Pipeline / build-and-deploy (push) Successful in 3m51s
CD Pipeline / post-deploy-checks (push) Successful in 1m25s
2026-05-13 12:28:04 +08:00
AWOOOI CD
3a1cedc90d chore(cd): deploy d449ba4 [skip ci] 2026-05-13 04:25:23 +00:00
Your Name
d449ba4720 fix(awooop): write outbound sent timestamp as parameter
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m18s
CD Pipeline / build-and-deploy (push) Successful in 3m48s
CD Pipeline / post-deploy-checks (push) Successful in 1m24s
2026-05-13 12:20:20 +08:00
AWOOOI CD
e2785899a2 chore(cd): deploy e57474a [skip ci] 2026-05-13 12:17:10 +08:00
Your Name
e57474adfb fix(awooop): cast outbound sent status timestamp gate
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m22s
CD Pipeline / build-and-deploy (push) Successful in 3m30s
CD Pipeline / post-deploy-checks (push) Successful in 1m24s
2026-05-13 12:12:16 +08:00
AWOOOI CD
971afafc01 chore(cd): deploy 7fa9f74 [skip ci] 2026-05-13 12:09:18 +08:00
Your Name
7fa9f743dd fix(awooop): strengthen outbound truth references
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m12s
CD Pipeline / build-and-deploy (push) Successful in 3m33s
CD Pipeline / post-deploy-checks (push) Successful in 1m15s
2026-05-13 12:04:26 +08:00
Your Name
7d506b785d docs(awooop): record t11 gateway detail deployment 2026-05-13 11:57:54 +08:00
AWOOOI CD
8e14f1bf3e chore(cd): deploy c486087 [skip ci] 2026-05-13 03:54:32 +00:00
Your Name
c486087294 feat(awooop): surface gateway summary in details
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / tests (push) Successful in 1m4s
CD Pipeline / build-and-deploy (push) Successful in 3m47s
CD Pipeline / post-deploy-checks (push) Successful in 1m16s
2026-05-13 11:49:37 +08:00
Your Name
51528b2cf9 docs(awooop): record t10 gateway truth chain deployment 2026-05-13 11:38:56 +08:00
AWOOOI CD
5daa005c1b chore(cd): deploy a99dccf [skip ci] 2026-05-13 03:35:06 +00:00
Your Name
a99dccfc73 feat(awooop): summarize gateway usage in truth chain
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m8s
CD Pipeline / build-and-deploy (push) Successful in 3m45s
CD Pipeline / post-deploy-checks (push) Successful in 1m14s
2026-05-13 11:30:08 +08:00
Your Name
90603ad9bb docs(awooop): record t9 approval gateway deployment 2026-05-13 11:27:19 +08:00
AWOOOI CD
77877dd501 chore(cd): deploy 34bfe56 [skip ci] 2026-05-13 11:23:43 +08:00
Your Name
34bfe56f53 fix(awooop): persist approved ssh gateway blocks
All checks were successful
Code Review / ai-code-review (push) Successful in 20s
CD Pipeline / tests (push) Successful in 3m58s
CD Pipeline / build-and-deploy (push) Successful in 3m47s
CD Pipeline / post-deploy-checks (push) Successful in 1m18s
2026-05-13 11:15:54 +08:00
AWOOOI CD
ce83e8dc00 chore(cd): deploy a0a2a5b [skip ci] 2026-05-13 11:10:27 +08:00
Your Name
a0a2a5b1f0 feat(awooop): gate approved ssh execution
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
run-migration / migrate (push) Successful in 9s
CD Pipeline / tests (push) Successful in 1m22s
CD Pipeline / build-and-deploy (push) Successful in 6m36s
CD Pipeline / post-deploy-checks (push) Successful in 1m42s
2026-05-13 11:02:24 +08:00
Your Name
85a1bcef52 docs(awooop): record t8 post verify gateway deployment 2026-05-13 10:46:25 +08:00
AWOOOI CD
f19fe4aa90 chore(cd): deploy 1a03bce [skip ci] 2026-05-13 10:41:33 +08:00
Your Name
1a03bceb5c feat(awooop): route post verify mcp through gateway
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m7s
CD Pipeline / build-and-deploy (push) Successful in 10m15s
CD Pipeline / post-deploy-checks (push) Successful in 1m54s
2026-05-13 10:30:03 +08:00
Your Name
15873b9e0c docs(awooop): record t7 mcp gateway deployment 2026-05-13 10:25:47 +08:00
AWOOOI CD
8ac4ba24f7 chore(cd): deploy 42789db [skip ci] 2026-05-13 10:22:15 +08:00
Your Name
42789dbe9e fix(awooop): enable awoooi mcp gateway shadow
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
run-migration / migrate (push) Successful in 9s
CD Pipeline / tests (push) Successful in 2m32s
CD Pipeline / build-and-deploy (push) Successful in 12m19s
CD Pipeline / post-deploy-checks (push) Successful in 1m26s
2026-05-13 10:07:20 +08:00
AWOOOI CD
7ed9859260 chore(cd): deploy 0b70749 [skip ci] 2026-05-13 10:01:23 +08:00
Your Name
0b707495a1 fix(migrations): retrigger mcp gateway seed
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
run-migration / migrate (push) Successful in 9s
CD Pipeline / tests (push) Successful in 1m1s
CD Pipeline / build-and-deploy (push) Successful in 6m59s
CD Pipeline / post-deploy-checks (push) Successful in 1m27s
2026-05-13 09:53:15 +08:00
Your Name
e177eca25d fix(migrations): set tenant context for mcp seed
Some checks failed
Code Review / ai-code-review (push) Successful in 10s
run-migration / migrate (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m10s
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled
2026-05-13 09:51:13 +08:00
Your Name
146cf411ae fix(ci): retry migrations on permission denied
Some checks failed
Code Review / ai-code-review (push) Successful in 10s
run-migration / migrate (push) Failing after 9s
CD Pipeline / tests (push) Successful in 1m21s
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled
2026-05-13 09:48:56 +08:00
Your Name
57ed07d1d0 feat(awooop): route sense mcp through gateway
Some checks failed
Code Review / ai-code-review (push) Successful in 10s
run-migration / migrate (push) Failing after 8s
CD Pipeline / tests (push) Successful in 1m14s
CD Pipeline / build-and-deploy (push) Has been cancelled
CD Pipeline / post-deploy-checks (push) Has been cancelled
2026-05-13 09:46:12 +08:00
Your Name
5ecd21e664 docs(awooop): record t6 incident visibility deployment 2026-05-13 09:33:17 +08:00
AWOOOI CD
c01012d767 chore(cd): deploy af9798a [skip ci] 2026-05-13 09:29:04 +08:00
Your Name
af9798a62e feat(awooop): surface reconciliation in incident timeline
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m9s
CD Pipeline / build-and-deploy (push) Successful in 5m4s
CD Pipeline / post-deploy-checks (push) Successful in 1m15s
2026-05-13 09:22:51 +08:00
Your Name
5294f0712f docs(awooop): record t5 reconciliation deployment 2026-05-13 09:14:15 +08:00
AWOOOI CD
631fc22090 chore(cd): deploy 1003fa4 [skip ci] 2026-05-13 09:10:20 +08:00
Your Name
1003fa4246 feat(awooop): expose incident reconciliation state
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 59s
CD Pipeline / build-and-deploy (push) Successful in 7m3s
CD Pipeline / post-deploy-checks (push) Successful in 1m15s
2026-05-13 09:02:16 +08:00
Your Name
54814bc65e docs(awooop): record t4 drift fingerprint deployment 2026-05-13 07:52:42 +08:00
AWOOOI CD
3d38039b86 chore(cd): deploy 5b34877 [skip ci] 2026-05-13 07:40:58 +08:00
Your Name
5b34877429 feat(awooop): expose drift repeat fingerprint
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m17s
CD Pipeline / build-and-deploy (push) Successful in 3m20s
CD Pipeline / post-deploy-checks (push) Successful in 1m13s
2026-05-13 07:36:21 +08:00
Your Name
b0a8302dd7 docs(awooop): record t3 decision audit deployment 2026-05-13 04:17:04 +08:00
AWOOOI CD
90b9ddb7a5 chore(cd): deploy 3799e0d [skip ci] 2026-05-12 20:12:20 +00:00
Your Name
3799e0db0d feat(awooop): audit ansible decision candidates
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m1s
CD Pipeline / build-and-deploy (push) Successful in 3m33s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-13 04:07:23 +08:00
Your Name
f61747aeac docs(awooop): record t3 ansible deployment 2026-05-13 04:03:48 +08:00
AWOOOI CD
07000dae3a chore(cd): deploy ca80972 [skip ci] 2026-05-12 19:59:30 +00:00
Your Name
49ffb5bb19 fix(ci): repair migration audit json literal
All checks were successful
Code Review / ai-code-review (push) Successful in 9s
2026-05-13 03:59:22 +08:00
Your Name
ca80972dc7 feat(awooop): expose ansible audit truth surface
Some checks failed
Code Review / ai-code-review (push) Successful in 10s
run-migration / migrate (push) Failing after 9s
CD Pipeline / tests (push) Successful in 2m21s
CD Pipeline / build-and-deploy (push) Successful in 3m50s
CD Pipeline / post-deploy-checks (push) Successful in 1m19s
2026-05-13 03:53:13 +08:00
Your Name
feda8a0b4b fix(ci): harden migration audit seed
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-13 03:40:41 +08:00
Your Name
124c3c545b docs(awooop): record t2 truth-chain deployment 2026-05-13 03:31:35 +08:00
AWOOOI CD
dba3e405f4 chore(cd): deploy b4d367e [skip ci] 2026-05-13 03:26:51 +08:00
Your Name
b4d367eeb4 feat(awooop): expose mcp bridge truth chain
All checks were successful
Code Review / ai-code-review (push) Successful in 13s
CD Pipeline / tests (push) Successful in 1m17s
CD Pipeline / build-and-deploy (push) Successful in 3m55s
CD Pipeline / post-deploy-checks (push) Successful in 1m45s
2026-05-13 03:21:31 +08:00
Your Name
b81cb28615 docs(awooop): record t2 mcp bridge smoke 2026-05-13 00:33:03 +08:00
AWOOOI CD
c18c6f6fe2 chore(cd): deploy 94d006e [skip ci] 2026-05-12 23:48:50 +08:00
Your Name
94d006eac8 feat(awooop): bridge legacy mcp audit into gateway timeline
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m4s
CD Pipeline / build-and-deploy (push) Successful in 3m22s
CD Pipeline / post-deploy-checks (push) Successful in 1m14s
2026-05-12 23:44:19 +08:00
Your Name
96a8cf3ad5 docs(awooop): record t1 truth-chain smoke 2026-05-12 23:36:51 +08:00
Your Name
f318fd3a89 fix(ci): harden migration workflow audit
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
2026-05-12 23:29:54 +08:00
AWOOOI CD
1a62c322bc chore(cd): deploy 24b15f4 [skip ci] 2026-05-12 15:26:34 +00:00
Your Name
24b15f4ad2 feat(awooop): harden outbound truth chain mirror
Some checks failed
Code Review / ai-code-review (push) Successful in 10s
run-migration / migrate (push) Failing after 8s
CD Pipeline / tests (push) Successful in 1m4s
CD Pipeline / build-and-deploy (push) Successful in 3m27s
CD Pipeline / post-deploy-checks (push) Successful in 1m18s
2026-05-12 23:21:45 +08:00
Your Name
c652f37b69 docs(awooop): 記錄 truth-chain production smoke 2026-05-12 23:05:16 +08:00
AWOOOI CD
c523a22d89 chore(cd): deploy f7c8453 [skip ci] 2026-05-12 15:00:31 +00:00
Your Name
f7c84530d6 feat(awooop): 新增 truth-chain 查詢 API
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m16s
CD Pipeline / build-and-deploy (push) Successful in 3m29s
CD Pipeline / post-deploy-checks (push) Successful in 1m19s
2026-05-12 22:55:36 +08:00
Your Name
56228dbb79 docs(awooop): 盤點 Telegram 自動化真相鏈缺口 2026-05-12 22:41:05 +08:00
Your Name
de16c88418 chore(rls): 套用 outbound message canary
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
2026-05-12 21:55:23 +08:00
Your Name
edd06485e0 docs(rls): 記錄 projects canary 套用 2026-05-12 21:41:14 +08:00
AWOOOI CD
7f94bc5776 chore(cd): deploy 7d92f0a [skip ci] 2026-05-12 13:30:31 +00:00
Your Name
7d92f0acd7 chore(rls): stage projects canary path
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m8s
CD Pipeline / build-and-deploy (push) Successful in 3m49s
CD Pipeline / post-deploy-checks (push) Successful in 1m25s
2026-05-12 21:25:24 +08:00
Your Name
b7af597459 chore(rls): 套用 tool registry canary wave1.1
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-12 21:15:14 +08:00
Your Name
1617b73a9d docs(rls): 記錄 canary wave1 production apply 2026-05-12 20:55:40 +08:00
Your Name
8c4dc7a5a8 chore(rls): 新增 manual script gate 與 canary wave1
Some checks failed
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m5s
CD Pipeline / build-and-deploy (push) Failing after 10m6s
CD Pipeline / post-deploy-checks (push) Has been skipped
2026-05-12 20:23:27 +08:00
AWOOOI CD
be8ddf4599 chore(cd): deploy ff30c61 [skip ci] 2026-05-12 20:01:07 +08:00
Your Name
ff30c61c4c fix(rls): 收斂 API DB access context
All checks were successful
Code Review / ai-code-review (push) Successful in 21s
CD Pipeline / tests (push) Successful in 1m20s
CD Pipeline / build-and-deploy (push) Successful in 4m15s
CD Pipeline / post-deploy-checks (push) Successful in 1m58s
2026-05-12 19:55:13 +08:00
Your Name
33c0577e93 docs(ops): 記錄 RLS role bootstrap 套用 2026-05-12 19:35:28 +08:00
Your Name
f0255e0300 chore(ops): 補強 RLS role bootstrap gate
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-12 18:36:35 +08:00
Your Name
0bc1878778 chore(ops): 新增 RLS preflight 與 registry certbot 修復包
All checks were successful
Code Review / ai-code-review (push) Successful in 13s
2026-05-12 18:25:53 +08:00
Your Name
a18e2f9c3f fix(security): 停用 GitHub production deploy 2026-05-12 16:22:16 +08:00
Your Name
6b02f49fc6 docs(backup): 校正 MOMO 備份驗證紀錄 2026-05-12 15:53:20 +08:00
Your Name
216b7d78e2 fix(backup): 接入 MOMO PG 備份失敗通知
Some checks failed
Code Review / ai-code-review (push) Successful in 11s
Ansible Lint / lint (push) Has been cancelled
2026-05-12 15:50:44 +08:00
Your Name
abdab85362 docs(awooop): record host backup notification deploy 2026-05-12 14:59:17 +08:00
Your Name
116fdbb33f docs(awooop): record ops notification deployment 2026-05-12 14:55:48 +08:00
AWOOOI CD
9db1e9b7a5 chore(cd): deploy 1a74286 [skip ci] 2026-05-12 14:48:50 +08:00
Your Name
1a74286dfa fix(awooop): mirror ops notifications through api
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-12 14:43:09 +08:00
AWOOOI CD
b437a33043 chore(cd): deploy 03ba967 [skip ci] 2026-05-12 14:31:32 +08:00
Your Name
03ba9678d5 fix(awooop): label cicd outbound timeline
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m3s
CD Pipeline / build-and-deploy (push) Successful in 4m1s
CD Pipeline / post-deploy-checks (push) Successful in 1m26s
2026-05-12 14:26:29 +08:00
Your Name
d74beb2176 fix(ci): prevent docker lock self match
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
2026-05-12 14:21:57 +08:00
AWOOOI CD
f824308b6a chore(cd): deploy cb7151c [skip ci] 2026-05-12 06:12:20 +00:00
Your Name
cb7151cc27 fix(awooop): set shadow run defaults for mirrors
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m5s
CD Pipeline / build-and-deploy (push) Successful in 10m20s
CD Pipeline / post-deploy-checks (push) Successful in 2m33s
2026-05-12 14:01:03 +08:00
Your Name
ad8ead2546 fix(awooop): route ci notifications through event mirror
Some checks failed
Code Review / ai-code-review (push) Successful in 14s
CD Pipeline / tests (push) Successful in 1m18s
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled
2026-05-12 13:58:08 +08:00
AWOOOI CD
d356cd32fc chore(cd): deploy 80c36ba [skip ci] 2026-05-07 19:00:45 +08:00
Your Name
80c36ba801 fix(incident): F2 NO_ACTION 觸發 resolve_incident + 冪等 guard
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m9s
CD Pipeline / build-and-deploy (push) Successful in 3m29s
CD Pipeline / post-deploy-checks (push) Successful in 1m30s
【根因】INC-20260507-99ADF2 飛輪斷流,566+ stuck incidents(每 30 秒漲 1)。核心
原因:NO_ACTION 路徑 (approval_execution.py:251) 提前 return True,跳過
line 482-495 已有的 resolve_incident 呼叫,incident 永遠卡 INVESTIGATING。

【修法】
- approval_execution.py NO_ACTION 分支補 resolve_incident 呼叫 + 成功/失敗
  log,背景 log 加 path="no_action" 用於 prod 量化修法生效率(debugger
  全鏈分析 + critic 1st/2nd 審查必修 #1)。
- incident_service.py resolve_incident 在 line 1106 加 RESOLVED 冪等 guard,
  早於所有副作用(status mutation / Redis / DB / postmortem / KB / KM /
  disposition),順帶修 success path line 482-495 重觸 postmortem 的潛在
  老風險(critic 必修 #2)。

【遵守 Codex 5/6 設計(feedback_respect_codex_design_intent.md)】
- 不動 flywheel_stats_service.py / heartbeat_report_service.py /
  auto_repair_service.record_auto_repair() / metrics_repository UPPER(status)。
- resolve_incident 不寫 auto_repair_executions 表(Codex 5/6 source of
  truth),不污染 24h KPI 計算。

【Test 覆蓋】
- test_approval_execution_no_action.py:NO_ACTION → resolve 被呼叫一次 +
  resolve raise 時仍 return True(NO_ACTION 不能因 resolve 失敗退化成 False,
  否則污染 auto_execute KPI line 207-208 註解契約)。
- test_incident_service_resolve_idempotency.py:RESOLVED → return existing +
  save_to_working_memory 不被呼叫;not_found → return None。

【驗收條件(部署後 24h)】
1. grep `path="no_action"` 中 incident_resolved_after_no_action_execution
   數量 vs background_execution_noop 數量,1:1 才算修復成功。
2. awoooi_flywheel_incidents_stuck 從每 30 秒漲 1 變平緩。
3. SRE 群 24h 內若湧入 >20 份 NO_ACTION postmortem,則觸發 follow-up:評估
   resolution_type="no_action" 是否跳過 postmortem(critic Minor #3 方案 B)。

Refs: INC-20260507-99ADF2, debugger root cause #1 (鏈 A), critic 1st 必修
#1 #2, critic 2nd 必修 #1 #2 #3

Co-Authored-By: Codex (aider) <noreply@anthropic.com>
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-07 18:55:58 +08:00
AWOOOI CD
afb5f9556e chore(cd): deploy b3dc41f [skip ci] 2026-05-07 15:37:50 +08:00
Your Name
b3dc41fcd4 fix(metrics): 串入飛輪指標到 /metrics 主端點,修復 FlywheelExecutionRateMissing 死告警
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / tests (push) Successful in 1m3s
CD Pipeline / build-and-deploy (push) Successful in 3m28s
CD Pipeline / post-deploy-checks (push) Successful in 1m21s
INC-20260507-99ADF2 根因(feedback_full_chain_first_then_fix.md 全鏈分析):

【鏈路斷點】規則層(5/3 加)vs 指標層(5/6 改)vs scrape 層(從沒同步)
- 577250a6(5/3)「反消音化」commit 加了 FlywheelExecutionRateMissing
  rule,要求 110 Prom scrape 到 awoooi_flywheel_execution_success_rate;
- a2c4b3d4(5/6)Codex 改 FlywheelStatsService 用 auto_repair_executions
  作 source of truth(24h 樣本 1-9 筆回 None 給 W-3b watchdog 接管);
- 但 awoooi_flywheel_* 指標自始至終只在 /api/v1/stats/flywheel/metrics
  暴露,110 Prom awoooi-api job 抓的是 /metrics → absent() 永遠 1
  → 自 2026-05-06T04:14 UTC 起 firing 26h+ 屬 dead alert

【修法】只動 awoooi-api 一處,不碰 Codex 設計、不碰 110 Prom 配置:
- main.py /metrics endpoint 改 async,在 generate_latest() 後串入
  FlywheelStatsService.compute() → to_prometheus_lines()。
- 既有 awoooi-api scrape job 自動拿到飛輪指標。
- 完全保留 Codex a2c4b3d4 設計:1-9 筆回 None 讓 W-3b watchdog 雙保險。

【不碰的部分】
- flywheel_stats_service.py 不動:Codex 5/6 LOGBOOK 已明確說明
  「Redis playbook counter 失準 → 用 auto_repair_executions 為唯一信任源」,
  1-9 筆 return None 是配合 ai_slo_watchdog_job W-3b grace+30min 設計的
  反消音化雙保險,不是 bug。

驗證計畫(部署後):
1. curl /metrics | grep awoooi_flywheel  → 看到飛輪指標
2. Prom query awoooi_flywheel_execution_success_rate  → 非空
3. ALERTS{alertname="FlywheelExecutionRateMissing"}  → resolved
4. 30 分鐘觀察 Telegram 不再收 INC-20260507-99ADF2

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-07 15:33:04 +08:00
Your Name
c88d82f2ac docs(logbook): record timeline label deploy [skip ci] 2026-05-07 10:48:24 +08:00
AWOOOI CD
395cf742b9 chore(cd): deploy 72d86ba [skip ci] 2026-05-07 10:44:52 +08:00
Your Name
72d86ba70b fix(awooop): label outbound timeline events
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m7s
CD Pipeline / build-and-deploy (push) Successful in 3m31s
CD Pipeline / post-deploy-checks (push) Successful in 1m23s
2026-05-07 10:40:14 +08:00
Your Name
a26ccf8d80 docs(logbook): record capacity migration rollout [skip ci] 2026-05-07 10:35:55 +08:00
AWOOOI CD
77ef400598 chore(cd): deploy 32e8a04 [skip ci] 2026-05-07 10:33:09 +08:00
Your Name
08097f4070 fix(ci): harden migration audit logging
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
2026-05-07 10:32:41 +08:00
Your Name
32e8a045f4 fix(db): allow metric capacity violation types
Some checks failed
Code Review / ai-code-review (push) Successful in 11s
run-migration / migrate (push) Failing after 9s
CD Pipeline / tests (push) Successful in 1m4s
CD Pipeline / build-and-deploy (push) Successful in 3m29s
CD Pipeline / post-deploy-checks (push) Successful in 1m28s
2026-05-07 10:28:33 +08:00
Your Name
814f5d8c6c docs(logbook): record channel shadow run deploy [skip ci] 2026-05-07 10:21:23 +08:00
AWOOOI CD
4f0d677e18 chore(cd): deploy 5d38115 [skip ci] 2026-05-07 02:17:32 +00:00
Your Name
5d38115d2f fix(awooop): anchor legacy channel events to shadow runs
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m13s
CD Pipeline / build-and-deploy (push) Successful in 4m9s
CD Pipeline / post-deploy-checks (push) Successful in 1m20s
2026-05-07 10:12:52 +08:00
Your Name
200b760512 docs(logbook): record approval timeline deploy [skip ci] 2026-05-07 10:09:42 +08:00
AWOOOI CD
83f4ab0dad chore(cd): deploy 2df36b1 [skip ci] 2026-05-07 10:06:30 +08:00
Your Name
2df36b11e2 fix(awooop): record approval decisions in run timeline
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 58s
CD Pipeline / build-and-deploy (push) Successful in 3m28s
CD Pipeline / post-deploy-checks (push) Successful in 1m21s
2026-05-07 10:01:58 +08:00
Your Name
1b7f46f02c docs(logbook): record cd 188 sync deploy [skip ci] 2026-05-07 09:56:17 +08:00
AWOOOI CD
6ae3a55aed chore(cd): deploy 94e680a [skip ci] 2026-05-07 01:52:22 +00:00
Your Name
94e680add4 fix(cd): split ssh and scp options for 188 sync
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
2026-05-07 09:46:17 +08:00
AWOOOI CD
4810125e9a chore(cd): deploy 3df2311 [skip ci] 2026-05-07 01:42:30 +00:00
Your Name
3df23112ef fix(awooop): reconnect approval decisions to run timeline
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 59s
CD Pipeline / build-and-deploy (push) Successful in 3m45s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-07 09:37:45 +08:00
Your Name
2ccc9d3071 docs(logbook): record awooop action panel deploy [skip ci] 2026-05-07 09:32:40 +08:00
AWOOOI CD
624c1b26c3 chore(cd): deploy beba668 [skip ci] 2026-05-07 09:30:24 +08:00
Your Name
beba668a4c feat(awooop): add run detail action panel
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m7s
CD Pipeline / build-and-deploy (push) Successful in 3m27s
CD Pipeline / post-deploy-checks (push) Successful in 1m18s
2026-05-07 09:25:49 +08:00
Your Name
c52ebfc042 docs(logbook): record awooop run detail i18n deploy [skip ci] 2026-05-07 06:06:33 +08:00
AWOOOI CD
8b9a974c66 chore(cd): deploy f960a4a [skip ci] 2026-05-07 05:51:18 +08:00
Your Name
f960a4a19b fix(awooop): localize run detail timeline
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m2s
CD Pipeline / build-and-deploy (push) Successful in 3m36s
CD Pipeline / post-deploy-checks (push) Successful in 1m22s
2026-05-07 05:46:31 +08:00
Your Name
9d85ec5e96 docs(logbook): record awooop timeline deploy [skip ci] 2026-05-07 05:05:16 +08:00
AWOOOI CD
c00c7be9ae chore(cd): deploy 336fd76 [skip ci] 2026-05-06 20:25:22 +00:00
Your Name
336fd76774 fix(ssh): suppress asyncssh info log formatting noise
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m22s
CD Pipeline / build-and-deploy (push) Successful in 3m31s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-07 04:20:26 +08:00
AWOOOI CD
cd637ef616 chore(cd): deploy 66e22e2 [skip ci] 2026-05-06 20:00:17 +00:00
Your Name
66e22e26cb feat(awooop): add run detail timeline
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / tests (push) Successful in 1m18s
CD Pipeline / build-and-deploy (push) Successful in 3m58s
CD Pipeline / post-deploy-checks (push) Successful in 1m25s
2026-05-07 03:55:01 +08:00
Your Name
f10ab71c52 docs(logbook): record auto repair handoff card deploy [skip ci] 2026-05-07 02:15:48 +08:00
AWOOOI CD
d5555697a1 chore(cd): deploy 3f69e03 [skip ci] 2026-05-06 18:12:48 +00:00
Your Name
3f69e03fcb fix(telegram): clarify auto repair handoff cards
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m17s
CD Pipeline / build-and-deploy (push) Successful in 3m47s
CD Pipeline / post-deploy-checks (push) Successful in 1m57s
2026-05-07 02:07:43 +08:00
Your Name
57df3582dd docs(logbook): record grouped alert digest deploy [skip ci] 2026-05-07 02:00:34 +08:00
AWOOOI CD
14180182d3 chore(cd): deploy 6ac61ab [skip ci] 2026-05-06 17:56:12 +00:00
Your Name
6ac61ab6d7 fix(telegram): digest grouped alert storms
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m2s
CD Pipeline / build-and-deploy (push) Successful in 3m39s
CD Pipeline / post-deploy-checks (push) Successful in 1m18s
2026-05-07 01:51:31 +08:00
Your Name
968de38a94 docs(logbook): record awooop grouped alert events deploy [skip ci] 2026-05-07 01:43:25 +08:00
AWOOOI CD
e5fd9395f7 chore(cd): deploy 251554c [skip ci] 2026-05-06 17:40:17 +00:00
Your Name
251554c044 fix(awooop): record grouped alert events
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m6s
CD Pipeline / build-and-deploy (push) Successful in 3m48s
CD Pipeline / post-deploy-checks (push) Successful in 1m25s
2026-05-07 01:35:09 +08:00
Your Name
1a1dea00eb docs(logbook): record alert grouping threshold deploy [skip ci] 2026-05-07 01:27:09 +08:00
AWOOOI CD
8485d99336 chore(cd): deploy c49246b [skip ci] 2026-05-07 01:24:50 +08:00
Your Name
c49246b8c6 fix(alerts): group repeated alerts from second firing
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m4s
CD Pipeline / build-and-deploy (push) Successful in 3m27s
CD Pipeline / post-deploy-checks (push) Successful in 1m20s
2026-05-07 01:20:18 +08:00
Your Name
67c70c071b docs(logbook): record telegram incident threading deploy [skip ci] 2026-05-07 01:18:46 +08:00
AWOOOI CD
18b34fed31 chore(cd): deploy 1f4a16e [skip ci] 2026-05-06 17:15:34 +00:00
Your Name
1f4a16e625 fix(telegram): thread incident follow-up messages
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m4s
CD Pipeline / build-and-deploy (push) Successful in 3m30s
CD Pipeline / post-deploy-checks (push) Successful in 1m19s
2026-05-07 01:11:02 +08:00
Your Name
1a72f771de docs(logbook): record telegram card format deployment [skip ci] 2026-05-07 01:06:38 +08:00
AWOOOI CD
68e741e0c3 chore(cd): deploy 341c3b6 [skip ci] 2026-05-07 01:03:00 +08:00
Your Name
341c3b6523 fix(telegram): format governance and runbook alerts
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m13s
CD Pipeline / build-and-deploy (push) Successful in 3m22s
CD Pipeline / post-deploy-checks (push) Successful in 1m28s
2026-05-07 00:58:20 +08:00
Your Name
f046742a4f docs(logbook): record gateway mirror deploy verification [skip ci] 2026-05-07 00:49:18 +08:00
AWOOOI CD
b1167edde7 chore(cd): deploy 82e9aea [skip ci] 2026-05-07 00:46:57 +08:00
Your Name
82e9aea057 fix(telegram): mirror remaining gateway sends
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / tests (push) Successful in 1m7s
CD Pipeline / build-and-deploy (push) Successful in 3m26s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-07 00:42:21 +08:00
Your Name
2a8b96cc7f docs(logbook): record outbound mirror log evidence [skip ci] 2026-05-07 00:41:02 +08:00
Your Name
328b24de6a docs(logbook): record direct telegram send convergence [skip ci] 2026-05-07 00:40:30 +08:00
AWOOOI CD
de4d35e184 chore(cd): deploy ecc65be [skip ci] 2026-05-06 16:38:14 +00:00
Your Name
ecc65be6e1 fix(telegram): route direct sends through gateway
All checks were successful
Code Review / ai-code-review (push) Successful in 13s
CD Pipeline / tests (push) Successful in 1m10s
CD Pipeline / build-and-deploy (push) Successful in 3m29s
CD Pipeline / post-deploy-checks (push) Successful in 1m20s
2026-05-07 00:33:27 +08:00
Your Name
7b98f71393 docs(logbook): record telegram outbound mirror deploy [skip ci] 2026-05-07 00:31:30 +08:00
AWOOOI CD
cf0b6be695 chore(cd): deploy 9365bda [skip ci] 2026-05-07 00:28:43 +08:00
Your Name
9365bdab93 fix(awooop): mirror telegram outbound messages
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m4s
CD Pipeline / build-and-deploy (push) Successful in 3m57s
CD Pipeline / post-deploy-checks (push) Successful in 1m27s
2026-05-07 00:23:32 +08:00
Your Name
012cd27b4a docs(logbook): record telegram dedup deploy verification [skip ci] 2026-05-06 22:44:08 +08:00
AWOOOI CD
678d489978 chore(cd): deploy c5964fb [skip ci] 2026-05-06 14:41:33 +00:00
Your Name
c5964fbcd3 fix(telegram): deduplicate repeated failure updates
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m4s
CD Pipeline / build-and-deploy (push) Successful in 3m47s
CD Pipeline / post-deploy-checks (push) Successful in 1m20s
2026-05-06 22:36:44 +08:00
Your Name
886657473e docs(logbook): record awooop console deploy verification [skip ci] 2026-05-06 22:32:46 +08:00
AWOOOI CD
d2d29185c9 chore(cd): deploy 7f4f5b2 [skip ci] 2026-05-06 22:29:34 +08:00
Your Name
7f4f5b24ba fix(awooop): clarify operator disposition lanes
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m5s
CD Pipeline / build-and-deploy (push) Successful in 3m43s
CD Pipeline / post-deploy-checks (push) Successful in 1m32s
2026-05-06 22:24:28 +08:00
Your Name
d2205dc1c0 docs(logbook): record diagnosis lane deploy verification [skip ci] 2026-05-06 22:12:32 +08:00
AWOOOI CD
19e721d4af chore(cd): deploy 9dfecc4 [skip ci] 2026-05-06 14:09:14 +00:00
Your Name
9dfecc4d1b fix(telegram): separate ssh diagnosis from repair failures
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m27s
CD Pipeline / build-and-deploy (push) Successful in 4m19s
CD Pipeline / post-deploy-checks (push) Successful in 1m25s
2026-05-06 22:03:19 +08:00
Your Name
53994e75f0 docs(logbook): record ssh mcp deploy verification [skip ci] 2026-05-06 21:59:25 +08:00
AWOOOI CD
2e06077337 chore(cd): deploy 8396d37 [skip ci] 2026-05-06 21:56:02 +08:00
Your Name
8396d37275 fix(mcp): harden ssh provider connection params
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 59s
CD Pipeline / build-and-deploy (push) Successful in 3m20s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-06 21:51:38 +08:00
Your Name
150f17b219 docs(logbook): record incident list deploy verification [skip ci] 2026-05-06 21:36:24 +08:00
AWOOOI CD
9a3afa11ed chore(cd): deploy edef1aa [skip ci] 2026-05-06 21:32:19 +08:00
Your Name
edef1aa4c7 fix(incidents): batch decision token lookup
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m5s
CD Pipeline / build-and-deploy (push) Successful in 3m20s
CD Pipeline / post-deploy-checks (push) Successful in 1m19s
2026-05-06 21:27:46 +08:00
AWOOOI CD
780a742110 chore(cd): deploy a0179ce [skip ci] 2026-05-06 21:22:23 +08:00
Your Name
a0179cec6e fix(incidents): keep list endpoint pure read
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m7s
CD Pipeline / build-and-deploy (push) Successful in 3m26s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-06 21:17:25 +08:00
Your Name
ea6b7d8f27 docs(logbook): record notification deploy verification [skip ci] 2026-05-06 21:09:30 +08:00
AWOOOI CD
dd75a3b943 chore(cd): deploy ea5ad04 [skip ci] 2026-05-06 21:04:59 +08:00
Your Name
ea5ad040da fix(telegram): clarify automation notification state
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m16s
CD Pipeline / build-and-deploy (push) Successful in 3m39s
CD Pipeline / post-deploy-checks (push) Successful in 1m18s
2026-05-06 20:59:58 +08:00
Your Name
b2f0db0717 docs(logbook): record awoo op console verification [skip ci] 2026-05-06 20:34:28 +08:00
Your Name
93c4b62826 docs(logbook): record openclaw fallback deployment [skip ci] 2026-05-06 20:28:46 +08:00
AWOOOI CD
a132bee1d7 chore(cd): deploy d0e9819 [skip ci] 2026-05-06 20:25:44 +08:00
Your Name
d0e98192de fix(ai): keep openclaw before gemini in alert fallback
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m9s
CD Pipeline / build-and-deploy (push) Successful in 3m28s
CD Pipeline / post-deploy-checks (push) Successful in 1m19s
2026-05-06 20:20:58 +08:00
AWOOOI CD
bcb9397c38 chore(cd): deploy 1a1ab0d [skip ci] 2026-05-06 20:16:22 +08:00
Your Name
1a1ab0df6e fix(ai): route alerts through openclaw before gemini
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m5s
CD Pipeline / build-and-deploy (push) Successful in 3m42s
CD Pipeline / post-deploy-checks (push) Successful in 1m36s
2026-05-06 20:11:24 +08:00
Your Name
572e7640cd docs(logbook): record openclaw nemo hotfix status 2026-05-06 19:53:53 +08:00
AWOOOI CD
2ece75935e chore(cd): deploy 2aaaa56 [skip ci] 2026-05-06 19:44:11 +08:00
Your Name
2aaaa5654f fix(drift): parse ollama json wrapped responses
All checks were successful
Code Review / ai-code-review (push) Successful in 13s
CD Pipeline / tests (push) Successful in 1m16s
CD Pipeline / build-and-deploy (push) Successful in 3m52s
CD Pipeline / post-deploy-checks (push) Successful in 1m30s
2026-05-06 19:39:01 +08:00
Your Name
8882301243 docs(logbook): record drift ollama live verification 2026-05-06 19:36:44 +08:00
AWOOOI CD
3aba5c7f9a chore(cd): deploy 2ef54cc [skip ci] 2026-05-06 19:32:23 +08:00
Your Name
2ef54ccc94 fix(ai): enforce ollama first for drift governance
All checks were successful
Code Review / ai-code-review (push) Successful in 16s
CD Pipeline / tests (push) Successful in 1m17s
CD Pipeline / build-and-deploy (push) Successful in 4m54s
CD Pipeline / post-deploy-checks (push) Successful in 3m10s
2026-05-06 19:26:09 +08:00
AWOOOI CD
d90414ddfa chore(cd): deploy a158b77 [skip ci] 2026-05-06 18:03:48 +08:00
Your Name
a158b77422 feat(heartbeat): show ollama endpoint topology
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m16s
CD Pipeline / build-and-deploy (push) Successful in 3m30s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-06 17:58:56 +08:00
Your Name
d79ec4f647 docs(ops): record ollama retirement verification 2026-05-06 17:53:40 +08:00
AWOOOI CD
ef3b05439a chore(cd): deploy 0e2e856 [skip ci] 2026-05-06 09:46:24 +00:00
Your Name
0e2e856f12 fix(mcp): normalize audit session ids
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 58s
CD Pipeline / build-and-deploy (push) Successful in 4m39s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-06 17:40:42 +08:00
AWOOOI CD
9b0f55fd90 chore(cd): deploy 7473a01 [skip ci] 2026-05-06 17:34:22 +08:00
Your Name
7473a01322 fix(awooop): route runs list before dynamic run lookup
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m3s
CD Pipeline / build-and-deploy (push) Successful in 3m22s
CD Pipeline / post-deploy-checks (push) Successful in 1m16s
2026-05-06 17:29:56 +08:00
AWOOOI CD
38b61e290e chore(cd): deploy fa0e956 [skip ci] 2026-05-06 17:23:18 +08:00
Your Name
fa0e956c0e fix(mcp): tag legacy provider calls with audit context
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 59s
CD Pipeline / build-and-deploy (push) Successful in 3m22s
CD Pipeline / post-deploy-checks (push) Successful in 1m19s
2026-05-06 17:18:52 +08:00
AWOOOI CD
76aaaf480c chore(cd): deploy c1ac157 [skip ci] 2026-05-06 17:08:36 +08:00
Your Name
c1ac157aaf fix(km): keep backfill reconciler loop alive
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m12s
CD Pipeline / build-and-deploy (push) Successful in 4m2s
CD Pipeline / post-deploy-checks (push) Successful in 1m18s
2026-05-06 17:03:22 +08:00
AWOOOI CD
73d7e332a4 chore(cd): deploy 33f85ec [skip ci] 2026-05-06 16:58:49 +08:00
Your Name
33f85ec8ca fix(logging): redact telegram bot urls
All checks were successful
Code Review / ai-code-review (push) Successful in 17s
CD Pipeline / tests (push) Successful in 1m14s
CD Pipeline / build-and-deploy (push) Successful in 3m19s
CD Pipeline / post-deploy-checks (push) Successful in 1m15s
2026-05-06 16:54:14 +08:00
AWOOOI CD
38a4748e17 chore(cd): deploy 8f715fd [skip ci] 2026-05-06 16:50:14 +08:00
Your Name
8f715fd3f2 fix(telegram): sanitize failover alert errors
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m5s
CD Pipeline / build-and-deploy (push) Successful in 3m25s
CD Pipeline / post-deploy-checks (push) Successful in 1m16s
2026-05-06 16:45:47 +08:00
AWOOOI CD
a94435f143 chore(cd): deploy a7a9ba9 [skip ci] 2026-05-06 16:39:29 +08:00
Your Name
a7a9ba996d fix(mcp): audit approved ssh execution path
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m5s
CD Pipeline / build-and-deploy (push) Successful in 3m45s
CD Pipeline / post-deploy-checks (push) Successful in 1m20s
2026-05-06 16:34:39 +08:00
Your Name
fcf93aac11 fix(ci): retry owner-required migrations safely
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
2026-05-06 16:31:04 +08:00
Your Name
1d9dbac112 docs(awooop): record mcp audit migration owner gap 2026-05-06 16:29:35 +08:00
AWOOOI CD
4e9981c182 chore(cd): deploy 7ed8c95 [skip ci] 2026-05-06 16:27:04 +08:00
Your Name
7ed8c95409 fix(mcp): persist blocked gateway audit rows
Some checks failed
Code Review / ai-code-review (push) Successful in 16s
run-migration / migrate (push) Failing after 9s
CD Pipeline / tests (push) Successful in 1m8s
CD Pipeline / build-and-deploy (push) Successful in 3m59s
CD Pipeline / post-deploy-checks (push) Successful in 1m46s
2026-05-06 16:21:43 +08:00
AWOOOI CD
1e68d45659 chore(cd): deploy 60c00d7 [skip ci] 2026-05-06 16:15:52 +08:00
Your Name
60c00d7a5d fix(mcp): tolerate legacy tool DTO fields
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m9s
CD Pipeline / build-and-deploy (push) Successful in 3m18s
CD Pipeline / post-deploy-checks (push) Successful in 1m27s
2026-05-06 16:11:26 +08:00
AWOOOI CD
72811b967e chore(cd): deploy 927c2a7 [skip ci] 2026-05-06 16:06:58 +08:00
Your Name
927c2a758d fix(mcp): accept legacy tool result data alias
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / tests (push) Successful in 1m6s
CD Pipeline / build-and-deploy (push) Successful in 3m24s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-06 16:02:27 +08:00
Your Name
e5094c5c53 fix(cd): harden 188 ops sync timeouts
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-06 15:42:30 +08:00
AWOOOI CD
154aec849e chore(cd): deploy 2245316 [skip ci] 2026-05-06 15:35:05 +08:00
Your Name
22453161e9 fix(ai): restore dynamic baseline holt winters fit
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 59s
CD Pipeline / build-and-deploy (push) Successful in 8m20s
CD Pipeline / post-deploy-checks (push) Successful in 1m14s
2026-05-06 15:30:31 +08:00
Your Name
d3e1b61096 fix(ops): persist 188 ollama localhost binding
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
2026-05-06 15:27:19 +08:00
Your Name
f88a3a846b fix(ops): contain 188 ollama gateway exposure
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-06 15:18:28 +08:00
Your Name
2adbf1e6cd fix(cd): timeout 188 ops sync
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-06 15:04:38 +08:00
AWOOOI CD
6c4f8379ad chore(cd): deploy d441f70 [skip ci] 2026-05-06 07:00:07 +00:00
Your Name
d441f70693 fix(ai): add 188 ollama retirement gate
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m2s
CD Pipeline / build-and-deploy (push) Successful in 9m2s
CD Pipeline / post-deploy-checks (push) Successful in 1m15s
2026-05-06 14:55:21 +08:00
AWOOOI CD
033ac8129b chore(cd): deploy 4111ea4 [skip ci] 2026-05-06 14:40:02 +08:00
Your Name
4111ea4f9f fix(ai): remove 188 ollama provider
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / tests (push) Successful in 1m13s
CD Pipeline / build-and-deploy (push) Successful in 3m36s
CD Pipeline / post-deploy-checks (push) Successful in 1m20s
2026-05-06 14:34:48 +08:00
OG T
578bf3bc7c docs: enforce traditional chinese documentation 2026-05-06 14:07:02 +08:00
OG T
ffd767d4bb docs(logbook): record alertmanager restart silence 2026-05-06 13:55:12 +08:00
OG T
6e2ab7cedc fix(alertmanager): make live config deployment safe
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-06 13:52:57 +08:00
OG T
c4f40235f4 fix(alertmanager): gate direct telegram to alertchain emergencies
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-06 13:45:33 +08:00
OG T
4753099155 fix(alertmanager): send direct alerts to sre group
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-06 13:38:47 +08:00
AWOOOI CD
eb71bc61ed chore(cd): deploy 8ae7789 [skip ci] 2026-05-06 13:31:01 +08:00
OG T
8ae7789e93 fix(cd): use absolute ssh key paths
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-06 13:25:45 +08:00
OG T
2c2bf9d665 fix(awooop): use shared redis for approval gates
Some checks failed
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m0s
CD Pipeline / build-and-deploy (push) Failing after 4m6s
CD Pipeline / post-deploy-checks (push) Has been skipped
2026-05-06 13:18:43 +08:00
AWOOOI CD
56b4d8165b chore(cd): deploy c696b99 [skip ci] 2026-05-06 13:10:34 +08:00
OG T
c696b99ccf fix(awooop): authenticate approval decisions
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m3s
CD Pipeline / build-and-deploy (push) Successful in 3m28s
CD Pipeline / post-deploy-checks (push) Successful in 1m25s
2026-05-06 13:05:51 +08:00
OG T
e6eae5cdc4 docs(awooop): unify flywheel integration plan 2026-05-06 12:54:35 +08:00
AWOOOI CD
072cc23a42 chore(cd): deploy 682c0b9 [skip ci] 2026-05-06 12:51:20 +08:00
OG T
682c0b9995 fix(web): render AwoooP index directly
Some checks are pending
CD Pipeline / post-deploy-checks (push) Blocked by required conditions
Code Review / ai-code-review (push) Successful in 13s
CD Pipeline / tests (push) Successful in 1m12s
CD Pipeline / build-and-deploy (push) Successful in 3m36s
2026-05-06 12:46:24 +08:00
AWOOOI CD
96ad3a18ee chore(cd): deploy 9ef9633 [skip ci] 2026-05-06 12:42:30 +08:00
Your Name
9ef9633aff fix(alerts): bypass proxy timeout for GCP Ollama 2026-05-06 08:55:14 +08:00
AWOOOI CD
df5e6c6626 chore(cd): deploy d2aebdd [skip ci] 2026-05-06 07:33:25 +08:00
Your Name
d2aebdd477 fix(cd): avoid host-key prompt during deploy
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-06 07:27:57 +08:00
Your Name
09256be62c fix(rag): use bge embeddings on GCP Ollama lane
Some checks failed
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m22s
CD Pipeline / build-and-deploy (push) Failing after 2h14m5s
CD Pipeline / post-deploy-checks (push) Has been cancelled
2026-05-06 05:49:37 +08:00
AWOOOI CD
a4fece11cc chore(cd): deploy c2c0b1e [skip ci] 2026-05-06 05:32:51 +08:00
Your Name
c2c0b1ec82 fix(alerts): let GCP Ollama finish before cloud fallback
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m9s
CD Pipeline / build-and-deploy (push) Successful in 4m21s
CD Pipeline / post-deploy-checks (push) Successful in 1m16s
2026-05-06 05:27:55 +08:00
AWOOOI CD
1d0e80c091 chore(cd): deploy 3b64d66 [skip ci] 2026-05-06 03:38:45 +08:00
Your Name
3b64d66836 fix(alerts): guard approval actions and wire playbook learning
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 42s
CD Pipeline / build-and-deploy (push) Successful in 3m31s
CD Pipeline / post-deploy-checks (push) Successful in 1m18s
2026-05-06 03:34:24 +08:00
Your Name
5890fffd7f docs(awooop): record control plane bootstrap seed 2026-05-06 00:59:58 +08:00
AWOOOI CD
eced8617d3 chore(cd): deploy a2c4b3d [skip ci] 2026-05-06 00:53:15 +08:00
Your Name
587551c1f1 fix(ops): monitor full-stack cold-start gates
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
Deploy Alert Rules / Deploy Prometheus Alert Rules (push) Successful in 18s
2026-05-06 00:48:05 +08:00
Your Name
a2c4b3d47e fix(awooop): align console with flywheel execution metrics
Some checks failed
Code Review / ai-code-review (push) Has been cancelled
CD Pipeline / tests (push) Successful in 2m22s
CD Pipeline / build-and-deploy (push) Successful in 3m54s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s
2026-05-06 00:46:08 +08:00
Your Name
20ef0c1455 docs(ops): record momo reboot noise cleanup 2026-05-06 00:34:25 +08:00
AWOOOI CD
cb9551fb00 chore(cd): deploy 5ed396e [skip ci] 2026-05-06 00:24:17 +08:00
Your Name
5ed396e390 fix(decision): derive telegram dedup from incident signals
All checks were successful
CD Pipeline / tests (push) Successful in 58s
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / build-and-deploy (push) Successful in 3m30s
CD Pipeline / post-deploy-checks (push) Successful in 2m19s
2026-05-06 00:19:35 +08:00
Your Name
6e96623884 fix(ops): harden momo scheduler cold start gate
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-06 00:15:14 +08:00
AWOOOI CD
87ce02f34d chore(cd): deploy 2aa31c2 [skip ci] 2026-05-06 00:10:42 +08:00
Your Name
0315c2b510 docs(ops): codify full stack cold start recovery
All checks were successful
Code Review / ai-code-review (push) Successful in 7s
2026-05-06 00:07:57 +08:00
Your Name
2aa31c205a fix(ai): require 111 before alert cloud fallback
All checks were successful
CD Pipeline / tests (push) Successful in 54s
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / build-and-deploy (push) Successful in 3m21s
CD Pipeline / post-deploy-checks (push) Successful in 2m2s
2026-05-06 00:05:51 +08:00
Your Name
23932773ef fix(monitoring): route docker baseline alerts to ssh
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
Deploy Alert Rules / Deploy Prometheus Alert Rules (push) Successful in 19s
2026-05-06 00:00:12 +08:00
Your Name
2f50c67f5c fix(monitoring): keep host alert ssh diagnostics canonical
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
Deploy Alert Rules / Deploy Prometheus Alert Rules (push) Successful in 20s
E2E Health Check / e2e-health (push) Successful in 2m35s
2026-05-05 23:57:53 +08:00
Your Name
85d5b5c823 fix(cd): clear empty docker build locks
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-05 23:48:35 +08:00
AWOOOI CD
25b1923d2e chore(cd): deploy e208798 [skip ci] 2026-05-05 23:44:08 +08:00
Your Name
e208798531 fix(ai): keep GCP Ollama lane on safe models
All checks were successful
CD Pipeline / tests (push) Successful in 54s
Code Review / ai-code-review (push) Successful in 14s
CD Pipeline / build-and-deploy (push) Successful in 3m25s
CD Pipeline / post-deploy-checks (push) Successful in 1m50s
2026-05-05 23:37:33 +08:00
AWOOOI CD
1ba36697ca chore(cd): deploy 405b8b8 [skip ci] 2026-05-05 23:34:17 +08:00
Your Name
405b8b8ef9 fix(ops): bring drift scanner under gitops
Some checks failed
CD Pipeline / tests (push) Successful in 59s
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / build-and-deploy (push) Successful in 8m52s
CD Pipeline / post-deploy-checks (push) Has been cancelled
2026-05-05 23:20:12 +08:00
Your Name
1cc215ec30 fix(ops): keep Ollama health checks on alert fast model
Some checks failed
CD Pipeline / tests (push) Successful in 52s
Code Review / ai-code-review (push) Successful in 9s
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled
2026-05-05 23:16:21 +08:00
AWOOOI CD
83daeb3f87 chore(cd): deploy c4854bb [skip ci] 2026-05-05 23:10:29 +08:00
Your Name
c4854bb355 fix(ai): isolate heavy Ollama workloads from GCP alert lane
All checks were successful
CD Pipeline / tests (push) Successful in 54s
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / build-and-deploy (push) Successful in 3m19s
CD Pipeline / post-deploy-checks (push) Successful in 3m12s
2026-05-05 23:06:07 +08:00
Your Name
1dcc6d61dc fix(ops): retry cold-start HTTP probes
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-05 22:56:57 +08:00
Your Name
ed7c6946cb docs(awooop): define private Ollama mesh gateway
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-05 22:56:22 +08:00
AWOOOI CD
7baa316224 chore(cd): deploy e8f2792 [skip ci] 2026-05-05 22:48:02 +08:00
Your Name
31fd9cbf48 docs(ops): record GCP Ollama alert hotfix 2026-05-05 22:45:40 +08:00
Your Name
e8f279280f fix(cd): install buildx for buildkit builds
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-05 22:39:04 +08:00
Your Name
787acd3bda fix(cd): disable buildkit on host runner
All checks were successful
Code Review / ai-code-review (push) Successful in 9s
2026-05-05 22:26:07 +08:00
Your Name
86bd6432ee fix(ops): make bge-m3 migration idempotent
Some checks failed
Code Review / ai-code-review (push) Successful in 9s
run-migration / migrate (push) Successful in 7s
CD Pipeline / tests (push) Successful in 2m8s
CD Pipeline / build-and-deploy (push) Failing after 9s
CD Pipeline / post-deploy-checks (push) Has been skipped
2026-05-05 22:21:47 +08:00
Your Name
bf847ad045 fix(ai): stabilize GCP Ollama alert lane
Some checks failed
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / build-and-deploy (push) Has been cancelled
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / tests (push) Has been cancelled
2026-05-05 22:20:27 +08:00
Your Name
a4e9a04982 fix(ops): harden cold-start schedule recovery
Some checks failed
Code Review / ai-code-review (push) Successful in 10s
run-migration / migrate (push) Successful in 7s
CD Pipeline / build-and-deploy (push) Has been cancelled
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / tests (push) Has been cancelled
2026-05-05 22:17:10 +08:00
AWOOOI CD
72a1d33f9d chore(cd): deploy bec8212 [skip ci] 2026-05-05 21:59:52 +08:00
Your Name
bec82127e7 fix(cd): install docker cli in host runner bootstrap
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-05 21:47:13 +08:00
Your Name
8f83773431 fix(cd): preserve remote kubectl in secrets injection
All checks were successful
Code Review / ai-code-review (push) Successful in 9s
2026-05-05 21:39:26 +08:00
Your Name
8495a45002 fix(cd): bootstrap host runner tools
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
2026-05-05 21:25:52 +08:00
Your Name
333c8a9cfd fix(cd): target k3s control plane for deploy
Some checks failed
CD Pipeline / tests (push) Failing after 1s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Code Review / ai-code-review (push) Successful in 10s
2026-05-05 21:21:00 +08:00
Your Name
1baeb7ee61 chore(cd): deploy ee5e3bc [skip ci] 2026-05-05 21:09:09 +08:00
Your Name
ee5e3bc94f fix(openclaw): gate alert cloud fallback behind flag
Some checks failed
Code Review / ai-code-review (push) Successful in 27s
CD Pipeline / tests (push) Successful in 5m17s
CD Pipeline / build-and-deploy (push) Failing after 5m35s
CD Pipeline / post-deploy-checks (push) Has been skipped
2026-05-05 20:54:47 +08:00
AWOOOI CD
7b0a4bce98 chore(cd): deploy 2221fd3 [skip ci] 2026-05-05 16:26:09 +08:00
Your Name
2221fd3256 fix(ops): persist host resource guardrails
All checks were successful
CD Pipeline / tests (push) Successful in 5m25s
Code Review / ai-code-review (push) Successful in 25s
Deploy Alert Rules / Deploy Prometheus Alert Rules (push) Successful in 37s
CD Pipeline / build-and-deploy (push) Successful in 7m31s
CD Pipeline / post-deploy-checks (push) Successful in 5m10s
2026-05-05 16:13:19 +08:00
AWOOOI CD
84a661beaf chore(cd): deploy 6b93c8f [skip ci] 2026-05-05 16:11:35 +08:00
335 changed files with 48182 additions and 3735 deletions

View File

@@ -1,832 +0,0 @@
{
"permissions": {
"allow": [
"Read(**)",
"Glob(**)",
"Grep(**)",
"Bash(curl *)",
"Bash(kubectl get *)",
"Bash(kubectl describe *)",
"Bash(kubectl logs *)",
"Bash(kubectl rollout status *)",
"Bash(docker ps *)",
"Bash(docker logs *)",
"Bash(ls *)",
"Bash(cat *)",
"Bash(head *)",
"Bash(tail *)",
"Bash(grep *)",
"Bash(find *)",
"Bash(pwd)",
"Bash(which *)",
"Bash(echo *)",
"Bash(git status *)",
"Bash(git log *)",
"Bash(git diff *)",
"Bash(git branch *)",
"Bash(git remote *)",
"Edit(**)",
"Write(apps/**)",
"Write(packages/**)",
"Write(docs/**)",
"Write(.agents/**)",
"Write(k8s/**)",
"Write(scripts/**)",
"Bash(pnpm *)",
"Bash(npm *)",
"Bash(npx *)",
"Bash(node *)",
"Bash(python *)",
"Bash(python3 *)",
"Bash(pip *)",
"Bash(cd *)",
"Bash(mkdir *)",
"Bash(touch *)",
"Bash(cp *)",
"Bash(mv *)",
"Bash(chmod *)",
"Bash(pytest *)",
"Bash(playwright *)",
"Bash(git add *)",
"Bash(git commit *)",
"Bash(git stash *)",
"Bash(ssh *)",
"Bash(scp *)",
"Bash(export KUBECONFIG=*)",
"Bash(git push:*)",
"Bash(claude --version)",
"Bash(git check-ignore:*)",
"WebSearch",
"Bash(claude plugin:*)",
"Bash(claude --channels)",
"Bash(claude --channels plugin:telegram@claude-plugins-official --help)",
"Bash(bash)",
"Bash(source ~/.zshrc)",
"Bash(~/.bun/bin/bun --version)",
"Bash(env)",
"Bash(claude upgrade:*)",
"Bash(/Users/ogt/.local/bin/claude --help)",
"Bash(CLAUDE_CODE_EXPERIMENTAL_CHANNELS=1 claude --help)",
"Bash(claude --channels plugin:telegram@claude-plugins-official --print \"hello\")",
"Bash(mkdir -p ~/.claude/channels/telegram)",
"Bash(~/.claude/channels/telegram/.env)",
"Bash(~/.bun/bin/bun run:*)",
"Bash(sudo ln:*)",
"Bash(ln -sf ~/.bun/bin/bun /opt/homebrew/bin/bun)",
"Bash(xargs python:*)",
"Bash(uv --version)",
"Bash(pip3 install:*)",
"Bash(pip3 show:*)",
"Bash(ruff *)",
"Bash(mypy *)",
"Bash(black *)",
"Bash(isort *)",
"Bash(timeout *)",
"Bash(wc *)",
"Bash(sort *)",
"Bash(uniq *)",
"Bash(awk *)",
"Bash(sed *)",
"Bash(tr *)",
"Bash(tee *)",
"Bash(xargs *)",
"Bash(test *)",
"Bash([ *)",
"Bash(true)",
"Bash(false)",
"Bash(date *)",
"Bash(sleep *)",
"Bash(kill *)",
"Bash(pkill *)",
"Bash(ps *)",
"Bash(top *)",
"Bash(htop *)",
"Bash(df *)",
"Bash(du *)",
"Bash(free *)",
"Bash(uname *)",
"Bash(hostname *)",
"Bash(whoami)",
"Bash(id *)",
"Bash(groups *)",
"Bash(stat *)",
"Bash(file *)",
"Bash(realpath *)",
"Bash(dirname *)",
"Bash(basename *)",
"Bash(type *)",
"Bash(command *)",
"Bash(hash *)",
"Bash(alias *)",
"Bash(set *)",
"Bash(unset *)",
"Bash(printenv *)",
"Bash(diff *)",
"Bash(cmp *)",
"Bash(comm *)",
"Bash(join *)",
"Bash(paste *)",
"Bash(cut *)",
"Bash(rev *)",
"Bash(nl *)",
"Bash(fmt *)",
"Bash(fold *)",
"Bash(pr *)",
"Bash(expand *)",
"Bash(unexpand *)",
"Bash(od *)",
"Bash(xxd *)",
"Bash(hexdump *)",
"Bash(strings *)",
"Bash(base64 *)",
"Bash(md5sum *)",
"Bash(sha256sum *)",
"Bash(jq *)",
"Bash(yq *)",
"Bash(gh *)",
"Bash(docker build *)",
"Bash(docker run *)",
"Bash(docker exec *)",
"Bash(docker compose *)",
"Bash(docker-compose *)",
"Bash(docker images *)",
"Bash(docker inspect *)",
"Bash(docker network *)",
"Bash(docker volume *)",
"Bash(kubectl apply *)",
"Bash(kubectl create *)",
"Bash(kubectl exec *)",
"Bash(kubectl port-forward *)",
"Bash(kubectl config *)",
"Bash(helm *)",
"Bash(terraform *)",
"Bash(ansible *)",
"Bash(bun *)",
"Bash(deno *)",
"Bash(cargo *)",
"Bash(rustc *)",
"Bash(go *)",
"Bash(java *)",
"Bash(javac *)",
"Bash(gradle *)",
"Bash(mvn *)",
"Bash(make *)",
"Bash(cmake *)",
"Bash(ninja *)",
"Bash(uv *)",
"Bash(poetry *)",
"Bash(pipx *)",
"Bash(virtualenv *)",
"Bash(venv *)",
"Bash(conda *)",
"Bash(brew *)",
"Bash(apt *)",
"Bash(apt-get *)",
"Bash(yum *)",
"Bash(dnf *)",
"Bash(pacman *)",
"Bash(snap *)",
"Bash(flatpak *)",
"Bash(systemctl status *)",
"Bash(journalctl *)",
"Bash(service * status)",
"Bash(nc *)",
"Bash(netstat *)",
"Bash(ss *)",
"Bash(lsof *)",
"Bash(nmap *)",
"Bash(dig *)",
"Bash(nslookup *)",
"Bash(host *)",
"Bash(ping *)",
"Bash(traceroute *)",
"Bash(mtr *)",
"Bash(wget *)",
"Bash(http *)",
"Bash(httpie *)",
"Bash(hadolint apps/api/Dockerfile)",
"Bash(docker info:*)",
"Bash(kubectl cluster-info:*)",
"Read(//var/run/**)",
"Bash(open -a Docker)",
"Bash(git rm:*)",
"Bash(git reset:*)",
"Bash(kubectl --kubeconfig ~/.kube/config get pods -n awoooi -o wide)",
"Bash(kubectl scale:*)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollam@192.168.0.188 \"docker ps -a | grep -i claw\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps -a | grep -i claw\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker start clawbot && sleep 3 && docker logs clawbot --tail=10\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps | grep clawbot && docker port clawbot\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail=30\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/clawbot/.env | grep -E ''\\(TG_|TELEGRAM\\)'' | head -5\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker inspect clawbot --format=''{{range .Mounts}}{{.Source}}:{{.Destination}} {{end}}''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker inspect clawbot --format=''{{range .Config.Env}}{{println .}}{{end}}'' | grep -E ''\\(TG_|TELEGRAM|ENABLED\\)''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i ''logout\\\\|log.out\\\\|shutdown\\\\|stop'' | tail -20\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -E ''\\(getMe|getUpdates|sendMessage\\).*200'' | tail -5\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i ''success\\\\|started\\\\|初始化'' | head -20\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -E ''2026-03-\\(19|20|21\\)'' | grep -i ''error\\\\|fail\\\\|logout\\\\|400\\\\|401'' | head -20\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker stop clawbot && docker rm clawbot && echo ''✅ OpenClaw 已永久停用''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker-compose ps 2>/dev/null || ls -la docker-compose.yml 2>/dev/null || find /home/ollama -name ''docker-compose*.yml'' -type f 2>/dev/null | head -5\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker-compose up -d && sleep 3 && docker-compose ps\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker compose up -d 2>&1 || docker run -d --name clawbot --restart unless-stopped -p 8088:8088 -v /var/run/docker.sock:/var/run/docker.sock 192.168.0.110:5000/library/clawbot:stable-v6 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail=15 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''table {{.Names}}\\\\t{{.Status}}'' | grep -E ''clawbot|litellm''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && sed -i ''s|TELEGRAM_BOT_TOKEN=.*|TELEGRAM_BOT_TOKEN=8569720657:AAHrJ5CMOb4rP0IYJrCUiDViLsnpK69uEUI|'' .env && grep TELEGRAM_BOT_TOKEN .env\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker compose down && docker compose up -d && sleep 5 && docker logs clawbot --tail=10\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''{{.Names}}'' | grep -i alert\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker stop alertmanager && docker rm alertmanager && echo ''✅ 舊 AIOPS Alertmanager 已停用''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''table {{.Names}}\\\\t{{.Image}}\\\\t{{.Status}}''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/momo-pro/monitoring/prometheus/alert_rules.yml 2>/dev/null | grep -A5 ''ClawbotDown\\\\|telegram\\\\|AIOPS'' | head -30\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"find /home/ollama -name ''*.yml'' -type f 2>/dev/null | xargs grep -l ''ClawbotDown\\\\|telegram'' 2>/dev/null | head -5\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec clawbot grep -r ''協同警報\\\\|ClawbotDown'' /app 2>/dev/null | head -5\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec prometheus cat /etc/prometheus/prometheus.yml 2>/dev/null | grep -A10 ''alerting\\\\|alertmanager''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps | grep -i alert || echo ''✅ 沒有 alertmanager 在運行''\")",
"Bash(jq -r '.status, .components | to_entries[] | \"\"\"\"\\\\\\(.key\\): \\\\\\(.value.status\\)\"\"\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''table {{.Names}}\\\\t{{.Status}}'' | grep clawbot && docker logs clawbot --tail=15\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker inspect clawbot --format=''{{range .Config.Env}}{{println .}}{{end}}'' | grep TELEGRAM\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && sed -i ''s|TELEGRAM_BOT_TOKEN=.*|TELEGRAM_BOT_TOKEN=8569720657:AAFjDyjAN94QQrjn1gBnFXAyS20EUyozH8c|'' .env && docker compose down && docker compose up -d && sleep 5 && docker logs clawbot --tail=10\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec clawbot grep -r ''ClawBotDown\\\\|ClawbotDown'' /app 2>/dev/null | head -5 || echo ''在程式碼中找不到''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec prometheus cat /etc/prometheus/alerts.yml 2>/dev/null | grep -A10 ''ClawBot\\\\|clawbot'' | head -30\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec prometheus cat /etc/prometheus/alerts.yml 2>/dev/null | grep -i ''clawbot\\\\|claw'' -A5 -B5\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --since=5m 2>&1 | grep -i ''clawbot\\\\|incident\\\\|alert'' | tail -20\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail 50 2>&1\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i ''telegram\\\\|polling\\\\|bot'' | tail -20\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''table {{.Names}}\\\\t{{.Status}}\\\\t{{.Ports}}'' | grep -E ''claw|NAME''\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -E ''telegram|Telegram|error|Error'' | tail -20\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps | grep ollama\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps -a --format ''table {{.Names}}\\\\t{{.Status}}'' | head -20\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"sed -i ''s|host.docker.internal|172.17.0.1|g'' /home/ollama/clawbot-v5/.env && cat /home/ollama/clawbot-v5/.env | grep OLLAMA\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker-compose restart clawbot && sleep 3 && docker logs clawbot --tail 30 2>&1\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker compose restart clawbot && sleep 5 && docker logs clawbot --tail 30 2>&1\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec clawbot curl -s http://172.17.0.1:11434/api/tags | head -c 200\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | tail -10\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -iE ''error|telegram|polling|alert|send'' | tail -30\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/clawbot-v5/.env | grep OLLAMA\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker compose up -d --force-recreate clawbot && sleep 5 && docker logs clawbot 2>&1 | tail -20\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec clawbot curl -s http://172.17.0.1:11434/api/tags | head -c 100\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --since 5m 2>&1 | tail -30\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec momo-db psql -U postgres -d clawbot -c \"\"SELECT enum_range\\(NULL::approvalstatus\\);\"\"\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec -e PGPASSWORD=clawbot123 momo-db psql -U clawbot -d clawbot -c \"\"SELECT enum_range\\(NULL::approvalstatus\\);\"\"\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps | grep -E ''postgres|db''\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec momo-db env | grep -i postgres\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"PGPASSWORD=AwoooiProd2026 psql -h localhost -U awoooi -d awoooi_prod -c \"\"SELECT enum_range\\(NULL::approvalstatus\\);\"\"\")",
"Bash(KUBECONFIG=~/.kube/config kubectl config get-contexts)",
"Bash(docker tag:*)",
"Bash(docker push:*)",
"Bash(ssh ollama@192.168.0.188 \"cd ~/awoooi-build && find apps/web/src -name ''''*.ts'''' -o -name ''''*.tsx'''' | head -30 | xargs md5sum\")",
"Bash(rsync -avz --exclude 'node_modules' --exclude '.next' --exclude '.turbo' --exclude '*.log' /Users/ogt/awoooi/ ollama@192.168.0.188:~/awoooi-build/)",
"Bash(gh run:*)",
"Bash(APPROVAL_ID=\"ea43578e-17cd-40b9-b4c3-8fe8e92f225c\" __NEW_LINE_76dc92b2699cd7d5__ echo \"=== 檢查 Approval Metadata ===\" curl -s \"https://awoooi.wooo.work/api/v1/approvals/pending\")",
"Bash(APPROVAL_ID=\"865ab726-c3b9-447e-86a9-65a6227516e6\" __NEW_LINE_db14ef76ca26af32__ echo \"=== 簽核 ===\" curl -s -X POST \"https://awoooi.wooo.work/api/v1/approvals/$APPROVAL_ID/sign\" -H \"Content-Type: application/json\" -d '{\"\"\"\"signer_id\"\"\"\":\"\"\"\"commander\"\"\"\",\"\"\"\"signer_name\"\"\"\":\"\"\"\"Commander\"\"\"\",\"\"\"\"comment\"\"\"\":\"\"\"\"Test resolution\"\"\"\"}')",
"Read(//Users/ogt/awoooi/**)",
"Bash(APPROVAL_ID=\"e9445e68-6c3e-4899-b507-3b9b7bcaf0a7\" __NEW_LINE_680ad94d4896e58a__ echo \"=== 簽核 ===\" curl -s -X POST \"https://awoooi.wooo.work/api/v1/approvals/$APPROVAL_ID/sign\" -H \"Content-Type: application/json\" -d '{\"\"\"\"signer_id\"\"\"\":\"\"\"\"commander\"\"\"\",\"\"\"\"signer_name\"\"\"\":\"\"\"\"Commander\"\"\"\",\"\"\"\"comment\"\"\"\":\"\"\"\"Final test\"\"\"\"}')",
"Bash(APPROVAL_ID=\"eb0afb4e-834b-4af7-9ae0-3c58232fdd99\" INCIDENT=\"INC-20260323-F05CD6\" __NEW_LINE_47f1c3803a64b43c__ echo \"=== 簽核前 Incident 狀態 ===\" curl -s \"https://awoooi.wooo.work/api/v1/incidents/$INCIDENT\")",
"Bash(mkdir -p /Users/ogt/awoooi/.claude/hooks)",
"Bash(/Users/ogt/awoooi/.claude/hooks/pre-commit-check.sh:*)",
"Bash(git -C /Users/ogt/awoooi status packages/lewooogo-core/)",
"Bash(git -C /Users/ogt/awoooi ls-files packages/lewooogo-core/src/)",
"Bash(git -C /Users/ogt/awoooi status --short)",
"Bash(git -C /Users/ogt/awoooi add apps/api/pyproject.toml apps/api/scripts/ apps/api/src/ apps/web/.eslintrc.js apps/web/src/ packages/lewooogo-core/.eslintrc.js)",
"Bash(git -C /Users/ogt/awoooi diff --cached --stat)",
"Bash(git -C:*)",
"Bash(for wf:*)",
"Bash(do)",
"Bash(done)",
"Bash(jq 'if type == \"\"\"\"array\"\"\"\" then .[0] | {incident_id, status, decision} else . end')",
"Bash(PYTHONPATH=. python -c \"from src.api.v1.stats import router; print\\(''✅ stats.py 載入成功,路由數:'', len\\(router.routes\\)\\)\")",
"Bash(PYTHONPATH=. pytest tests/ -v --tb=short)",
"Bash(PYTHONPATH=. pytest tests/test_stats_api.py -v --tb=short)",
"Bash(PYTHONPATH=. pytest tests/test_webhook_telegram_integration.py::TestNewAlertTelegramPush -v --tb=long)",
"Bash(PYTHONPATH=. pytest tests/test_webhook_telegram_integration.py::TestNewAlertTelegramPush -v --tb=short)",
"Bash(PYTHONPATH=. pytest tests/test_webhook_telegram_integration.py -v --tb=short)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n awoooi')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get ns awoooi && kubectl get all -n awoooi')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get ns | head -20')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n awoooi-prod')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs awoooi-worker-bb89b5ffc-bpf45 -n awoooi-prod --tail=50')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs awoooi-worker-bb89b5ffc-bpf45 -n awoooi-prod --tail=100 | grep -i telegram')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs awoooi-api-8c9489b6c-cm8g5 -n awoooi-prod --tail=50 | grep -i webhook')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs awoooi-api-8c9489b6c-cm8g5 -n awoooi-prod --tail=30')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n monitoring | grep alertmanager')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl get configmap alertmanager-config -n monitoring -o jsonpath=''{.data.alertmanager\\\\.yml}'' | head -50\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get svc -n awoooi-prod')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl patch configmap alertmanager-config -n monitoring --type merge -p ''{\"\"data\"\":{\"\"alertmanager.yml\"\":\"\"global:\\\\n resolve_timeout: 5m\\\\n\\\\nroute:\\\\n group_by: [\\\\\"\"alertname\\\\\"\", \\\\\"\"severity\\\\\"\"]\\\\n group_wait: 30s\\\\n group_interval: 5m\\\\n repeat_interval: 4h\\\\n receiver: \\\\\"\"awoooi-webhook\\\\\"\"\\\\n routes:\\\\n - match:\\\\n severity: critical\\\\n receiver: \\\\\"\"awoooi-webhook\\\\\"\"\\\\n group_wait: 10s\\\\n repeat_interval: 1h\\\\n - match:\\\\n severity: warning\\\\n receiver: \\\\\"\"awoooi-webhook\\\\\"\"\\\\n group_wait: 1m\\\\n repeat_interval: 4h\\\\n\\\\nreceivers:\\\\n - name: \\\\\"\"awoooi-webhook\\\\\"\"\\\\n webhook_configs:\\\\n - url: \\\\\"\"http://192.168.0.120:32334/api/v1/webhook/alertmanager\\\\\"\"\\\\n send_resolved: true\\\\n\\\\ninhibit_rules:\\\\n - source_match:\\\\n severity: \\\\\"\"critical\\\\\"\"\\\\n target_match:\\\\n severity: \\\\\"\"warning\\\\\"\"\\\\n equal: [\\\\\"\"alertname\\\\\"\", \\\\\"\"instance\\\\\"\"]\\\\n\"\"}}''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl rollout restart deployment/alertmanager -n monitoring && kubectl rollout status deployment/alertmanager -n monitoring')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl get configmap alertmanager-config -n monitoring -o jsonpath=''{.data.alertmanager\\\\.yml}'' | grep -A 3 ''url:''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n awoooi-prod -o jsonpath=\"\"{range .items[*]}{.metadata.name}{\\\\\"\" \\\\\"\"}{.spec.containers[*].image}{\\\\\"\"\\\\\\\\n\\\\\"\"}{end}\"\"')",
"Bash(git mv:*)",
"Bash(for file:*)",
"Bash(do echo:*)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 wooo@192.168.0.120 \"echo ''Connected''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl get deployment -n awoooi-prod -o jsonpath=''{range .items[*]}{.metadata.name}{\"\" selector: \"\"}{.spec.selector.matchLabels}{\"\"\\\\n\"\"}{end}''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl delete deployment awoooi-api awoooi-web awoooi-worker -n awoooi-prod\")",
"WebFetch(domain:awoooi.wooo.work)",
"WebFetch(domain:api.awoooi.wooo.work)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n awoooi-prod -o wide')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get svc,ingress -n awoooi-prod')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-api -- curl -sf http://localhost:8000/api/v1/health 2>&1')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'curl -sf http://10.43.125.201:8000/api/v1/health 2>&1 || echo \"\"FAILED\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'sudo nginx -t 2>&1 && sudo cat /etc/nginx/sites-enabled/awoooi* 2>/dev/null || sudo cat /etc/nginx/conf.d/awoooi* 2>/dev/null || echo \"\"No awoooi nginx config found\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'cat /etc/nginx/sites-enabled/* 2>/dev/null | grep -A5 awoooi || cat /etc/nginx/conf.d/* 2>/dev/null | grep -A5 awoooi || ls -la /etc/nginx/ 2>/dev/null || echo \"\"No nginx on this host\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'ls /etc/nginx/sites-enabled/ 2>/dev/null && cat /etc/nginx/sites-enabled/*awoooi* 2>/dev/null || echo \"\"Checking conf.d...\"\" && ls /etc/nginx/conf.d/ 2>/dev/null')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'grep -l awoooi /etc/nginx/sites-enabled/* 2>/dev/null || grep -r \"\"awoooi\"\" /etc/nginx/sites-enabled/ 2>/dev/null | head -20')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'grep -r \"\"awoooi\\\\|32334\\\\|32335\"\" /etc/nginx/ 2>/dev/null | head -20')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S cp /tmp/awoooi-prod.conf /etc/nginx/conf.d/ && echo \"\"Config copied\"\" && sudo nginx -t 2>&1')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S ls -la /etc/nginx/ssl/ 2>/dev/null || echo \"\"No ssl dir\"\" && sudo ls -la /etc/letsencrypt/live/ 2>/dev/null | head -10')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S sed -i \"\"s|/etc/nginx/ssl/awoooi.crt|/etc/letsencrypt/live/awoooi.wooo.work/fullchain.pem|g\"\" /etc/nginx/conf.d/awoooi-prod.conf && sudo sed -i \"\"s|/etc/nginx/ssl/awoooi.key|/etc/letsencrypt/live/awoooi.wooo.work/privkey.pem|g\"\" /etc/nginx/conf.d/awoooi-prod.conf && echo \"\"Paths fixed\"\" && sudo nginx -t 2>&1')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S nginx -s reload && echo \"\"Nginx reloaded!\"\" && sleep 2')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'grep -r \"\"awoooi\"\" /etc/nginx/sites-enabled/ 2>/dev/null | head -5')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S grep -rl \"\"awoooi.wooo.work\"\" /etc/nginx/ 2>/dev/null')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'curl -sf http://192.168.0.121:32334/api/v1/health 2>&1 || echo \"\"FAILED to reach 121\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S rm /etc/nginx/conf.d/awoooi-prod.conf && sudo nginx -t && sudo nginx -s reload && echo \"\"Cleaned up duplicate config\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -30 /var/log/nginx/error.log 2>/dev/null')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'grep -r \"\"api.awoooi\"\" /etc/nginx/ 2>/dev/null || echo \"\"No api.awoooi config found\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get configmap awoooi-config -n awoooi-prod -o yaml | grep -A5 NEXT_PUBLIC')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get deployment awoooi-web -n awoooi-prod -o yaml | grep -A20 \"\"env:\"\" | head -25')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -10 /var/log/nginx/access.log 2>/dev/null | grep awoooi')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -5 /var/log/nginx/error.log 2>/dev/null')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S stat /etc/nginx/sites-available/awoooi.wooo.work.conf 2>/dev/null | grep -E \"\"Modify|Change|Birth\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs -n awoooi-prod -l app=awoooi-web --tail=30 2>/dev/null | grep -i \"\"api\\\\|error\\\\|fetch\"\" | head -20')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -20 /var/log/nginx/access.log 2>/dev/null | grep -E \"\"awoooi.*api\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -20 /var/log/nginx/awoooi-prod-access.log 2>/dev/null')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-web -- env | grep -i api')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-web -- sh -c \"\"grep -r \\\\\"\"NEXT_PUBLIC_API_URL\\\\|api.awoooi\\\\\"\" /app/.next/static/chunks/*.js 2>/dev/null | head -5 || grep -r \\\\\"\"awoooi.wooo.work\\\\\"\" /app/.next/static/chunks/*.js 2>/dev/null | head -3\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-web -- sh -c \"\"find /app/.next -name \\\\\"\"*.js\\\\\"\" -exec grep -l \\\\\"\"awoooi\\\\\"\" {} \\\\; 2>/dev/null | head -3\"\"')",
"Bash(./scripts/qa-zero-touch.sh)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S cat /etc/nginx/sites-available/awoooi.wooo.work.conf')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S cp /tmp/awoooi.wooo.work.conf /etc/nginx/sites-available/awoooi.wooo.work.conf && sudo nginx -t 2>&1')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S nginx -s reload && echo \"\"✅ Nginx reloaded with load balancing!\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt && sudo ls -la sentry 2>/dev/null || echo \"\"Sentry 目錄不存在,需要建立\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'sudo mkdir -p /opt/sentry && sudo chown wooo:wooo /opt/sentry && cd /opt/sentry && git clone https://github.com/getsentry/self-hosted.git . 2>&1 | tail -5')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"0936223270\"\" | sudo -S mkdir -p /opt/sentry && echo \"\"0936223270\"\" | sudo -S chown wooo:wooo /opt/sentry && cd /opt/sentry && git clone https://github.com/getsentry/self-hosted.git . 2>&1 | tail -10')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt/sentry && ls -la 2>&1 | head -20')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt/sentry && git describe --tags 2>/dev/null || git rev-parse --short HEAD')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt/sentry && ./install.sh --help 2>&1 | head -30 || echo \"\"No help available, checking script...\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt/sentry && nohup ./install.sh --skip-user-creation --no-report-self-hosted-issues > /tmp/sentry-install.log 2>&1 &')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'tail -30 /tmp/sentry-install.log 2>/dev/null || echo \"\"日誌檔案尚未建立,等待中...\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'grep -E \"\"^\\\\▶|^Creating|^Starting|^Error|^✓|Pulling\"\" /tmp/sentry-install.log 2>/dev/null | tail -40')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 最近進度 ===\"\" && tail -10 /tmp/sentry-install.log')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 關鍵階段 ===\"\" && grep -E \"\"^▶|✓|Error|Creating|Starting\"\" /tmp/sentry-install.log | tail -20')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 最近 20 行 ===\"\" && tail -20 /tmp/sentry-install.log')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 關鍵階段 ===\"\" && grep -E \"\"^▶|✓|Error|Creating|Starting|Building|DONE\"\" /tmp/sentry-install.log | tail -30')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 最近關鍵階段 ===\"\" && grep -E \"\"^▶|✓|Error|Creating|Starting|DONE|Completed|success\"\" /tmp/sentry-install.log | tail -25')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'grep -E \"\"^▶|✓|Error|Completed|success|fail\"\" /tmp/sentry-install.log | tail -15')",
"Bash(redis-cli -h 192.168.0.188 -p 6380 KEYS incident:*)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/momo-pro/monitoring/alertmanager.yml 2>/dev/null || cat /etc/alertmanager/alertmanager.yml 2>/dev/null || echo ''Config not found''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail 30 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail 20 2>&1 | grep -iE ''telegram|send|alert|incident|error''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/clawbot-v5/.env | grep -E ''TELEGRAM|TG_'' | head -5\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/clawbot-v5/.env | grep -E ''REDIS|POSTGRES|DATABASE'' | head -5\")",
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9093/api/v2/alerts?active=true\"\" | python3 -c \"\"import sys,json; alerts=json.load\\(sys.stdin\\); print\\(f\\\\\"\"Active alerts: {len\\(alerts\\)}\\\\\"\"\\)\"\"')",
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9093/api/v2/alerts\"\" | python3 -c \"\"import sys,json; alerts=json.load\\(sys.stdin\\); print\\(f\\\\\"\"Total alerts: {len\\(alerts\\)}\\\\\"\"\\); [print\\(a[\\\\\"\"labels\\\\\"\"][\\\\\"\"alertname\\\\\"\"]\\) for a in alerts[:5]]\"\"')",
"Bash(ssh ollama@192.168.0.188 'redis-cli -p 6380 -n 0 GET incident:INC-20260324-36AF55 | python3 -c \"\"import sys,json; d=json.load\\(sys.stdin\\); print\\(f\\\\\"\"Status: {d.get\\(\\\\\"\"status\\\\\"\"\\)}\\\\\"\"\\); print\\(f\\\\\"\"message_id: {d.get\\(\\\\\"\"message_id\\\\\"\", \\\\\"\"NONE\\\\\"\"\\)}\\\\\"\"\\); print\\(f\\\\\"\"chat_id: {d.get\\(\\\\\"\"chat_id\\\\\"\", \\\\\"\"NONE\\\\\"\"\\)}\\\\\"\"\\)\"\"')",
"Bash(ssh ollama@192.168.0.188 'redis-cli -p 6380 -n 0 GET incident:INC-20260324-36AF55 | python3 -c \"\"import sys,json; d=json.load\\(sys.stdin\\); print\\(f\\\\\"\"status: {d.get\\('status'\\)}\\\\\"\"\\); print\\(f\\\\\"\"message_id: {d.get\\('message_id'\\)}\\\\\"\"\\); print\\(f\\\\\"\"created_at: {d.get\\('created_at'\\)}\\\\\"\"\\)\"\"')",
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 0 KEYS *approval*)",
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 0 KEYS *incident*)",
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 0 KEYS *pending*)",
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 0 KEYS *)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/k3s-prod.yaml kubectl get pods -n awoooi-prod -o wide)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/k3s-prod.yaml kubectl get deployment awoooi-api -n awoooi-prod -o jsonpath='{.spec.template.spec.containers[0].image}')",
"Bash(kubectl --kubeconfig=/Users/ogt/awoooi/k3s-prod.yaml get deployment awoooi-api -n awoooi-prod -o jsonpath='{.spec.template.spec.containers[0].image}')",
"Bash(python3 -c \":*)",
"Bash(/tmp/awoooi-tg-secret.yaml:*)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/k3s-prod.yaml kubectl apply -f /tmp/awoooi-tg-secret.yaml)",
"Bash(for pod:*)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.188 \"curl -fsSL https://ollama.com/install.sh | sh\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no -o PreferredAuthentications=password wooo@192.168.0.188 \"echo connected && ollama --version\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no -o PreferredAuthentications=password ollama@192.168.0.188 \"curl -fsSL https://ollama.com/install.sh | sh\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S curl -fsSL https://ollama.com/install.sh | sudo -S sh\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"ollama --version\")",
"Bash(__NEW_LINE_95e9df111552805b__ echo:*)",
"Bash(sshpass -p '0936223270' scp /Users/ogt/awoooi/k8s/nginx/awoooi-prod.conf ollama@192.168.0.188:/tmp/awoooi-prod.conf)",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S cp /tmp/awoooi-prod.conf /etc/nginx/conf.d/awoooi-prod.conf && echo ''0936223270'' | sudo -S nginx -t 2>&1\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S ls -la /etc/nginx/ssl/ 2>/dev/null || echo ''No ssl dir''; echo ''0936223270'' | sudo -S ls -la /etc/nginx/conf.d/ 2>/dev/null | head -10\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S grep -r ''ssl_certificate'' /etc/nginx/ 2>/dev/null | head -5\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S grep -A 20 ''server_name awoooi'' /etc/nginx/sites-enabled/all-sites.conf 2>/dev/null | head -30\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S ls -la /etc/nginx/sites-enabled/ 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S cat /etc/nginx/sites-available/awoooi.wooo.work.conf 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S rm /etc/nginx/conf.d/awoooi-prod.conf && echo ''0936223270'' | sudo -S nginx -t 2>&1\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S nginx -s reload 2>&1\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S systemctl reload nginx 2>&1\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker logs openclaw 2>&1 | tail -30\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker ps -a --format ''table {{.Names}}\\\\t{{.Status}}\\\\t{{.Image}}'' 2>&1 | head -15\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i telegram | tail -20\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker logs clawbot 2>&1 | tail -30\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker exec alertmanager cat /etc/alertmanager/alertmanager.yml 2>&1 | head -30\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"curl -sf ''http://localhost:9093/api/v1/alerts'' | jq ''.data | length'' 2>/dev/null || curl -sf ''http://localhost:9093/api/v2/alerts'' | jq ''length'' 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker exec alertmanager wget -qO- ''http://localhost:9093/api/v2/alerts'' 2>&1 | head -100\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl -n awoooi-prod logs -l app=awoooi-worker --tail=50 2>&1\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"cat /home/ollama/alertmanager/alertmanager.yml 2>/dev/null || docker exec alertmanager cat /etc/alertmanager/alertmanager.yml\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker cp /tmp/alertmanager.yml alertmanager:/etc/alertmanager/alertmanager.yml && docker exec alertmanager amtool check-config /etc/alertmanager/alertmanager.yml && docker kill -s SIGHUP alertmanager\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker inspect alertmanager --format ''{{range .Mounts}}{{.Source}} -> {{.Destination}}{{println}}{{end}}''\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker exec alertmanager cat /etc/alertmanager/alertmanager.yml\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker restart alertmanager && sleep 3 && docker exec alertmanager cat /etc/alertmanager/alertmanager.yml\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i ''telegram\\\\|webhook\\\\|alert'' | tail -10\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=30 2>/dev/null | grep -E ''''POST|webhook|alertmanager|ManualTest''''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=30 2>/dev/null | grep -iE ''''POST|webhook''''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=50 2>/dev/null | grep -iE ''''POST.*webhook|alertmanager_webhook|NewFingerprint''''\")",
"Bash(kustomize build:*)",
"Bash(KUBECONFIG=~/.kube/config kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data}')",
"Bash(KUBECONFIG=/Users/ogt/.kube/config kubectl exec deploy/awoooi-api -n awoooi-prod -- env)",
"Bash(git checkout:*)",
"Bash(jq -r '.status // \"\"\"\"failed\"\"\"\"')",
"Bash(jq -r '.total // \"\"\"\"error\"\"\"\"')",
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 10 XLEN awoooi:signals)",
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 10 XRANGE awoooi:signals - + COUNT 5)",
"Bash(SENTRY_TOKEN=\"2b73050606d2b32f54095b4e177f4842f2bfe69d4b17da25f6daa4739148a972\" curl -s \"http://192.168.0.110:9000/api/0/organizations/\" -H \"Authorization: Bearer $SENTRY_TOKEN\")",
"Bash(SENTRY_TOKEN=\"2b73050606d2b32f54095b4e177f4842f2bfe69d4b17da25f6daa4739148a972\" curl -s \"http://192.168.0.110:9000/api/0/organizations/sentry/projects/\" -H \"Authorization: Bearer $SENTRY_TOKEN\")",
"Bash(SENTRY_TOKEN=\"2b73050606d2b32f54095b4e177f4842f2bfe69d4b17da25f6daa4739148a972\" curl -s \"http://192.168.0.110:9000/api/0/projects/sentry/awoooi-api/rules/\" -H \"Authorization: Bearer $SENTRY_TOKEN\")",
"Bash(SENTRY_TOKEN=\"2b73050606d2b32f54095b4e177f4842f2bfe69d4b17da25f6daa4739148a972\" __NEW_LINE_583db0bbb6875db0__ echo \"=== Alert Rules ===\" curl -s \"http://192.168.0.110:9000/api/0/projects/sentry/awoooi-api/rules/\" -H \"Authorization: Bearer $SENTRY_TOKEN\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get nodes -o wide && echo ''---'' && kubectl top nodes 2>/dev/null || echo ''metrics-server not installed''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod -o wide && echo ''---'' && kubectl get pvc -n awoooi-prod 2>/dev/null && echo ''---'' && kubectl get sc 2>/dev/null && echo ''---'' && kubectl get deploy -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get ns && echo ''---'' && kubectl get svc -A | grep -E ''prometheus|grafana|metrics|signoz|longhorn|argocd'' || echo ''No monitoring/gitops services found''\")",
"Bash(ssh wooo@192.168.0.120 \"cat /etc/rancher/k3s/config.yaml 2>/dev/null || echo ''--- K3s default config \\(no custom config.yaml\\) ---'' && echo ''---'' && sudo k3s check-config 2>/dev/null | head -30 || echo ''check-config not available''\")",
"Bash(ssh wooo@192.168.0.120 \"free -h && echo ''---'' && swapon --show && echo ''---'' && df -h /var/lib/rancher/k3s\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n cnpg-system && echo ''---'' && kubectl get svc -n monitoring\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get all -n awoooi-prod -o wide 2>/dev/null && echo ''---QUOTA---'' && kubectl describe quota -n awoooi-prod 2>/dev/null && echo ''---EVENTS---'' && kubectl get events -n awoooi-prod --sort-by=''.lastTimestamp'' 2>/dev/null | tail -20\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get helmcharts -A 2>/dev/null || echo ''No HelmCharts'' && echo ''---'' && kubectl get helmreleases -A 2>/dev/null || echo ''No HelmReleases'' && echo ''---'' && kubectl api-resources | grep -E ''argo|flux|velero|longhorn'' || echo ''No GitOps/Backup CRDs''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get ds -A && echo ''---'' && kubectl get cm -n kube-system | grep -E ''traefik|coredns'' && echo ''---REGISTRIES---'' && sudo cat /etc/rancher/k3s/registries.yaml 2>/dev/null || echo ''No registries.yaml''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get ingress -A 2>/dev/null || echo ''No Ingress'' && echo ''---HPA---'' && kubectl get hpa -A 2>/dev/null || echo ''No HPA'' && echo ''---PDB---'' && kubectl get pdb -A 2>/dev/null || echo ''No PDB'' && echo ''---SYSCTL---'' && cat /proc/sys/net/core/somaxconn && cat /proc/sys/fs/file-max\")",
"Bash(ssh wooo@192.168.0.120 \"systemctl status k3s | head -20 && echo ''---K3S-VERSION---'' && k3s --version && echo ''---ETCD-STATUS---'' && sudo k3s etcd-snapshot list 2>/dev/null | head -5 || echo ''No etcd snapshots''\")",
"Bash(ssh wooo@192.168.0.121 \"free -h && swapon --show && echo ''---DISK---'' && df -h /var/lib/rancher/k3s 2>/dev/null\")",
"Bash(ssh wooo@192.168.0.120 \"sudo ls -la /var/lib/rancher/k3s/server/db/ 2>/dev/null && echo ''---TOKEN---'' && sudo cat /var/lib/rancher/k3s/server/token 2>/dev/null | head -1 | cut -c1-20\")",
"Bash(ssh -o ConnectTimeout=10 wooo@192.168.0.120 \"ps aux | grep k3s | grep -v grep | head -3 && echo ''---'' && sudo cat /etc/systemd/system/k3s.service 2>/dev/null | grep -E ''ExecStart|datastore''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S mkdir -p /backup/k3s_etcd 2>/dev/null && echo ''0936223270'' | sudo -S chown ollama:ollama /backup/k3s_etcd 2>/dev/null && echo ''=== 188 備份目錄 ==='' && ls -la /backup/\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"mkdir -p ~/.ssh && chmod 700 ~/.ssh && echo ''ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCnTnbjtSPwrI/pN6DByDxsFDOR4+sVnk7hb+eOr+Pb4e7o7QGbyKaJC2eKP7uRBilPqeScuvNKZhwmY8ZOuhjId+ZyLK0jZXHdq3a6tjsQ4MwPGyT2aMaD7x2jKzPbFojR0P5lmQWH2zjxeVuB7UeBIejaYk3gQEMFVES8Xh84yxFvy9jlwKmZFAI0gIhx0nPOTPB7onTyb8L5snUbwQQntoHWYFbb83+wui/kM15aLT5r8uvS2yZdsWWrDvAyuIShde1ceTBevwwqxezH1egXGoGkvZYYF7vHFu3X6jF7Nfp4qVfo0EfFV3omy90HzoFvoEXCC+jIWU0TjUqdEgGIEj2b+YXw3bIs+k+g/0/iJzA5LLUNb2vHVHoUmah4ZNlfiGU7e6hTYXjLjoXJlz9gfv6LYywhgktdThi9sUCn6rzbatlMrY0HNUE6uOwRTugMq1YUEJCvRqeFmtX5yF6xGp+FbOjIr1kMmplbRQRqKIrpQoqEn0+UBXC7OwJNCk8= wooo@mon'' >> ~/.ssh/authorized_keys && chmod 600 ~/.ssh/authorized_keys && echo ''SSH key 已加入''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"echo ''ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCs3nQ11B+V/VEchNR9Uzj57JoKXOJ8S1UVjCTHkUDL8FnrbdPFr0zvpYgX0a/Ipj9wHkqU6z6Ho6MQj3X2+HaK5fC0fZ3aZE1QT2df/x0xXdyka9XSaTFaymKzNTvfmum40koBkNccKyO5SLSjTcoTZCDHP4RqHHu/MYjQMejG7yeyCFmgumrHh5T/0DXPf5zl0Ff1C5U3VCLPxz5vq63JB2dTfrjQLg3sO0ZI3KTZE8aFj3txKz5snDZX3nE1tHZMKLecwwEqi130BtVZcm8zXDqX83gtUDp/WLfPyKCmzZzGf6YgEofIsyrVup8XnD9xNoFmbEeBdFocGWeoIVIn+faOpU22fvQ34L57GHhNQwygZOPKsZa9XNKjayKdKQl3gcAA2wnkZgN0cyIEYvTd3O+Z5Xvff2dat+0sDMK571V+0JEdAMOpQjFO7DkwjKHn/gHLmvRjYLiUOItX9JysFgYuHs8omad2LmeUIkQrBD2I2hyvY49HaJKWctk4Jm0= root@mon'' >> ~/.ssh/authorized_keys && echo ''Root SSH key added''\")",
"Bash(grep -r \"\"\"zod\"\"\" /Users/ogt/awoooi/package.json /Users/ogt/awoooi/apps/*/package.json /Users/ogt/awoooi/packages/*/package.json)",
"Bash(__NEW_LINE_144503b060dfd3dd__ echo:*)",
"Bash(__NEW_LINE_ae2a22b14586d7aa__ echo:*)",
"Bash(__NEW_LINE_e17561a4e55f74d4__ echo:*)",
"Bash(ssh wooo@192.168.0.120 \"echo ''''0936223270'''' | sudo -S cat /etc/rancher/k3s/k3s.yaml 2>/dev/null | sed ''''s|https://127.0.0.1:6443|https://192.168.0.125:6443|g''''\")",
"Bash(KUBECONFIG=/tmp/kubeconfig-vip.yaml kubectl get nodes)",
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get rs -n awoooi-prod)",
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get pods -A --no-headers)",
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get jobs -A --no-headers)",
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get rs -n awoooi-prod --no-headers)",
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml delete job api-watchdog-29556380 -n wooo-aiops-uat)",
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get pods -n awoooi-prod)",
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get pods -A)",
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get svc -A)",
"Bash(PGPASSWORD=changeme psql -h 192.168.0.188 -U awoooi -d awoooi_prod -f /Users/ogt/awoooi/apps/api/scripts/migrate_phase18_audit_logs.sql)",
"Bash(PLAYWRIGHT_BASE_URL=http://192.168.0.125:32335 npx playwright test phase11-conversational.spec.ts --reporter=list)",
"Bash(PLAYWRIGHT_BASE_URL=http://192.168.0.125:32335 npx playwright test phase11-conversational.spec.ts --reporter=list --workers=1)",
"Bash(KUBECONFIG=~/.kube/config kubectl get nodes --server=https://192.168.0.125:6443 --insecure-skip-tls-verify)",
"Bash(source .venv/bin/activate)",
"Read(//etc/postgresql/14/main/**)",
"Bash(for port:*)",
"Bash(kubectl top:*)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl top pods -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n awoooi-prod -o wide)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get svc -n awoooi-prod)",
"Bash(jq -r '.components | to_entries[] | \"\"\"\"\\\\\\(.key\\): \\\\\\(.value.status\\)\"\"\"\"')",
"Bash(tar -xzf velero-v1.13.0-darwin-arm64.tar.gz)",
"Bash(sudo mv:*)",
"Bash(velero version:*)",
"Bash(mkdir -p ~/bin)",
"Bash(mv velero-v1.13.0-darwin-arm64/velero ~/bin/)",
"Bash(~/bin/velero version:*)",
"Bash(k8s/velero/00-namespace.yaml:*)",
"Bash(k8s/velero/01-credentials.yaml:*)",
"Bash(k8s/velero/02-velero-install.yaml:*)",
"Bash(tar -xzf velero.tar.gz)",
"Bash(/tmp/velero-credentials:*)",
"Bash(__NEW_LINE_e85d95513fc16492__ ~/bin/velero install --provider aws --plugins velero/velero-plugin-for-aws:v1.9.0 --bucket velero-backups --secret-file /tmp/velero-credentials --backup-location-config region=minio,s3ForcePathStyle=true,s3Url=http://192.168.0.188:9000 --use-volume-snapshots=false --dry-run -o yaml)",
"Bash(__NEW_LINE_e85d95513fc16492__ head:*)",
"Bash(k8s/velero/README.md:*)",
"Bash(KUBECONFIG=/Users/ogt/.kube/config kubectl apply -f /Users/ogt/awoooi/k8s/velero/velero-install-full.yaml)",
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"whoami && hostname && cat /etc/sudoers.d/* 2>/dev/null | head -5 || echo ''no sudoers.d files''\")",
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl get nodes 2>&1 || echo ''kubectl failed, checking k3s kubeconfig...'' && ls -la /etc/rancher/k3s/k3s.yaml 2>&1\")",
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"sudo -l 2>&1 | head -20\")",
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''09362233270'' | sudo -S -l 2>&1\")",
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get nodes 2>&1\")",
"Bash(sshpass -p '0936223270' scp /Users/ogt/awoooi/k8s/velero/velero-install-full.yaml wooo@192.168.0.120:/tmp/velero-install-full.yaml)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''''0936223270'''' | sudo -S kubectl apply -f /tmp/velero-install-full.yaml 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get pods -n velero 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get backupstoragelocation -n velero 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl logs -n velero deploy/velero --tail=30 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl logs -n velero deploy/velero --tail=10 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get secret cloud-credentials -n velero -o jsonpath=''{.data.cloud}'' 2>&1 | base64 -d\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S curl -s http://192.168.0.188:9000/velero-backups/ 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl rollout restart deployment/velero -n velero 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get backups -n velero 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl describe backup test-backup-20260328-2114 -n velero 2>&1 | tail -30\")",
"Bash(sshpass -p:*)",
"Read(//Users/ogt/awoooi/=== 測試 /approvals/**)",
"Bash(kubectl --kubeconfig=/Users/ogt/.kube/config get svc -n velero -o wide)",
"Bash(kubectl --kubeconfig=/Users/ogt/.kube/config get pods -n velero -o wide)",
"Bash(KUBECONFIG=/Users/ogt/.kube/config kubectl get svc -n velero)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'echo \"\"0936223270\"\" | sudo -S sh -c \"\"kubectl get pods -A | grep -E \\\\\"\"kube-state|state-metrics\\\\\"\"\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'echo \"\"0936223270\"\" | sudo -S sh -c \"\"kubectl get ns | grep -E \\\\\"\"wooo|aiops|legacy|old\\\\\"\"\"\"')",
"Bash(KUBECONFIG=~/.kube/config kubectl get ns --no-headers)",
"WebFetch(domain:build.nvidia.com)",
"WebFetch(domain:ollama.com)",
"WebFetch(domain:docs.api.nvidia.com)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"curl -s ''http://admin:admin@localhost:3002/api/search?type=dash-db'' | python3 -c \"\"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''Dashboard 數量: {len\\(d\\)}''\\); [print\\(f\\\\\"\" - {i[''title'']}\\\\\"\"\\) for i in d[:10]]\"\"\")",
"Bash(jq '.ai_provider // .data.ai_provider // \"\"\"\"not found\"\"\"\"')",
"Bash(KUBECONFIG=~/.kube/config kubectl logs -n awoooi-prod deployment/awoooi-api --tail=50)",
"Bash(export NVIDIA_API_KEY=\"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\")",
"Bash(curl -s -X POST \"https://integrate.api.nvidia.com/v1/chat/completions\" -H \"Content-Type: application/json\" -H \"Authorization: Bearer $NVIDIA_API_KEY\" -d '{:*)",
"Bash(/tmp/fix-network-policy.yaml:*)",
"Bash(__NEW_LINE_acde7a92ceae01f6__ scp:*)",
"Bash(curl -s -X POST https://awoooi.wooo.work/api/v1/webhooks/alertmanager -H 'Content-Type: application/json' -d '{:*)",
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9090/api/v1/targets\"\" 2>/dev/null | grep -o \"\"\\\\\"\"health\\\\\"\":\\\\\"\"[^\\\\\"\"]*\\\\\"\"\"\" | sort | uniq -c')",
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9090/api/v1/rules\"\" 2>/dev/null | grep -o \"\"\\\\\"\"name\\\\\"\":\\\\\"\"[^\\\\\"\"]*\\\\\"\"\"\" | sort | uniq')",
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9090/api/v1/targets\"\" 2>/dev/null | grep -o \"\"\\\\\"\"job\\\\\"\":\\\\\"\"[^\\\\\"\"]*\\\\\"\"\"\" | sort | uniq -c | sort -rn')",
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9090/api/v1/query?query=up\"\" 2>/dev/null | grep -o \"\"\\\\\"\"instance\\\\\"\":\\\\\"\"[^\\\\\"\"]*\\\\\"\"\"\" | sort | uniq')",
"Bash(for i:*)",
"Bash(do sleep:*)",
"Bash(kubectl patch:*)",
"Bash(ssh wooo@192.168.0.110 \"cat /tmp/runner_clean.log 2>/dev/null; echo ''---''; ps aux | grep ''Runner.Listener'' | grep -v grep | wc -l\")",
"Bash(KUBECONFIG=~/.kube/config kubectl logs -n awoooi-prod -l app=awoooi-api --tail=200)",
"Bash(/Users/ogt/awoooi/ops/monitoring/deploy-exporters.sh:*)",
"WebFetch(domain:github.com)",
"WebFetch(domain:docs.ollama.com)",
"Skill(telegram:configure)",
"Skill(telegram:configure:*)",
"Bash(USE_NEW_ENGINE=true pytest tests/test_incident*.py -v --tb=short -x)",
"Bash(USE_NEW_ENGINE=true pytest tests/test_approval_field_alignment.py tests/test_learning_service.py -v --tb=short)",
"Bash(/tmp/debug_approval.py:*)",
"Bash(/tmp/debug_approval2.py:*)",
"Bash(/tmp/bulk_sign.sh:*)",
"Bash(bash /tmp/bulk_sign.sh)",
"Bash(/tmp/check_deploy.py:*)",
"Bash(/tmp/check_buttons.py:*)",
"Bash(ssh ollama@192.168.0.188 \"docker logs openclaw --since=10s 2>&1 | grep -Ev ''\\(GET|POST\\) /health'' | tail -10 && echo ''---'' && docker exec openclaw env | grep OPENAI_API_KEY | cut -c1-30\")",
"Read(//Users/ogt/awoooi/https:/awoooi.wooo.work/_next/static/chunks/app/%5Blocale%5D/**)",
"Bash(find /Users/ogt/awoooi/apps/web -type f \\\\\\(-name *.spec.ts -o -name *.spec.tsx \\\\\\))",
"Bash(kubectl -n awoooi-prod get pods)",
"Bash(kubectl -n production get pods)",
"Bash(ssh -o StrictHostKeyChecking=no wooo@192.168.0.121 \"export KUBECONFIG=/etc/rancher/k3s/k3s.yaml && sudo kubectl get deployment awoooi-web -n awoooi-prod -o jsonpath=''{.spec.template.spec.containers[0].image}'' && echo '''' && sudo kubectl get pods -n awoooi-prod -l app=awoooi-web --no-headers\")",
"Bash(KUBECONFIG=/Users/ogt/.kube/config kubectl get pods -n awoooi-prod)",
"Bash(for run_id in 166 165)",
"mcp__plugin_playwright_playwright__browser_navigate",
"mcp__plugin_playwright_playwright__browser_take_screenshot",
"Bash(open \"http://192.168.0.110:3001/wooo/awoooi/actions\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=5\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs/166/jobs\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=10\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runners\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/admin/runners\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=3\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs/169/jobs\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/jobs/179/logs\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" JOB_ID=180 curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/jobs/$JOB_ID/logs\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=2\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" JOB_ID=181 curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/jobs/$JOB_ID/logs\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs/172/jobs\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/jobs/182/logs\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs/178\" -H \"Authorization: token $TOKEN\")",
"mcp__plugin_playwright_playwright__browser_snapshot",
"mcp__plugin_playwright_playwright__browser_fill_form",
"mcp__plugin_playwright_playwright__browser_click",
"Bash(GITEA_TOKEN=\"e6c9fecb1f0148939493ae0fa30407d28c91279d\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=5\" -H \"Authorization: token $GITEA_TOKEN\")",
<<<<<<< Updated upstream
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 /tmp/a4_smoke.py)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.repositories.aider_event_repository import AiderEventRepository; print\\('import OK'\\)\")",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py -v --tb=short)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.services.aider_event_service import classify_severity, should_create_incident, build_signal_data; print\\('✓ All imports successful'\\)\")",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py::test_build_signal_data_redacts_secrets_in_annotations -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_events_api.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_processor.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_processor.py tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.workers.aider_event_processor import AiderEventProcessor, get_aider_event_processor, run_aider_event_processor_loop; print\\('✓ All imports successful'\\)\")",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_processor.py -v --tb=short)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_processor.py tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py --tb=short)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_ai_router_feedback.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py tests/test_aider_event_processor.py tests/test_ai_router_feedback.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.services.ai_router import AIRouter; from src.db.base import get_session_factory; print\\('✓ Imports successful, no circular imports'\\)\")",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_ai_router_feedback.py tests/test_aider_event_service.py -v --tb=short)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.api.v1 import aider_events; from src.workers.aider_event_processor import run_aider_event_processor_loop; from src.core.config import settings; print\\('AIDER_WEBHOOK_SECRET' in settings.__fields__, 'USE_AIDER_FEEDBACK' in settings.__fields__\\)\")",
"Bash(AIDER_WEBHOOK_SECRET=testsecret /Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.main import app; print\\('app OK; title:', app.title\\)\")",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_action_parsing.py tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py tests/test_aider_event_processor.py tests/test_ai_router_feedback.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_action_parsing.py tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py tests/test_aider_event_processor.py tests/test_ai_router_feedback.py -q)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pip install -e .[dev] --quiet)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pip install -e '.[dev]' --quiet)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/ -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from aider_watch_client.aiderw import main as awmain; from aider_watch_client.cli import main as climain; print\\('✓ imports ok'\\)\")",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pip show aider-watch-client)",
"Bash(tailscale status *)",
"Bash(kubectl rollout *)",
"Bash(bash /Users/ogt/awoooi/scripts/aider_watch_client/scripts/install.sh)",
"Bash(git rebase *)",
"Bash(/opt/homebrew/bin/aiderw --message \"add docstring to hello function\" --exit)",
"Bash(kubectl -n awoooi-prod get pod -l app=awoooi-api -o jsonpath='{.items[0].metadata.name}')",
"Bash(kubectl -n awoooi-prod exec awoooi-api-7b9464c969-8ml88 -- python -c ' *)",
"Bash(kubectl -n awoooi-prod rollout restart deployment/awoooi-api)",
"Bash(kubectl -n awoooi-prod get pod -l app=awoooi-api --no-headers)",
"Bash(kubectl -n awoooi-prod rollout status deployment/awoooi-api --timeout=120s)",
"Bash(/opt/homebrew/bin/aider-watch flush *)",
"Bash(kubectl -n awoooi-prod get pod -l app=awoooi-api -o wide)",
"Bash(kubectl -n awoooi-prod rollout status deployment/awoooi-api --timeout=30s)",
"Bash(kubectl -n awoooi-prod exec awoooi-api-6657fb9cf7-47lcg -- python -c \"import src.services.telegram_gateway as tg; import inspect; lines = inspect.getsource\\(tg\\); idx = lines.find\\('response_body=e.response.text'\\); print\\('FOUND' if idx >= 0 else 'NOT FOUND'\\)\")",
"Read(//opt/gitea/**)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/ -q)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/unit/test_aider_event_service.py tests/unit/test_aider_model.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_aider_event_service.py tests/test_aider_event_processor.py -v)",
"Bash(kubectl -n awoooi-prod get svc)",
"Bash(kubectl -n openclaw get pod)",
"Bash(kubectl -n awoooi-prod exec awoooi-api-7cd784c875-r4qkz -- python -c ' *)",
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=10m)",
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=15m)",
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=20m)",
"Bash(kubectl -n awoooi-prod get secret awoooi-secrets -o yaml)",
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=30m)",
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=2h)",
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2)",
"Bash(kubectl -n awoooi-prod get pod -l app=awoooi-api -o jsonpath='{range .items[*]}{.metadata.name} {.status.containerStatuses[0].imageID}{\"\\\\n\"}{end}')",
"Bash(kubectl -n awoooi-prod get ingress)",
"Bash(kubectl -n awoooi-prod get svc awoooi-api-svc)",
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --since=60s --prefix)",
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --since=5m --prefix)",
"Bash(kubectl -n awoooi-prod logs pod/awoooi-api-86bc79766d-dn5ll --since=5m)",
"Bash(kubectl -n awoooi-prod logs pod/awoooi-api-86bc79766d-dn5ll --since=10m)",
"Bash(kubectl -n awoooi-prod logs pod/awoooi-api-86bc79766d-dn5ll)",
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --since=90s --prefix)",
"Bash(kubectl -n awoooi-prod logs pod/awoooi-api-86bc79766d-4x69p --since=5m)",
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 10 SCAN 0 MATCH \"playbook:PB-*\" COUNT 500)",
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 10 DBSIZE)",
"Bash(wait)",
"Read(//Users/**)",
"Read(//Users/ooo/.claude/**)",
"Bash(mkdir -p /Users/ogt/awoooi/.claude/agents)",
"Bash(cp /Users/ogt/.claude/agents/*.md /Users/ogt/awoooi/.claude/agents/)",
"Bash(kubectl -n awoooi-prod logs --tail=400 -l app=awoooi-api --prefix=true)",
"Bash(kubectl -n awoooi-prod logs --tail=300 awoooi-api-65c69fd649-bxbwp)",
"Bash(kubectl -n awoooi-prod logs --tail=20000 -l app=awoooi-api --prefix=false --since=24h)",
"Bash(kubectl -n awoooi-prod logs --since=24h awoooi-api-65c69fd649-bxbwp)",
"Bash(kubectl -n awoooi-prod logs --since=24h -l app=awoooi-api --prefix=false)",
"Bash(kubectl -n awoooi-prod logs --since=24h awoooi-api-65c69fd649-fmbxd)",
"Bash(kubectl -n awoooi-prod logs --since=3h awoooi-api-65c69fd649-fmbxd)",
"Bash(kubectl -n awoooi-prod logs --since=3h awoooi-api-65c69fd649-bxbwp)",
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --tail=30 --since=30m)",
"Bash(kubectl -n awoooi-prod get pods -o wide)",
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api -o jsonpath='{.items[0].metadata.creationTimestamp}')",
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --tail=5 --since=5m)",
"Bash(kubectl -n awoooi-prod describe pod -l app=awoooi-api)",
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --tail=20 --since=10m)",
"Bash(kubectl -n awoooi-prod exec deployment/awoooi-api -- python3 -c ' *)",
"Bash(PGPASSWORD=\"\" psql -h 188.188.188.188 -U aiops -d aiops -c \"\\\\d timeline_events\")",
"Bash(kubectl -n awoooi-prod get deploy awoooi-api -o yaml)",
"Bash(PGPASSWORD=\"\" psql --version)",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- env)",
"Bash(kubectl -n awoooi-prod logs --tail=500 deploy/awoooi-api)",
"Bash(kubectl cp *)",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'curl -sG \"$PROMETHEUS_URL/api/v1/query\" --data-urlencode \"query=up\" 2>&1 | head -c 400')",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'for q in \"sum\\(rate\\(http_requests_total{status=~\\\\\"5..\\\\\"}[5m]\\)\\) / sum\\(rate\\(http_requests_total[5m]\\)\\)\" \"avg\\(rate\\(container_cpu_usage_seconds_total{namespace=\\\\\"awoooi-prod\\\\\",container=\\\\\"awoooi-api\\\\\"}[5m]\\)\\)\" \"pg_stat_activity_count{datname=\\\\\"awoooi\\\\\"}\" \"increase\\(kube_pod_container_status_restarts_total{namespace=\\\\\"awoooi-prod\\\\\"}[15m]\\)\"; do echo \"---- $q\"; curl -sG \"$PROMETHEUS_URL/api/v1/query\" --data-urlencode \"query=$q\" 2>&1 | head -c 250; echo; done')",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'PGPASSWORD=as0V1mohktaFbGIx3R0iCatbMJ6XxFDL psql -h 192.168.0.188 -U awoooi -d awoooi_prod -c \"SELECT metric_name, count\\(*\\), max\\(trained_at\\) FROM dynamic_baseline_record GROUP BY metric_name;\" 2>&1 | head -20')",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'PGPASSWORD=as0V1mohktaFbGIx3R0iCatbMJ6XxFDL psql -h 192.168.0.188 -U awoooi -d awoooi_prod -c \"SELECT count\\(*\\) as asset_count FROM asset_inventory; SELECT count\\(*\\) as coverage_count FROM asset_coverage_snapshot; SELECT count\\(*\\) as host_cap_count FROM host_capacity_snapshot; SELECT count\\(*\\) as compl_count FROM asset_compliance_snapshot; SELECT count\\(*\\) as rule_cat FROM alert_rule_catalog; SELECT count\\(*\\) as log_cluster FROM log_cluster_record;\" 2>&1')",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'python3 -c \" *)",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- python3 -c ' *)",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'for q in \"http_requests_total\" \"container_cpu_usage_seconds_total\" \"container_memory_usage_bytes\" \"kube_pod_container_status_restarts_total\" \"pg_stat_activity_count\" \"node_cpu_seconds_total\" \"node_load1\"; do echo -n \"$q => \"; curl -sG \"$PROMETHEUS_URL/api/v1/query\" --data-urlencode \"query=count\\($q\\)\" 2>&1 | head -c 180; echo; done')",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'curl -sG \"$PROMETHEUS_URL/api/v1/query\" --data-urlencode \"query=container_cpu_usage_seconds_total\" 2>&1 | python3 -c \"import json,sys; d=json.load\\(sys.stdin\\); rs=d[\\\\\"data\\\\\"][\\\\\"result\\\\\"][:3]; [print\\(r[\\\\\"metric\\\\\"]\\) for r in rs]; print\\(\\\\\"total series:\\\\\", len\\(d[\\\\\"data\\\\\"][\\\\\"result\\\\\"]\\)\\)\"')",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'which kubectl 2>&1; kubectl version --client 2>&1 | head -3; kubectl -n awoooi-prod get deploy awoooi-api 2>&1 | head -3')",
"Bash(kubectl -n awoooi-prod logs --tail=2000 deploy/awoooi-api)",
"Bash(psql --version)",
"WebFetch(domain:core.telegram.org)",
"mcp__plugin_context7_context7__resolve-library-id",
"mcp__plugin_context7_context7__query-docs",
"WebFetch(domain:docs.claude.com)",
"Bash(git tag *)",
"Read(//usr/**)",
"Bash(psql -h 192.168.0.110 -U awoooi_user -d awoooi -c \"SELECT id, alertname, status, confidence, description, created_at FROM approval_records WHERE status='PENDING' AND DATE\\(created_at AT TIME ZONE 'Asia/Taipei'\\) = CURRENT_DATE AT TIME ZONE 'Asia/Taipei' ORDER BY created_at DESC LIMIT 10;\")",
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.spec.template.spec.containers[0].image}')",
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.spec.template.spec.containers[0].imagePullPolicy}{\"\\\\n\"}{.spec.template.metadata.labels}{\"\\\\n\"}')",
"Bash(kubectl kustomize *)",
"Bash(kubectl -n awoooi-prod rollout status deployment/awoooi-api --timeout=60s)",
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api --no-headers)",
"Bash(kubectl -n awoooi-prod patch deployment awoooi-api -p '{\"spec\":{\"template\":{\"spec\":{\"containers\":[{\"name\":\"api\",\"image\":\"192.168.0.110:5000/awoooi/api:cbd28e29a08435deb8c66af51654d8fa65120a14\"}]}}}}')",
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.spec.template.spec.containers[0].image}{\"\\\\n\"}')",
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api -o jsonpath='{range .items[*]}{.metadata.name}{\"\\\\t\"}{.spec.containers[0].image}{\"\\\\n\"}{end}')",
"Bash(kubectl -n awoooi-prod get pdb awoooi-api-pdb -o jsonpath='{.spec.minAvailable}')",
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api -o wide)",
"Bash(kubectl -n awoooi-prod describe rs -l app=awoooi-api)",
"Bash(kubectl -n awoooi-prod get events --sort-by='.lastTimestamp')",
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.spec.replicas}{\"\\\\n\"}{.status.replicas}{\"\\\\n\"}{.status.readyReplicas}{\"\\\\n\"}{.status.updatedReplicas}{\"\\\\n\"}')",
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api --sort-by=.metadata.creationTimestamp -o jsonpath='{range .items[*]}{.metadata.name}{\":\"}{.metadata.creationTimestamp}{\"\\\\n\"}{end}')",
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.status.conditions[*]}')",
"Bash(kubectl -n awoooi-prod describe deployment awoooi-api)",
"Bash(kubectl -n awoooi-prod get rs -l app=awoooi-api -o jsonpath='{range .items[*]}{.metadata.name}{\":\"}{.spec.template.spec.containers[0].image}{\"\\\\n\"}{end}')",
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o yaml)",
"Bash(kubectl -n awoooi-prod rollout status deployment/awoooi-api --timeout=180s)",
"Bash(kubectl -n awoooi-prod set image deployment/awoooi-api api=192.168.0.110:5000/awoooi/api:cbd28e29a08435deb8c66af51654d8fa65120a14 --record=false)",
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api -o jsonpath='{range .items[*]}{.metadata.name}{\"\\\\t\"}{.spec.containers[0].image}{\"\\\\t\"}{.status.phase}{\"\\\\n\"}{end}')",
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.status.replicas}{\"\\\\t\"}{.status.readyReplicas}{\"\\\\t\"}{.status.updatedReplicas}')",
"Bash(bash /tmp/diagnostic.sh)",
"WebFetch(domain:docs.github.com)",
"WebFetch(domain:docs.sonarsource.com)",
"WebFetch(domain:gitea.com)",
"WebFetch(domain:docs.gitea.com)",
"WebFetch(domain:www.sonarsource.com)",
"WebFetch(domain:golangci-lint.run)",
"WebFetch(domain:www.uber.com)",
"Bash(bash scripts/ops/deploy-alerts.sh --dry-run)",
"Bash(bash scripts/ops/deploy-alerts.sh)",
"Bash(promtool check *)",
"WebFetch(domain:openrouter.ai)",
"WebFetch(domain:qwenlm.github.io)",
"WebFetch(domain:aclanthology.org)",
"WebFetch(domain:datanorth.ai)",
"WebFetch(domain:www.infoq.com)",
"WebFetch(domain:aws.amazon.com)",
"WebFetch(domain:artificialanalysis.ai)",
"WebFetch(domain:www.alibabacloud.com)",
"WebFetch(domain:docs.langchain.com)",
"WebFetch(domain:arxiv.org)",
"WebFetch(domain:blog.kilo.ai)",
"WebFetch(domain:www.siliconflow.com)",
"WebFetch(domain:aicompetence.org)",
"Bash(redis-cli -h 192.168.0.188 -p 6380 ping)",
"Bash(redis-cli ping *)"
=======
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest apps/api/tests/test_aider_event_models.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_action_parsing.py -v --collect-only)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_action_parsing.py --collect-only)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_models.py tests/test_secret_redactor.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.repositories.aider_event_repository import AiderEventRepository; print\\('import OK'\\)\")"
>>>>>>> Stashed changes
],
"deny": [
"Bash(rm -rf *)",
"Bash(git push --force *)",
"Bash(git reset --hard *)",
"Bash(kubectl delete *)",
"Bash(docker rm -f *)"
],
"additionalDirectories": [
"/Users/ogt/.claude/projects/-Users-ogt-awoooi/memory",
"/Users/ogt/awoooi/.claude/hooks",
"/Users/ogt/.claude/channels/telegram",
<<<<<<< Updated upstream
"/Users/ogt",
"/Users/ogt/.claude",
"/Users/ogt/awoooi/apps/web/src/app/[locale]/aiops"
]
},
"hooks": {
"PreToolUse": [
{
"matcher": "",
"hooks": [
{
"type": "command",
"command": "node $CLAUDE_PROJECT_DIR/.claude/hooks/awoooi-guard.js 2>/dev/null || true"
},
{
"type": "command",
"command": "node /Users/ogt/.claude/hooks/branch-protection.js"
},
{
"type": "command",
"command": "node /Users/ogt/.claude/hooks/commit-quality.js"
},
{
"type": "command",
"command": "node /Users/ogt/.claude/hooks/large-file-warner.js"
},
{
"type": "command",
"command": "node /Users/ogt/.claude/hooks/mcp-health.js"
}
]
}
],
"PostToolUse": [
{
"matcher": "",
"hooks": [
{
"type": "command",
"command": "node /Users/ogt/.claude/hooks/audit-log.js"
},
{
"type": "command",
"command": "node /Users/ogt/.claude/hooks/suggest-compact.js"
}
]
}
],
"Stop": [
{
"matcher": "",
"hooks": [
{
"type": "command",
"command": "node /Users/ogt/.claude/hooks/cost-tracker.js"
},
{
"type": "command",
"command": "node /Users/ogt/.claude/hooks/session-summary.js"
}
]
}
=======
"/Users/ogt/aider-watch"
>>>>>>> Stashed changes
]
}
}

View File

@@ -1,827 +0,0 @@
{
"permissions": {
"allow": [
"Bash(pnpm install:*)",
"Bash(npm --version)",
"Bash(npm install:*)",
"Bash(pnpm --version)",
"Bash(pnpm dev:*)",
"Bash(pnpm add:*)",
"Bash(ls -la /Users/ogt/awoooi/apps/web/next.config.*)",
"Bash(pkill -f \"next dev\")",
"Bash(curl -sL http://localhost:3000/zh-TW)",
"Bash(curl -s http://localhost:3000/zh-TW)",
"Bash(pnpm --filter web build)",
"Bash(curl -s http://localhost:3001/zh-TW)",
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:3000/zh-TW)",
"Bash(kubectl apply:*)",
"Bash(chmod +x /Users/ogt/awoooi/deploy-infra.sh)",
"Bash(./deploy-infra.sh)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"mkdir -p /tmp/awoooi-k8s\")",
"Bash(sshpass -p '0936223270' scp -o StrictHostKeyChecking=no /Users/ogt/awoooi/k8s/awoooi-prod/01-namespace-quota.yaml /Users/ogt/awoooi/k8s/awoooi-prod/02-network-policy.yaml /Users/ogt/awoooi/k8s/awoooi-prod/04-configmap.yaml wooo@192.168.0.120:/tmp/awoooi-k8s/)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"sudo kubectl apply -f /tmp/awoooi-k8s/01-namespace-quota.yaml\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl apply -f /tmp/awoooi-k8s/01-namespace-quota.yaml 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl apply -f /tmp/awoooi-k8s/02-network-policy.yaml 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl apply -f /tmp/awoooi-k8s/04-configmap.yaml 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get ns awoooi-prod -o wide 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get networkpolicy -n awoooi-prod 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get resourcequota,limitrange,configmap -n awoooi-prod 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"rm -rf /tmp/awoooi-k8s\")",
"Bash(PYTHONPATH=. python -c \"from src.main import app; print\\(''Import OK''\\)\")",
"Bash(curl -s http://localhost:8000/api/v1/health/ready)",
"Bash(curl -s http://localhost:8000/api/v1/health/live)",
"Bash(curl -s http://localhost:8000/)",
"Bash(pkill -f \"uvicorn src.main:app\")",
"Bash(pkill -f \"node.*next\")",
"Bash(curl -s http://localhost:8000/api/v1/health)",
"Read(//Users/ogt/awoooi/apps/api/**)",
"Bash(pnpm typecheck:*)",
"Read(//Users/ogt/awoooi/apps/web/**)",
"Bash(curl -s -X POST http://localhost:8000/api/v1/dashboard/demo/spike/clear)",
"Read(//Users/ogt/awoooi/=== 驗證英文頁面 \\(/en/**)",
"Bash(jq \".devDependencies | keys | map\\(select\\(startswith\\(\"\"@playwright\"\"\\) or startswith\\(\"\"playwright\"\"\\)\\)\\)\")",
"Bash(npx playwright:*)",
"Bash(curl -s http://localhost:3000/zh-TW/demo -o /dev/null -w \"Frontend: HTTP %{http_code}\\\\n\")",
"Bash(__NEW_LINE_ef548029029cdfac__ echo:*)",
"Bash(curl -s http://localhost:8000/api/v1/health -o /dev/null -w \"Backend: HTTP %{http_code}\\\\n\")",
"Bash(echo '=== 已產出的截圖 ===' find /Users/ogt/awoooi/apps/web/test-results -name *.png)",
"Bash(echo '=== Playwright E2E 測試結果 ===' echo echo '📸 截圖證據 \\(test-results/screenshots/\\):' ls -la /Users/ogt/awoooi/apps/web/test-results/screenshots/ __NEW_LINE_db74e5f56e34db17__ echo echo '🎬 錄影證據 \\(.webm\\):' find /Users/ogt/awoooi/apps/web/test-results -name *.webm -exec ls -la {})",
"Bash(__NEW_LINE_db74e5f56e34db17__ echo:*)",
"Bash(source .venv/bin/activate)",
"Bash(python scripts/demo_multisig.py)",
"Bash(python -c \"from src.api.v1.approvals import router; print\\(''✅ Approvals router loaded:'', len\\(router.routes\\), ''routes''\\)\")",
"Bash(npx tsc:*)",
"Bash(chmod +x /Users/ogt/awoooi/scripts/demo-multisig-flow.sh)",
"Bash(python -c \"from src.main import app; print\\(''✅ API loads successfully''\\)\")",
"Bash(jq)",
"Bash(/Users/ogt/awoooi/scripts/demo-multisig-flow.sh)",
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/approvals\" -H \"Content-Type: application/json\" -d '{:*)",
"Bash(curl -s http://localhost:8000/api/v1/openapi.json)",
"Bash(python -c \":*)",
"Bash(curl -s http://localhost:3000 -o /dev/null -w \"%{http_code}\")",
"Bash(lsof -ti:3000,3001,8000)",
"Bash(curl -s http://localhost:8000/health)",
"Bash(curl -s http://localhost:8000/api/v1/approvals/pending)",
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:3001/zh-TW/demo)",
"Bash(ls -la test-results/*.png)",
"Bash(cp test-results/cpo102-*.png /Users/ogt/awoooi/docs/screenshots/)",
"Bash(ssh ogt@192.168.0.120 'cat /etc/rancher/k3s/k3s.yaml')",
"Bash(python -c \"from src.main import app; print\\(''✅ main.py imports OK''\\)\")",
"Bash(curl -s http://localhost:8000/api/v1/approvals/k8s-test)",
"Bash(sqlite3 awoooi.db \".tables\")",
"Bash(sshpass -p 0936223270 ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'sudo cat /etc/rancher/k3s/k3s.yaml')",
"Bash(kubectl --kubeconfig=/Users/ogt/awoooi/apps/api/k3s-prod.yaml get deployments -n awoooi-prod)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get deployments -n awoooi-prod 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get deployments -A 2>/dev/null\")",
"Bash(curl -s -X POST http://localhost:8000/api/v1/approvals -H \"Content-Type: application/json\" -d '{:*)",
"Bash(APPROVAL_ID=\"b58a0d86-fa4e-43ca-881c-02e978cd7943\")",
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/approvals/$APPROVAL_ID/sign\" -H \"Content-Type: application/json\" -d '{:*)",
"Bash(sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT operation_type, target_resource, namespace, success, dry_run_passed, dry_run_message, error_message, execution_duration_ms, created_at FROM audit_logs ORDER BY created_at DESC LIMIT 1;\" -header -column)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get pods -n monitoring -l app=grafana 2>/dev/null\")",
"Bash(curl -s http://192.168.0.188:11434/api/tags)",
"Bash(python -c \"from src.main import app; print\\(''✅ Compile OK''\\)\")",
"Bash(curl -s http://localhost:8000/api/v1/ai/status)",
"Bash(curl -s -X POST http://localhost:8000/api/v1/ai/analyze-and-propose -H \"Content-Type: application/json\" -d '{}')",
"Bash(curl -s -X POST http://192.168.0.188:11434/api/generate -H \"Content-Type: application/json\" -d '{\"\"\"\"model\"\"\"\":\"\"\"\"llama3.2:1b\"\"\"\",\"\"\"\"prompt\"\"\"\":\"\"\"\"Output only JSON: {\\\\\"\"\"\"action\\\\\"\"\"\":\\\\\"\"\"\"test\\\\\"\"\"\"}\"\"\"\",\"\"\"\"stream\"\"\"\":false,\"\"\"\"format\"\"\"\":\"\"\"\"json\"\"\"\"}' --max-time 30)",
"Bash(curl -s -X POST http://localhost:8000/api/v1/ai/analyze-and-propose -H \"Content-Type: application/json\" -d '{}' --max-time 60)",
"Bash(PROMPT='你是 ClawBot AI。分析以下監控數據輸出純 JSON無其他文字。:*)",
"Bash(curl -s -X POST http://192.168.0.188:11434/api/generate -H \"Content-Type: application/json\" -d \"{\"\"model\"\":\"\"llama3.2:1b\"\",\"\"prompt\"\":\"\"$PROMPT\"\",\"\"stream\"\":false,\"\"format\"\":\"\"json\"\",\"\"options\"\":{\"\"num_predict\"\":256,\"\"temperature\"\":0.1}}\" --max-time 60)",
"Bash(curl -s -X POST http://192.168.0.188:11434/api/generate -H \"Content-Type: application/json\" -d '{\"\"\"\"model\"\"\"\":\"\"\"\"llama3.2:1b\"\"\"\",\"\"\"\"prompt\"\"\"\":\"\"\"\"Harbor service returning 404. Output JSON: {\\\\\"\"\"\"suggested_action\\\\\"\"\"\":\\\\\"\"\"\"RESTART_DEPLOYMENT\\\\\"\"\"\",\\\\\"\"\"\"target_resource\\\\\"\"\"\":\\\\\"\"\"\"harbor\\\\\"\"\"\",\\\\\"\"\"\"namespace\\\\\"\"\"\":\\\\\"\"\"\"default\\\\\"\"\"\",\\\\\"\"\"\"risk_level\\\\\"\"\"\":\\\\\"\"\"\"medium\\\\\"\"\"\",\\\\\"\"\"\"reasoning\\\\\"\"\"\":\\\\\"\"\"\"Service down\\\\\"\"\"\",\\\\\"\"\"\"confidence\\\\\"\"\"\":0.8,\\\\\"\"\"\"affected_services\\\\\"\"\"\":[]}\"\"\"\",\"\"\"\"stream\"\"\"\":false,\"\"\"\"format\"\"\"\":\"\"\"\"json\"\"\"\",\"\"\"\"options\"\"\"\":{\"\"\"\"num_predict\"\"\"\":128,\"\"\"\"temperature\"\"\"\":0.1}}' --max-time 30)",
"Bash(curl -v -X POST http://192.168.0.188:11434/api/generate -H \"Content-Type: application/json\" -d '{\"\"\"\"model\"\"\"\":\"\"\"\"llama3.2:1b\"\"\"\",\"\"\"\"prompt\"\"\"\":\"\"\"\"Say hello\"\"\"\",\"\"\"\"stream\"\"\"\":false}' --max-time 30)",
"Bash(curl -s -X POST http://localhost:8000/api/v1/ai/analyze-and-propose -H \"Content-Type: application/json\" -d '{}' --max-time 120)",
"Bash(curl -s http://localhost:8000/api/v1/ai/analyze-and-propose -X POST -H \"Content-Type: application/json\")",
"Bash(curl -s http://localhost:8000/api/v1/dashboard)",
"Bash(ls -la ~/Downloads/image*.png)",
"Bash(ls -la ~/Desktop/image*.png)",
"Bash(ls -la /Users/ogt/awoooi/apps/web/public/*.png)",
"WebFetch(domain:openclaw.ai)",
"Bash(ls -la /Users/ogt/Downloads/*.png)",
"Bash(ls -la /Users/ogt/.gemini/antigravity/brain/*/image*.png)",
"Bash(ls -lat /Users/ogt/Downloads/*.png)",
"Bash(curl -s http://localhost:8000/api/v1/approvals)",
"Bash(curl -s -X GET http://localhost:8000/api/v1/approvals/)",
"Bash(APPROVAL_ID=\"4989729e-e518-4e7e-8dff-5c3269e0c82b\")",
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/approvals/$APPROVAL_ID/sign\" -H \"Content-Type: application/json\" -d '{\"\"\"\"signer_id\"\"\"\": \"\"\"\"ciso-001\"\"\"\", \"\"\"\"signer_name\"\"\"\": \"\"\"\"Demo CISO\"\"\"\", \"\"\"\"comment\"\"\"\": \"\"\"\"資安確認,核准執行\"\"\"\"}')",
"Bash(curl -s http://localhost:8000/api/v1/webhooks/health)",
"Bash(curl -s -X POST http://localhost:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{:*)",
"Bash(curl -s http://localhost:3000)",
"Bash(ls -la apps/web/test-results/*.png)",
"Bash(curl -s http://localhost:3000/zh-TW/demo)",
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:3333/zh-TW/demo)",
"Bash(curl -s http://localhost:8001/api/v1/approvals/pending)",
"Bash(curl -s -X POST http://localhost:8001/api/v1/approvals -H \"Content-Type: application/json\" -d '{:*)",
"Bash(curl -s http://localhost:8001/openapi.json)",
"Bash(curl -s http://localhost:8001/docs)",
"Bash(curl -s http://localhost:8001/api/v1/webhooks/grafana -X OPTIONS)",
"Bash(pnpm run:*)",
"Bash(node scripts/screenshot-rbac.mjs)",
"Bash(pnpm exec:*)",
"Bash(curl -s http://localhost:3333 -o /dev/null -w \"%{http_code}\")",
"Bash(curl -s http://localhost:3333/zh-TW/demo -o /dev/null -w \"%{http_code}\")",
"Bash(python3 -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''''Count: {d[count]}''''\\); [print\\(f''''- {a[id][:8]}... risk={a[risk_level]}''''\\) for a in d[''''approvals''''][:3]]\")",
"Bash(curl -s http://localhost:3000/zh-TW/demo -o /dev/null -w \"%{http_code}\")",
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f'''' Connected: {d[\"\"success\"\"]}''''\\); print\\(f'''' Namespaces: {d[\"\"namespaces\"\"][:3]}...''''\\)\" __NEW_LINE_57ae1c1c812968e7__ echo \"\" echo \"3. 資料庫持久化:\" sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT COUNT\\(*\\) as approvals FROM approval_records;\" sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT COUNT\\(*\\) as timeline FROM timeline_events;\" sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT COUNT\\(*\\) as audits FROM audit_logs;\")",
"Bash(head -2 __NEW_LINE_9bf9481fbdf30d4e__ echo \"\" echo \"2. 告警收斂跳過 LLM 日誌 \\(應該有 4 次\\):\" grep -c \"alert_converged_skip_llm\" /tmp/api-server.log)",
"Bash(python -m json.tool)",
"Bash(__NEW_LINE_7463bff94cecc20f__ echo:*)",
"Bash(__NEW_LINE_13846c8488c5fa9a__ echo:*)",
"Bash(__NEW_LINE_13846c8488c5fa9a__ ls:*)",
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f'''' Status: {d[\"\"status\"\"]}''''\\)\" __NEW_LINE_32366ca1bb050259__ echo \"\" echo \"2. 待簽核記錄 \\(含 hit_count\\):\" curl -s http://localhost:8000/api/v1/approvals/pending)",
"Read(//Users/ogt/awoooi/**)",
"Bash(curl -s http://localhost:8000/api/v1/timeline/events?limit=10)",
"Bash(curl -s http://localhost:8000/api/v1/timeline/events?limit=5)",
"Bash(ls -la /Users/ogt/awoooi/apps/api/*.txt /Users/ogt/awoooi/apps/api/*.toml)",
"Bash(ls -la /Users/ogt/awoooi/docker-compose*.yml)",
"Bash(ls /Users/ogt/awoooi/k8s/awoooi-prod/*rbac* /Users/ogt/awoooi/k8s/awoooi-prod/*service-account*)",
"Bash(kubectl kustomize:*)",
"Bash(docker compose:*)",
"Bash(docker info:*)",
"Bash(python3 -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(''''API Status:'''', d.get\\(''''status'''', ''''unknown''''\\)\\)\")",
"Bash(pkill -9 -f uvicorn)",
"Bash(lsof -ti:8000)",
"Bash(open -a Docker)",
"Bash(docker stop:*)",
"Bash(lsof -ti:3000)",
"Bash(docker start:*)",
"Bash(docker ps:*)",
"Bash(curl -s http://localhost:3000 -o /dev/null -w 'HTTP Status: %{http_code}\\\\n')",
"Bash(curl -I http://localhost:8000/api/v1/dashboard/stream)",
"Bash(curl -s http://localhost:8000/openapi.json)",
"Bash(curl -s http://localhost:8000/api/v1/dashboard/stream --max-time 3 -w \"\\\\n--- HTTP Status: %{http_code} ---\\\\n\")",
"Bash(curl -s http://localhost:8000/api/v1/dashboard/stream --max-time 3)",
"Bash(curl -s http://localhost:3000/zh-TW -o /dev/null -w \"HTTP Status: %{http_code}\\\\n\")",
"Bash(curl -s -D - http://localhost:8000/api/v1/dashboard/stream --max-time 2)",
"Bash(chmod +x /Users/ogt/awoooi/scripts/deploy-infra.sh)",
"Bash(./scripts/deploy-infra.sh)",
"Bash(pnpm --filter @awoooi/web build)",
"Bash(timeout 10 env MOCK_MODE=true OTEL_ENABLED=false uvicorn src.main:app --host 0.0.0.0 --port 8099)",
"Bash(timeout 8 pnpm --filter @awoooi/web dev)",
"Bash(git diff:*)",
"Bash(curl -s -I http://localhost:8000/api/v1/dashboard/stream)",
"Bash(timeout 3 curl -s -N http://localhost:8000/api/v1/dashboard/stream)",
"Bash(grep -n \"NEXT_PUBLIC\\\\|API_URL\\\\|localhost\" /Users/ogt/awoooi/apps/web/.env*)",
"Bash(timeout 2 curl -s -D - -N http://localhost:8000/api/v1/dashboard/stream)",
"Bash(curl -s http://localhost:3000/)",
"Bash(python -m py_compile scripts/fire_test_alert.py)",
"Bash(python -m scripts.fire_test_alert --help)",
"Bash(python -m scripts.fire_test_alert)",
"Bash(python -m scripts.fire_test_alert --type k8s_pod_crash)",
"Bash(timeout 3 curl -s -N -H \"Origin: http://localhost:3000\" http://localhost:8000/api/v1/dashboard/stream)",
"Bash(python -m scripts.fire_test_alert --type disk_full)",
"Bash(docker restart:*)",
"Bash(curl -s -w \"\\\\nHTTP_CODE: %{http_code}\\\\n\" http://localhost:3000)",
"Bash(docker exec:*)",
"Bash(docker rmi:*)",
"Bash(timeout 5 curl -s -N http://localhost:8000/api/v1/dashboard/stream)",
"Bash(curl -s http://localhost:3000 -w \"\\\\nHTTP: %{http_code}\\\\n\")",
"Bash(timeout 120 docker logs awoooi-api -f --since 1s)",
"Bash(curl -s -I -H \"Origin: http://localhost:3000\" http://localhost:8000/api/v1/dashboard/stream)",
"Bash(curl -s -X OPTIONS -H \"Origin: http://localhost:3000\" -H \"Access-Control-Request-Method: GET\" http://localhost:8000/api/v1/dashboard/stream -I)",
"Bash(node /Users/ogt/awoooi/scripts/verify-sse.js)",
"Bash(python -m scripts.fire_test_alert --type db_connection_timeout)",
"Bash(npm run:*)",
"Bash(docker-compose down:*)",
"Bash(docker-compose build:*)",
"Bash(docker-compose up:*)",
"Bash(pkill -f 'next dev')",
"Bash(node /Users/ogt/awoooi/scripts/test-approval-flow.js)",
"Bash(python -m scripts.fire_test_alert --type pod_crash)",
"Bash(node /Users/ogt/awoooi/scripts/test-k8s-executor.js)",
"Bash(kubectl cluster-info:*)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl cluster-info)",
"Bash(ls -la /Users/ogt/awoooi/apps/web/src/app/[locale]/)",
"Bash(python -c \"from src.api.v1 import audit_logs; print\\(''API module loads OK''\\)\")",
"Bash(curl -s http://localhost:3000/zh-TW/action-logs)",
"Bash(pnpm build:*)",
"Bash(curl -s http://localhost:8000/api/v1/audit-logs)",
"Bash(xargs -r kill -9 2)",
"Bash(/dev/null source:*)",
"Bash(python -c \"from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor; print\\(''''httpx ok''''\\)\")",
"Bash(sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT * FROM audit_logs ORDER BY created_at DESC LIMIT 5;\")",
"Bash(sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT name FROM sqlite_master WHERE type=''table'';\")",
"Bash(sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT id, event_type, status, title, created_at FROM timeline_events ORDER BY created_at DESC LIMIT 5;\")",
"Bash(curl -s http://localhost:8000/api/v1/audit-logs/stats)",
"Bash(curl -s http://localhost:8000/api/v1/timeline?limit=10)",
"Bash(curl -s \"http://localhost:8000/api/v1/timeline\")",
"Bash(curl -s http://localhost:8000/api/v1/docs)",
"Bash(chmod +x /Users/ogt/awoooi/scripts/setup-guardrails.sh /Users/ogt/awoooi/scripts/ai_code_reviewer.py)",
"Bash(ls -la /Users/ogt/awoooi/apps/web/.eslintrc*)",
"Bash(ls -la scripts/*.py scripts/*.sh .pre-commit-config.yaml .secrets.baseline apps/web/.eslintrc.js)",
"Bash(python -m src.services.test_context_gatherer)",
"Bash(python -m pytest src/services/test_context_gatherer.py -v)",
"Bash(grep -r \"ClawBot\\\\|clawbot\\\\|CLAWBOT\" --include=*.py --include=*.ts --include=*.tsx apps/)",
"Bash(python scripts/e2e_openclaw_test.py)",
"Bash(python -m pytest tests/e2e_network_test.py -v --tb=short)",
"Bash(chmod +x /Users/ogt/awoooi/apps/api/scripts/apply_prometheus_config.sh /Users/ogt/awoooi/apps/api/scripts/fire_live_alert.py)",
"Bash(./scripts/apply_prometheus_config.sh)",
"Bash(python scripts/fire_live_alert.py oomkilled)",
"Bash(python scripts/fire_live_alert.py oomkilled --api-url http://localhost:8000)",
"Bash(python scripts/fire_live_alert.py highcpu --api-url http://localhost:8000)",
"Bash(python scripts/fire_live_alert.py podcrash --api-url http://localhost:8000)",
"Bash(python -m pytest tests/test_webhook_telegram_integration.py -v)",
"Bash(ls -la /Users/ogt/awoooi/apps/api/.env*)",
"Bash(ls -la /Users/ogt/wooo-aiops/.env*)",
"Bash(ls -la /Users/ogt/AIOps/.env*)",
"Bash(/Users/ogt/awoooi/apps/api/.env:*)",
"Bash(/tmp/deploy-188-home.sh:*)",
"Bash(chmod +x /tmp/deploy-188-home.sh)",
"Bash(scp /tmp/awoooi-api-deploy.tar.gz /tmp/deploy-188-home.sh ollama@192.168.0.188:/tmp/)",
"Bash(ssh ollama@192.168.0.188 \"bash /tmp/deploy-188-home.sh\")",
"Bash(ssh ollama@192.168.0.188 \"curl -s http://localhost:8000/api/v1/webhooks/health\")",
"Bash(ssh ollama@192.168.0.188 \"tail -50 /tmp/openclaw.log\")",
"Bash(ssh ollama@192.168.0.188 \"cd /home/ollama/awoooi-api && source .venv/bin/activate && pip install sqlalchemy aiosqlite -q && pip install httpx python-dotenv pydantic-settings -q\")",
"Bash(ssh ollama@192.168.0.188 \"cd /home/ollama/awoooi-api && pkill -f ''uvicorn src.main:app'' 2>/dev/null; sleep 1; source .venv/bin/activate && nohup uvicorn src.main:app --host 0.0.0.0 --port 8000 > /tmp/openclaw.log 2>&1 & sleep 3 && curl -s http://localhost:8000/api/v1/webhooks/health\")",
"Bash(ssh ollama@192.168.0.188:*)",
"Bash(pkill -f ngrok)",
"Bash(pkill -f \"ssh -fN.*8001\")",
"Bash(ssh -fN -L 8001:localhost:8000 ollama@192.168.0.188)",
"Bash(curl -s http://localhost:8001/api/v1/webhooks/health)",
"Bash(BOT_TOKEN=\"8569720657:AAHdvKf_P2ms-QKFTyqTLtLiqEggz8cpjMk\" curl -s \"https://api.telegram.org/bot$BOT_TOKEN/getWebhookInfo\")",
"Bash(curl -s https://api.telegram.org/bot$BOT_TOKEN/getWebhookInfo)",
"Bash(curl -s http://localhost:8001/api/v1/webhooks/)",
"Bash(curl -s http://localhost:8001/)",
"Bash(curl -s http://localhost:8001/api/v1/health)",
"Bash(scp /tmp/awoooi-api-v7.tar.gz ollama@192.168.0.188:/tmp/)",
"Bash(tar -czvf /tmp/awoooi-api-v7.1.tar.gz src/ requirements.txt pyproject.toml)",
"Bash(scp /tmp/awoooi-api-v7.1.tar.gz ollama@192.168.0.188:/tmp/)",
"Bash(ssh ollama@192.168.0.188 \"tail -10 /tmp/openclaw.log | grep -E ''''clickhouse|signoz_gold''''\")",
"Bash(ssh ogt@192.168.0.188 \"cd /home/ollama/awoooi-api && tail -50 nohup.out 2>/dev/null || journalctl -u awoooi-api --no-pager -n 50 2>/dev/null || echo ''請手動檢查日誌''\")",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8123/ -d \"SELECT 1 FORMAT JSONEachRow\")",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:11434/api/tags)",
"Bash(ssh -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o BatchMode=yes -o ConnectTimeout=5 ollama@192.168.0.188 \"echo ok\")",
"Bash(ssh -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o BatchMode=yes -o ConnectTimeout=5 wooo@192.168.0.188 \"echo ok\")",
"Bash(ssh -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o BatchMode=yes -o ConnectTimeout=5 root@192.168.0.188 \"echo ok\")",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8001/health)",
"Bash(ssh root@192.168.0.188 \"cat /tmp/openclaw.log 2>/dev/null | tail -100 || echo ''Log file not found''\")",
"Bash(ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 ollama@192.168.0.188 \"echo ok\")",
"Bash(ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 wooo@192.168.0.188 \"echo ok\")",
"Bash(scp /Users/ogt/awoooi/apps/api/src/services/signoz_client.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/services/)",
"Bash(scp /Users/ogt/awoooi/apps/api/src/services/openclaw.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/services/)",
"Bash(scp /Users/ogt/awoooi/apps/api/src/services/telegram_gateway.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/services/)",
"Bash(scp /Users/ogt/awoooi/apps/api/src/api/v1/webhooks.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/api/v1/)",
"Bash(scp /Users/ogt/awoooi/apps/api/src/models/ai.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/models/)",
"Bash(ssh ollama@192.168.0.188 \"cd /home/ollama/awoooi-api && pkill -f ''''uvicorn src.main:app'''' && sleep 2 && nohup .venv/bin/python3 -m uvicorn src.main:app --host 0.0.0.0 --port 8000 > nohup.out 2>&1 &\")",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8000/health)",
"Bash(curl -s --connect-timeout 10 http://192.168.0.188:8000/health)",
"Bash(curl -s -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{:*)",
"Bash(curl -s -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"high_cpu\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"api-gateway\"\",\"\"namespace\"\":\"\"awoooi-prod\"\",\"\"message\"\":\"\"CPU 92% test\"\"}')",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"high_cpu\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"api-gateway\"\",\"\"namespace\"\":\"\"awoooi-prod\"\",\"\"message\"\":\"\"CPU 92% - 統帥全自主驗收 v2\"\"}')",
"Bash(curl -s --connect-timeout 30 --max-time 120 -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{:*)",
"Bash(curl -s --connect-timeout 30 --max-time 180 -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{:*)",
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"k8s_pod_crash\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"inventory-api\"\",\"\"namespace\"\":\"\"commerce\"\",\"\"message\"\":\"\"Pod crash - 統帥終極驗收\"\"}' --connect-timeout 30 --max-time 180)",
"Bash(ssh -o ConnectTimeout=10 ollama@192.168.0.188 \"echo OK && ps aux | grep uvicorn | grep -v grep | head -2\")",
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"ssl_expiry\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"nginx-ingress\"\",\"\"namespace\"\":\"\"ingress\"\",\"\"message\"\":\"\"SSL 即將過期 - 終極驗收\"\"}' --connect-timeout 30 --max-time 180)",
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"db_connection_timeout\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"postgres-primary\"\",\"\"namespace\"\":\"\"database\"\",\"\"message\"\":\"\"DB 連線逾時 - SignOz 整合終極測試\"\"}' --connect-timeout 30 --max-time 180)",
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"service_404\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"auth-service\"\",\"\"namespace\"\":\"\"identity\"\",\"\"message\"\":\"\"Service 404 - SignOz + Ollama 整合終極測試\"\"}' --connect-timeout 30 --max-time 180)",
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"high_cpu\"\",\"\"severity\"\":\"\"warning\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"recommendation-engine\"\",\"\"namespace\"\":\"\"ml\"\",\"\"message\"\":\"\"CPU 78% - Ollama 最終測試\"\"}' --connect-timeout 30 --max-time 200)",
"Bash(scp apps/api/src/services/openclaw.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/services/openclaw.py)",
"Bash(scp /Users/ogt/awoooi/apps/api/src/core/http_client.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/core/)",
"Bash(scp /Users/ogt/awoooi/apps/api/src/main.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/)",
"Bash(scp /Users/ogt/awoooi/apps/api/src/core/config.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/core/)",
"Bash(scp /Users/ogt/awoooi/apps/api/src/api/v1/health.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/api/v1/)",
"Bash(ssh -o ConnectTimeout=5 ollama@192.168.0.188 \"ps aux | grep uvicorn | grep -v grep\")",
"Bash(curl -s -H \"Origin: http://localhost:3000\" -H \"Access-Control-Request-Method: GET\" -X OPTIONS http://192.168.0.188:8000/api/v1/health -v)",
"Bash(curl -s http://192.168.0.188:8000/api/v1/health)",
"Bash(curl -s -N --max-time 3 http://192.168.0.188:8000/api/v1/dashboard/stream)",
"Bash(curl -s http://localhost:3000/zh-TW -o /dev/null -w \"%{http_code}\")",
"Bash(open http://localhost:3000/zh-TW)",
"Bash(open http://localhost:3001/zh-TW)",
"Bash(curl -s -H \"Origin: http://localhost:3001\" http://192.168.0.188:8000/api/v1/dashboard/stream --max-time 3)",
"Bash(curl -s -I -H \"Origin: http://localhost:3001\" http://192.168.0.188:8000/api/v1/health)",
"Bash(curl -s http://192.168.0.188:8000/api/v1/approvals/pending)",
"Bash(curl -s http://192.168.0.188:8000/api/v1/approvals)",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/approvals?status=pending_approval\")",
"Bash(xargs sed:*)",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/approvals/history?limit=5\")",
"Bash(curl -s http://192.168.0.188:8000/api/v1/approvals/approved)",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/timeline?limit=10\")",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/action-logs\")",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/timeline/events?limit=10\")",
"Bash(ssh ogt@192.168.0.188 \"kubectl get nodes\")",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/approvals/k8s-test\")",
"Bash(scp /Users/ogt/awoooi/apps/api/k3s-prod.yaml ogt@192.168.0.188:~/awoooi-api/k3s-prod.yaml)",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/timeline/events?limit=5\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"cat /etc/rancher/k3s/k3s.yaml\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.188 \"echo ''SSH OK'' && pwd\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"echo ''SSH OK'' && pwd && ls -la ~/awoooi-api/ 2>/dev/null || echo ''Directory not found''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"sshpass -p ''0936223270'' scp -o StrictHostKeyChecking=no wooo@192.168.0.120:/etc/rancher/k3s/k3s.yaml ~/awoooi-api/k3s-prod.yaml && sed -i ''s/127.0.0.1/192.168.0.120/g'' ~/awoooi-api/k3s-prod.yaml && echo ''Kubeconfig deployed!'' && head -10 ~/awoooi-api/k3s-prod.yaml\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd ~/awoooi-api && pkill -f ''uvicorn'' 2>/dev/null; sleep 1; nohup .venv/bin/uvicorn src.main:app --host 0.0.0.0 --port 8000 --reload > nohup.out 2>&1 & sleep 3; echo ''=== API Restarted ==='' && tail -20 nohup.out\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd ~/awoooi-api && pkill -f ''uvicorn src.main'' || true\")",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/health\" --connect-timeout 5)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 ollama@192.168.0.188 \"cd ~/awoooi-api && source .venv/bin/activate && nohup uvicorn src.main:app --host 0.0.0.0 --port 8000 > nohup.out 2>&1 &\")",
"Bash(sshpass -p:*)",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/health\" --connect-timeout 10)",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/timeline/events?limit=8\")",
"Bash(curl -s http://localhost:3000/zh-TW -o /dev/null -w \"Frontend: HTTP %{http_code}\\\\n\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'curl -s http://localhost:8000/api/v1/approvals/pending | jq -r \"\".approvals[] | \\\\\"\"ID: \\\\\\(.id\\) | Action: \\\\\\(.action\\)\\\\\"\"\"\"')",
"Bash(curl -s --connect-timeout 5 https://awoooi.wooo.tw/api/v1/health)",
"Bash(curl -s --connect-timeout 5 https://awoooi.wooo.tw/api/v1/approvals/pending)",
"Bash(ssh ollama@192.168.70.188 \"ps aux | grep uvicorn | grep -v grep | head -3\")",
"Bash(ssh -o ConnectTimeout=10 ollama@192.168.70.188 \"echo ''SSH Connected''\")",
"Bash(ping -c 2 -t 5 192.168.70.188)",
"Bash(curl -s --connect-timeout 10 https://awoooi.wooo.tw/api/v1/health)",
"Bash(ssh -o ConnectTimeout=10 ollama@192.168.0.188 \"echo ''SSH Connected to 188 Base''\")",
"Bash(grep -B 5 -A 30 \"async def add_signature\" /Users/ogt/awoooi/apps/api/src/services/*.py)",
"Bash(ssh ogt@192.168.0.188 \"cd /home/ogt/awoooi && docker compose ps\")",
"Bash(ls -la .env*)",
"Bash(.env:*)",
"Bash(timeout 15 python -m uvicorn src.main:app --host 0.0.0.0 --port 8001)",
"Bash(timeout 20 python -m uvicorn src.main:app --host 0.0.0.0 --port 8001)",
"Bash(timeout 25 python -m uvicorn src.main:app --host 0.0.0.0 --port 8001)",
"Bash(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no ogt@192.168.0.188 \"cd /home/ogt/wooo-aiops && docker compose ps clawbot 2>/dev/null || docker ps | grep -i claw\")",
"Bash(ls -la ~/.ssh/*.pub)",
"Bash(ssh -i ~/.ssh/id_rsa -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o PasswordAuthentication=no ogt@192.168.0.188 \"echo connected\")",
"Bash(curl -s \"https://api.telegram.org/bot8569720657:AAHdvKf_P2ms-QKFTyqTLtLiqEggz8cpjMk/logOut\")",
"Bash(curl -s \"https://api.telegram.org/bot8569720657:AAHdvKf_P2ms-QKFTyqTLtLiqEggz8cpjMk/close\")",
"Bash(curl -s \"https://api.telegram.org/bot8569720657:AAHdvKf_P2ms-QKFTyqTLtLiqEggz8cpjMk/getUpdates?timeout=3&limit=1\")",
"Bash(ping -c 1 192.168.0.188)",
"Bash(python -m tests.test_redis_multisig)",
"Bash(curl -v -X POST http://localhost:8000/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{:*)",
"Bash(python3 -c \":*)",
"Bash(echo ' 無法連線' __NEW_LINE_8fc87454f9798a7d__ echo echo [結論]: echo ' /signals 端點尚未部署到 .188' echo ' 程式碼已完成,需要執行:' echo \" cd apps/api && docker build -t awoooi-api . && docker-compose up -d\")",
"Bash(__NEW_LINE_dc88f37970737861__ cd:*)",
"Bash(__NEW_LINE_dc88f37970737861__ echo:*)",
"Read(//Users/**)",
"Bash(tail -20 __NEW_LINE_8b049957a9782734__ echo \"\" echo \"[Step 2] 等待容器啟動 \\(10 秒\\)...\" sleep 10 __NEW_LINE_8b049957a9782734__ echo \"\" echo \"[Step 3] 檢查容器狀態...\" docker compose ps)",
"Bash(tail -5 __NEW_LINE_275e0094e9dcb44a__ echo \"\" echo \"[1.2] 重建 API 容器 \\(含 Signal Worker\\)...\" docker compose build api)",
"Bash(1 __NEW_LINE_275e0094e9dcb44a__ echo \"\" echo \"[1.4] 等待服務就緒 \\(15 秒\\)...\" sleep 15 __NEW_LINE_275e0094e9dcb44a__ echo \"\" echo \"[1.5] 檢查容器狀態...\" docker compose ps)",
"Bash(__NEW_LINE_f4c8301ec5249760__ echo:*)",
"Bash(__NEW_LINE_21ba3cf3700d942d__ cd:*)",
"Bash(1 __NEW_LINE_9a14b79fc58c11ba__ echo \"\" echo \"[1.3] 等待服務就緒 \\(15 秒\\)...\" sleep 15 __NEW_LINE_9a14b79fc58c11ba__ echo \"\" echo \"[1.4] 檢查容器狀態...\" docker compose ps api)",
"Bash(1 __NEW_LINE_6b654ca5be87c137__ echo \"\" echo \"[2] 等待服務就緒 \\(15 秒\\)...\" sleep 15 __NEW_LINE_6b654ca5be87c137__ echo \"\" echo \"[3] 發送測試 Signal...\" curl -s -X POST http://localhost:8000/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{:*)",
"Bash(__NEW_LINE_564908ddf866c081__ echo:*)",
"Bash(chmod +x /Users/ogt/awoooi/apps/api/scripts/test_phase63_aggregation.py)",
"Bash(python scripts/test_phase63_aggregation.py)",
"Bash(xargs -r docker exec -i awoooi-redis redis-cli DEL)",
"Bash(chmod +x /Users/ogt/awoooi/apps/api/scripts/test_race_condition.py)",
"Bash(python scripts/test_race_condition.py)",
"Bash(chmod +x /Users/ogt/awoooi/apps/api/scripts/test_phase64_proposal.py)",
"Bash(python scripts/test_phase64_proposal.py)",
"Bash(python agent.py --alert FINAL_PHASE_6_TEST)",
"Bash(AWOOOI_REDIS_URL=\"redis://localhost:6379/0\" python agent.py --alert FINAL_PHASE_6_TEST)",
"Bash(curl -s http://localhost:8000/api/v1/incidents)",
"Bash(curl -s -X POST http://localhost:8000/api/v1/incidents/INC-20260322-06085B/proposal)",
"Bash(grep -r \"mock\\\\|Mock\\\\|MOCK\\\\|fake\\\\|Fake\\\\|dummy\\\\|hardcode\" /Users/ogt/awoooi/apps/web/src --include=*.tsx --include=*.ts -l)",
"Bash(NEXT_PUBLIC_API_URL=http://localhost:8000 pnpm next build --no-lint)",
"Bash(grep -v \"Traceback\\\\|File \"\"/usr\\\\|^\\\\s*$\")",
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''''Signal Count: {len\\(d[\"\"signals\"\"]\\)}''''\\); [print\\(f'''' - {s[\"\"alert_name\"\"]} \\({s[\"\"signal_id\"\"]}\\)''''\\) for s in d[''''signals'''']]\")",
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:3003/zh-TW)",
"Bash(curl -s -X GET \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3003\" -H \"Access-Control-Request-Method: GET\" -v)",
"Bash(grep -r TELEGRAM /Users/ogt/awoooi/apps/api/.env*)",
"Bash(grep -r TELEGRAM_BOT_TOKEN /Users/ogt/awoooi --include=*.env* --include=*.yaml --include=*.yml)",
"Bash(curl -s -I -X OPTIONS \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3000\" -H \"Access-Control-Request-Method: GET\")",
"Bash(curl -s \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3000\")",
"Bash(python /tmp/e2e_drill.py)",
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); i=[x for x in d[''''incidents''''] if x[''''incident_id'''']==''''INC-20260322-06085B''''][0]; print\\(f\"\"Incident: {i[''''incident_id'''']}\"\"\\); print\\(f\"\"Signals: {i[''''signal_count'''']}\"\"\\); print\\(f\"\"Updated: {i[''''updated_at'''']}\"\"\\)\")",
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/telegram/test\")",
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/telegram/test-push\" -H \"Content-Type: application/json\" -d '{\"\"\"\"approval_id\"\"\"\": \"\"\"\"15ab6844-ca4e-4a13-aead-dc71cd342445\"\"\"\", \"\"\"\"risk_level\"\"\"\": \"\"\"\"critical\"\"\"\", \"\"\"\"resource_name\"\"\"\": \"\"\"\"api-gateway\"\"\"\", \"\"\"\"root_cause\"\"\"\": \"\"\"\"E2E DRILL - PodCrashLoopBackOff\"\"\"\", \"\"\"\"suggested_action\"\"\"\": \"\"\"\"RESTART_DEPLOYMENT\"\"\"\", \"\"\"\"estimated_downtime\"\"\"\": \"\"\"\"5-15 min\"\"\"\"}')",
"Bash(curl -s -o /dev/null -w \"HTTP Status: %{http_code}\\\\n\" http://localhost:3000/zh-TW)",
"Bash(curl -s -I \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3000\")",
"Bash(curl -s -X POST http://localhost:8000/api/v1/incidents/INC-20260322-19DF60/proposal)",
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/telegram/test-push\" -H \"Content-Type: application/json\" -d '{\"\"\"\"approval_id\"\"\"\": \"\"\"\"942e762e-fb97-480f-b21a-d3be67fa70b1\"\"\"\", \"\"\"\"risk_level\"\"\"\": \"\"\"\"critical\"\"\"\", \"\"\"\"resource_name\"\"\"\": \"\"\"\"core-system\"\"\"\", \"\"\"\"root_cause\"\"\"\": \"\"\"\"E2E DRILL TAKE 2 - 二次實彈演習\"\"\"\", \"\"\"\"suggested_action\"\"\"\": \"\"\"\"INVESTIGATE_SERVICE\"\"\"\", \"\"\"\"estimated_downtime\"\"\"\": \"\"\"\"5-15 min\"\"\"\"}')",
"Bash(curl -s \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3000\" -H \"Accept: application/json\")",
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''''Incidents: {d[\"\"count\"\"]}''''\\); [print\\(f'''' - {i[\"\"incident_id\"\"]} | {i[\"\"severity\"\"]} | {i[\"\"signal_count\"\"]} signals | {i[\"\"affected_services\"\"]}''''\\) for i in d[''''incidents'''']]\")",
"Bash(curl -s \"http://localhost:8000/api/v1/approvals/pending\" -H \"Origin: http://localhost:3000\")",
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''''Pending: {d[\"\"count\"\"]} approvals''''\\); [print\\(f'''' - {a[\"\"id\"\"][:8]}... | {a[\"\"risk_level\"\"]} | {a[\"\"action\"\"][:30]}...''''\\) for a in d[''''approvals''''][:3]]\")",
"Bash(mkdir -p /Users/ogt/awoooi/apps/web/public/fonts)",
"Bash(curl -sL -o DSEG7Classic-Bold.woff2 \"https://cdn.jsdelivr.net/npm/dseg@0.46.0/fonts/DSEG7-Classic/DSEG7Classic-Bold.woff2\")",
"Bash(curl -sL -o DSEG7Classic-Bold.woff \"https://cdn.jsdelivr.net/npm/dseg@0.46.0/fonts/DSEG7-Classic/DSEG7Classic-Bold.woff\")",
"Bash(curl -sL -o DSEG7Classic-Regular.woff2 \"https://cdn.jsdelivr.net/npm/dseg@0.46.0/fonts/DSEG7-Classic/DSEG7Classic-Regular.woff2\")",
"Bash(curl -sL -o DSEG7Classic-Regular.woff \"https://cdn.jsdelivr.net/npm/dseg@0.46.0/fonts/DSEG7-Classic/DSEG7Classic-Regular.woff\")",
"Bash(pnpm next:*)",
"Bash(chmod +x /Users/ogt/awoooi/scripts/bootstrap_prod.sh)",
"Bash(/Users/ogt/awoooi/.env:*)",
"Bash(grep -E \"^\\\\.env$|03-secrets\\\\.yaml\" .gitignore)",
"Bash(echo 'Adding to .gitignore...' if ! grep -q ^.env$ .gitignore)",
"Bash(then echo:*)",
"Bash(git add:*)",
"Bash(git commit:*)",
"Bash(git push:*)",
"Bash(git remote:*)",
"Bash(gh repo:*)",
"Bash(gh api:*)",
"Bash(gh run:*)",
"Bash(ls -la pnpm-*.yaml package.json turbo.json)",
"Bash(git status:*)",
"Bash(gh workflow:*)",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod -o wide\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-77545758fc-xnncc -n awoooi-prod --tail=50\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-77545758fc-xnncc -n awoooi-prod 2>&1 | grep -i ''cors'' -A 5 -B 5\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-79948cbbbf-b8cgj -n awoooi-prod --tail=100\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod -l app=awoooi-api --sort-by=.metadata.creationTimestamp -o name | tail -1 | xargs kubectl logs -n awoooi-prod --tail=50\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''{.data.OPENCLAW_TG_USER_WHITELIST}'' | base64 -d\")",
"Bash(ssh wooo@192.168.0.120 'kubectl patch secret awoooi-secrets -n awoooi-prod --type='\"''\"'json'\"''\"' -p='\"''\"'[:*)",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout restart deployment/awoooi-api -n awoooi-prod && kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout restart deployment/awoooi-worker -n awoooi-prod && kubectl rollout status deployment/awoooi-worker -n awoooi-prod --timeout=120s\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-747967b787-fcx2r -n awoooi-prod --tail=30\")",
"Bash(ssh wooo@192.168.0.110 \"ps aux | grep -E ''actions-runner|Runner'' | grep -v grep\")",
"Bash(curl -sf http://192.168.0.120:32334/api/v1/health)",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-fd795cd87-rdpgn -n awoooi-prod --tail=30\")",
"Bash(ssh wooo@192.168.0.110 \"curl -sf http://192.168.0.120:32334/api/v1/health | jq .status\")",
"Bash(ssh wooo@192.168.0.110 \"curl -sf http://192.168.0.120:32334/api/v1/health\")",
"Bash(ssh wooo@192.168.0.120 \"curl -sf http://localhost:32334/api/v1/health\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get svc -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"curl -sf http://10.43.125.201:8000/api/v1/health\")",
"Bash(ssh wooo@192.168.0.120 \"curl -sf http://10.43.105.105:3000/ -o /dev/null && echo ''Web OK''\")",
"Bash(ssh ogt@192.168.0.188 \"ls -la /etc/nginx/sites-available/\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=50\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-795c95ff76-wch2p -n awoooi-prod --tail=30\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod && ss -tlnp | grep 32334\")",
"Bash(ssh wooo@192.168.0.120 \"curl -sf http://127.0.0.1:32334/api/v1/health | head -c 200\")",
"Bash(ssh wooo@192.168.0.120 \"sudo ufw status 2>/dev/null || sudo iptables -L INPUT -n | head -20\")",
"Bash(ssh wooo@192.168.0.110 \"curl -sf --connect-timeout 5 http://192.168.0.120:32334/api/v1/health | head -c 100\")",
"Bash(ssh wooo@192.168.0.110 \"curl -v --connect-timeout 5 http://192.168.0.120:32334/api/v1/health 2>&1 | head -30\")",
"Bash(ssh wooo@192.168.0.120 \"cat /etc/systemd/system/k3s.service 2>/dev/null | grep -i exec || ps aux | grep k3s | head -3\")",
"Bash(ssh wooo@192.168.0.120 \"cat /etc/systemd/system/k3s.service\")",
"Bash(ssh wooo@192.168.0.120 \"netstat -tlnp 2>/dev/null | grep 32334 || ss -tlnp | grep 32334\")",
"Bash(ssh wooo@192.168.0.110 \"curl -sf --connect-timeout 5 http://192.168.0.120:31234/health 2>&1 | head -c 100\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy allow-nginx-ingress -n awoooi-prod -o yaml\")",
"Bash(curl -sk https://awoooi.wooo.work/api/v1/health)",
"Bash(curl -sk -I -X OPTIONS https://awoooi.wooo.work/api/v1/health -H \"Origin: https://awoooi.wooo.work\" -H \"Access-Control-Request-Method: GET\")",
"Bash(ssh wooo@192.168.0.120 \"curl -sI --connect-timeout 3 http://127.0.0.1:32334/api/v1/health 2>&1 | head -5\")",
"Bash(ssh wooo@192.168.0.120 \"curl -sI --connect-timeout 3 http://127.0.0.1:32335/ 2>&1 | head -5\")",
"Bash(ssh wooo@192.168.0.121 \"curl -sI --connect-timeout 3 http://127.0.0.1:32334/api/v1/health 2>&1 | head -5\")",
"Bash(ssh wooo@192.168.0.121 \"curl -sI --connect-timeout 3 http://127.0.0.1:32335/ 2>&1 | head -5\")",
"Bash(ssh wooo@192.168.0.120 \"sudo iptables -t nat -L KUBE-NODEPORTS -n 2>/dev/null | head -20\")",
"Bash(ssh wooo@192.168.0.120 \"sudo netstat -tlnp | grep -E ''32334|32335''\")",
"Bash(ssh wooo@192.168.0.120 \"ss -tlnp 2>/dev/null | grep -E ''32334|32335'' || netstat -tln | grep -E ''32334|32335''\")",
"Bash(ssh wooo@192.168.0.120 \"ss -tln | grep -E ''32334|32335|:323''\")",
"Bash(ssh wooo@192.168.0.120 \"ss -tln\")",
"Bash(ssh wooo@192.168.0.120 \"export KUBECONFIG=/home/wooo/.kube/config-120; /home/wooo/bin/kubectl get svc -n awoooi-prod -o wide\")",
"Bash(ssh wooo@192.168.0.120 \"which kubectl || find /usr -name kubectl 2>/dev/null | head -1\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get svc -n awoooi-prod && kubectl get pods -n awoooi-prod -o wide\")",
"Bash(ssh wooo@192.168.0.120 \"export KUBECONFIG=/home/wooo/.kube/config-120 && kubectl logs awoooi-api-546b88465d-lb8zm -n awoooi-prod --tail 80\")",
"Bash(ssh wooo@192.168.0.120 \"KUBECONFIG=/home/wooo/.kube/config-120 kubectl logs awoooi-api-546b88465d-lb8zm -n awoooi-prod --tail 80 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"ls -la /home/wooo/.kube/ && cat /home/wooo/.kube/config-120 2>/dev/null | head -20 || cat /etc/rancher/k3s/k3s.yaml 2>/dev/null | head -20\")",
"Bash(ssh wooo@192.168.0.120 \"sudo cat /etc/rancher/k3s/k3s.yaml | head -20\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && kubectl logs awoooi-api-546b88465d-lb8zm -n awoooi-prod --tail 100 2>&1\")",
"Bash(ssh wooo@192.168.0.110 \"which kubectl 2>/dev/null || find /home/wooo -name kubectl 2>/dev/null | head -1 || ls -la /home/wooo/bin/\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs awoooi-api-546b88465d-lb8zm -n awoooi-prod --tail 100 2>&1\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl describe pod awoooi-api-546b88465d-lb8zm -n awoooi-prod | tail -40\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get svc -n awoooi-prod -o wide\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl exec -n awoooi-prod deploy/awoooi-api -- curl -sf http://localhost:8000/api/v1/health 2>&1\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl exec -n awoooi-prod deploy/awoooi-api -- wget -qO- http://localhost:8000/api/v1/health 2>&1\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 20 2>&1\")",
"Bash(ssh wooo@192.168.0.110 \"curl -sf http://192.168.0.120:32334/api/v1/health 2>&1 || echo ''FAILED to connect to 120:32334''\")",
"Bash(ssh wooo@192.168.0.110 \"curl -sf http://192.168.0.121:32334/api/v1/health 2>&1 || echo ''FAILED to connect to 121:32334''\")",
"Bash(ssh wooo@192.168.0.110 \"ssh wooo@192.168.0.120 ''cat /etc/rancher/k3s/k3s.yaml 2>/dev/null || echo No k3s.yaml''\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get pods -n awoooi-prod -o wide | grep Running\")",
"Bash(ssh -o ConnectTimeout=5 wooo@192.168.0.120 \"ufw status 2>/dev/null || firewall-cmd --state 2>/dev/null || echo ''No firewall command found''\")",
"Bash(ssh -o ConnectTimeout=5 wooo@192.168.0.121 \"ufw status 2>/dev/null || firewall-cmd --state 2>/dev/null || echo ''No firewall command found''\")",
"Bash(pip3 show:*)",
"Bash(docker build:*)",
"Bash(docker version:*)",
"Bash(docker run:*)",
"Bash(curl -vI -H \"Origin: https://awoooi.wooo.work\" http://localhost:8889/api/v1/health)",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get endpoints awoooi-api-svc -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get pods -n awoooi-prod -o wide\")",
"Bash(ssh wooo@192.168.0.120 \"sudo -n ufw status 2>/dev/null || sudo -n iptables -L INPUT -n 2>/dev/null | head -20 || echo ''Need sudo for firewall check''\")",
"Bash(ssh wooo@192.168.0.120 \"ss -tln | grep -E ''32334|32335|:323'' || echo ''No NodePort listeners found''\")",
"Bash(ssh wooo@192.168.0.121 \"ss -tln | grep -E ''32334|32335|:323'' || echo ''No NodePort listeners found''\")",
"Bash(ssh wooo@192.168.0.120 \"ps aux | grep -E ''kube-proxy|k3s'' | grep -v grep | head -5\")",
"Bash(ssh wooo@192.168.0.120 \"cat /proc/sys/net/ipv4/ip_forward\")",
"Bash(ssh wooo@192.168.0.120 \"systemctl status k3s 2>/dev/null | head -15 || ps aux | grep ''k3s server'' | grep -v grep\")",
"Bash(ssh wooo@192.168.0.120 \"curl -sf --connect-timeout 5 http://127.0.0.1:32334/api/v1/health 2>&1 || echo ''LOCALHOST NodePort FAILED''\")",
"Bash(ssh wooo@192.168.0.120 \"curl -sf --connect-timeout 5 http://192.168.0.120:32334/api/v1/health 2>&1 || echo ''EXTERNAL IP NodePort FAILED''\")",
"Bash(ssh wooo@192.168.0.120 \"cat /etc/iptables/rules.v4 2>/dev/null || iptables-save 2>/dev/null | grep -E ''DROP|REJECT|32334|32335'' | head -10 || echo ''Cannot read iptables without sudo''\")",
"Bash(ssh wooo@192.168.0.121 \"curl -sf --connect-timeout 5 http://192.168.0.120:32334/api/v1/health 2>&1 || echo ''Worker->Master NodePort FAILED''\")",
"Bash(ssh wooo@192.168.0.120 \"cat /etc/rancher/k3s/config.yaml 2>/dev/null || ls -la /etc/rancher/k3s/ 2>/dev/null || echo ''No K3s config found''\")",
"Bash(ssh wooo@192.168.0.120 \"netstat -an 2>/dev/null | grep 32334 || ss -an | grep 32334 || echo ''No socket found for 32334''\")",
"Bash(ssh wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S iptables -L INPUT -n 2>&1 | head -20\")",
"Bash(ssh wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S iptables -t nat -L KUBE-NODEPORTS -n 2>&1 | head -20\")",
"Bash(ssh wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S iptables -L KUBE-ROUTER-INPUT -n 2>&1 | head -30\")",
"Bash(ssh wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S iptables -t nat -L KUBE-NODEPORTS -n 2>&1 | grep -i awoooi || echo ''NO AWOOOI RULES FOUND''\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get svc awoooi-api-svc -n awoooi-prod -o yaml | grep -A5 ''spec:''\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get networkpolicy -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl apply -f - 2>&1\")",
"Bash(curl -sf --connect-timeout 10 https://awoooi.wooo.work/api/v1/health)",
"Bash(curl -skf --connect-timeout 10 https://awoooi.wooo.work/api/v1/health)",
"Bash(curl -sI https://awoooi.wooo.work/)",
"Bash(curl -skI https://awoooi.wooo.work/)",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 50 2>&1\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl rollout restart deployment/awoooi-api -n awoooi-prod && /home/wooo/kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s\")",
"Bash(curl -sf https://awoooi.wooo.work/api/v1/health)",
"Bash(curl -skf https://awoooi.wooo.work/api/v1/health)",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 40 2>&1\")",
"Bash(for i:*)",
"Bash(do curl:*)",
"Bash(echo \"Request $i sent\")",
"Bash(done)",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 100 2>&1\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 30 2>&1\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get configmap awoooi-config -n awoooi-prod -o yaml | grep OTEL\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl exec deployment/awoooi-api -n awoooi-prod -- env | grep OTEL\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl exec deployment/awoooi-api -n awoooi-prod -- python -c \"\"import socket; s=socket.socket\\(\\); s.settimeout\\(5\\); s.connect\\(\\(''192.168.0.188'', 24317\\)\\); print\\(''✅ Connection to 24317 OK''\\); s.close\\(\\)\"\" 2>&1\")",
"Bash(curl -vI https://awoooi.wooo.work)",
"Bash(curl -vI https://awoooi.wooo.work/api/v1/health)",
"Bash(curl -sf -X POST https://awoooi.wooo.work/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{:*)",
"Bash(curl -s -X POST https://awoooi.wooo.work/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{\"\"source\"\": \"\"prometheus\"\", \"\"severity\"\": \"\"P1\"\", \"\"message\"\": \"\"Test alert from CLI\"\"}')",
"Bash(curl -s -X POST https://awoooi.wooo.work/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{:*)",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''''{.data.WEBHOOK_HMAC_SECRET}'''' 2>/dev/null\")",
"Bash(timeout 15 curl -N -s https://awoooi.wooo.work/api/v1/dashboard/stream)",
"Bash(bash:*)",
"Bash(curl -s https://awoooi.wooo.work/api/v1/metrics/gold)",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT DISTINCT metric_name FROM signoz_metrics.distributed_samples_v4 WHERE unix_milli > \\(toUnixTimestamp\\(now\\(\\)\\) - 1800\\) * 1000 LIMIT 20 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) as trace_count FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 30 MINUTE FORMAT TabSeparated\")",
"Bash(ssh wooo@192.168.0.120 \"KUBECONFIG=/home/wooo/.kube/config-120 /home/wooo/bin/kubectl get configmap awoooi-config -n awoooi-prod -o jsonpath=''{.data}'' | python3 -m json.tool 2>/dev/null | head -30\")",
"Bash(ssh wooo@192.168.0.120 \"KUBECONFIG=/home/wooo/.kube/config-120 /home/wooo/bin/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 50 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"which kubectl || ls -la ~/bin/kubectl 2>/dev/null || ls -la /usr/local/bin/kubectl 2>/dev/null || echo ''kubectl not found''\")",
"Bash(ssh wooo@192.168.0.120 \"export KUBECONFIG=/home/wooo/.kube/config-120 && kubectl get configmap awoooi-config -n awoooi-prod -o jsonpath=''{.data}'' 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"ls -la ~/.kube/ 2>/dev/null; cat ~/.kube/config 2>/dev/null | head -20 || echo ''checking k3s default...''; sudo cat /etc/rancher/k3s/k3s.yaml 2>/dev/null | head -5 || echo ''no k3s config''\")",
"Bash(ssh wooo@192.168.0.120 \"sudo k3s kubectl get configmap awoooi-config -n awoooi-prod -o yaml 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"sudo k3s kubectl logs deployment/awoooi-api -n awoooi-prod --tail 100 2>&1\")",
"Bash(nc -zv 192.168.0.188 24317)",
"Bash(curl -s http://192.168.0.188:24318/v1/traces -X POST -H \"Content-Type: application/json\" -d '{}')",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT DISTINCT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 24 HOUR GROUP BY serviceName ORDER BY cnt DESC LIMIT 20 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"DESCRIBE TABLE signoz_traces.distributed_signoz_index_v2 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 5 MINUTE GROUP BY serviceName ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
"Bash(curl -s https://awoooi.wooo.work/api/v1/health)",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 10 MINUTE GROUP BY serviceName ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT service_name, count\\(\\) as cnt FROM signoz_logs.distributed_logs WHERE timestamp > now\\(\\) - INTERVAL 30 MINUTE GROUP BY service_name ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SHOW TABLES FROM signoz_logs FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) as total FROM signoz_logs.distributed_logs_v2 WHERE timestamp > now\\(\\) - INTERVAL 30 MINUTE FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT JSONExtractString\\(resources_string, ''service.name''\\) as svc, count\\(\\) as cnt FROM signoz_logs.distributed_logs_v2 WHERE timestamp > now\\(\\) - INTERVAL 5 MINUTE GROUP BY svc ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"DESCRIBE TABLE signoz_logs.distributed_logs_v2 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT resources_string[''service.name''] as svc, count\\(\\) as cnt FROM signoz_logs.distributed_logs_v2 WHERE timestamp > \\(toUnixTimestamp64Nano\\(now64\\(\\)\\) - 300000000000\\) GROUP BY svc ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT body, resources_string FROM signoz_logs.distributed_logs_v2 WHERE timestamp > \\(toUnixTimestamp64Nano\\(now64\\(\\)\\) - 60000000000\\) LIMIT 1 FORMAT JSONEachRow\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 2 MINUTE GROUP BY serviceName ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, name, timestamp FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 5 MINUTE ORDER BY timestamp DESC LIMIT 5 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, name, formatDateTime\\(timestamp, ''%Y-%m-%d %H:%M:%S''\\) as ts FROM signoz_traces.distributed_signoz_index_v2 ORDER BY timestamp DESC LIMIT 10 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) FROM signoz_traces.distributed_signoz_index_v2 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) FROM signoz_traces.distributed_signoz_spans FORMAT TabSeparated\")",
"Bash(ssh wooo@192.168.0.188 \"docker ps | grep -E ''otel|signoz''\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT metric_name, sum\\(value\\) as total FROM signoz_metrics.distributed_samples_v4 WHERE metric_name LIKE ''otelcol%span%'' AND unix_milli > \\(toUnixTimestamp\\(now\\(\\)\\) - 300\\) * 1000 GROUP BY metric_name FORMAT TabSeparated\")",
"Bash(for t:*)",
"Bash(do)",
"Bash(echo -n \"$t: \")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) FROM signoz_traces.$t FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp > now\\(\\) - INTERVAL 10 MINUTE GROUP BY serviceName ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \":*)",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"DESCRIBE TABLE signoz_traces.distributed_signoz_index_v3 FORMAT TabSeparated\")",
"Bash(AWOOOI_API_URL=https://awoooi.wooo.work WEBHOOK_HMAC_SECRET=\"CHANGE_ME_TO_RANDOM_64_CHARS\" python scripts/fire_live_alert.py oomkilled)",
"Bash(timeout 10 curl -sN https://awoooi.wooo.work/api/v1/dashboard/stream)",
"Bash(curl -s https://awoooi.wooo.work/api/v1/dashboard)",
"Bash(npm list:*)",
"Bash(node scripts/verify-frontend.js)",
"Bash(node /Users/ogt/awoooi/scripts/verify-frontend.js)",
"Bash(python -c \"from src.services.proposal_service import ProposalService; print\\(''''✅ ProposalService OK''''\\)\")",
"Bash(python -c \"from src.services.openclaw import OpenClawService; print\\(''''✅ OpenClawService OK''''\\)\")",
"Bash(curl -s http://192.168.0.120:32334/api/v1/incidents)",
"Bash(jq -r \".incidents[:2] | .[] | \"\"\\\\\\(.incident_id\\) - \\\\\\(.status\\) - \\\\\\(.severity\\)\"\"\")",
"Bash(curl -s -X POST \"http://192.168.0.120:32334/api/v1/incidents/INC-20260322-4B3152/propose\" -H \"Content-Type: application/json\")",
"Bash(kubectl logs:*)",
"Bash(ssh ogt@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail 30\")",
"Bash(curl -sv -X POST \"http://192.168.0.120:32334/api/v1/incidents/INC-20260322-4B3152/propose\" -H \"Content-Type: application/json\")",
"Bash(curl -s http://192.168.0.120:32334/api/v1/health)",
"Bash(curl -s \"http://192.168.0.120:32334/api/v1/incidents/INC-20260322-4B3152\")",
"Bash(curl -sv \"http://192.168.0.120:32334/api/v1/incidents\")",
"Bash(curl -s --retry 3 --retry-delay 2 \"http://192.168.0.120:32334/api/v1/health\")",
"Bash(curl -s --retry 3 --retry-delay 2 http://192.168.0.120:32334/api/v1/health)",
"Bash(do echo:*)",
"Bash(curl -s -X POST \"https://awoooi.wooo.work/api/v1/incidents/INC-20260322-4B3152/propose\" -H \"Content-Type: application/json\")",
"Bash(curl -s -X POST \"https://awoooi.wooo.work/api/v1/incidents/INC-20260322-4B3152/proposal\" -H \"Content-Type: application/json\")",
"Bash(curl -s -X POST \"https://awoooi.wooo.work/api/v1/incidents/INC-20260322-D6C6A0/proposal\" -H \"Content-Type: application/json\")",
"Bash(curl -s http://192.168.0.120:32334/api/v1/approvals/pending)",
"Bash(kubectl get:*)",
"Bash(curl -s -w \"\\\\nHTTP_CODE: %{http_code}\\\\n\" http://192.168.0.120:32334/api/v1/health)",
"Bash(curl -s http://awoooi.wooo.work/api/v1/health)",
"Bash(curl -s http://awoooi.wooo.work/api/v1/approvals/pending)",
"Bash(curl -sL https://awoooi.wooo.work/api/v1/approvals/pending -k)",
"Bash(ssh root@192.168.0.120 \"kubectl get pods -n awoooi-prod -o wide\")",
"Bash(ssh root@192.168.0.120 \"kubectl logs -n awoooi-prod -l app=awoooi-api --tail=30\")",
"Bash(curl -sL https://awoooi.wooo.work/api/v1/timeline -k)",
"Bash(curl -sL https://awoooi.wooo.work/api/v1/incidents -k)",
"Bash(curl -sL \"https://awoooi.wooo.work/api/v1/approvals?include_history=true\" -k)",
"Bash(curl -sL \"https://awoooi.wooo.work/api/v1/incidents/INC-20260322-4B3152\" -k)",
"Bash(curl -sL \"https://awoooi.wooo.work/api/v1/audit-logs?limit=10\" -k)",
"Bash(curl -sL https://awoooi.wooo.work/api/v1/audit-logs?limit=10 -k)",
"Bash(ssh ogt@192.168.0.120 \"kubectl logs -n awoooi-prod -l app=awoooi-api --tail=100\")",
"Bash(ssh ogt@192.168.0.120 \"kubectl logs -n awoooi-prod -l app=awoooi-web --tail=50\")",
"Bash(ssh ogt@192.168.0.188 \"kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml logs -n awoooi-prod -l app=awoooi-api --tail=100 2>/dev/null || docker logs awoooi-api --tail=100 2>/dev/null\")",
"Bash(curl -sL \"https://awoooi.wooo.work/api/v1/approvals/pending\" -k -w \"\\\\n\\\\nHTTP: %{http_code}\\\\nTime: %{time_total}s\\\\n\")",
"Bash(curl -sL -X POST https://awoooi.wooo.work/api/v1/approvals/182e07c1-118a-49d7-b71c-7d33c5484d9b/sign -H 'Content-Type: application/json' -d '{\"\"\"\"signer_id\"\"\"\": \"\"\"\"test-debug\"\"\"\", \"\"\"\"signer_name\"\"\"\": \"\"\"\"Debug Test\"\"\"\", \"\"\"\"comment\"\"\"\": \"\"\"\"Testing\"\"\"\"}' -k)",
"Bash(curl -s https://wwooo.aiops.tw/api/v1/health)",
"Bash(curl -s https://wwooo.aiops.tw/api/v1/incidents?limit=5)",
"Bash(curl -s https://wwooo.aiops.tw/api/v1/approvals/pending)",
"Bash(curl -v -s \"https://wwooo.aiops.tw/api/v1/health\")",
"Bash(curl -s \"https://wwooo.aiops.tw/\")",
"Bash(curl -s --connect-timeout 5 \"http://192.168.0.120:32334/api/v1/health\")",
"Bash(curl -s --connect-timeout 5 \"http://192.168.0.120:32334/api/v1/incidents?limit=5\")",
"Bash(ssh -o ConnectTimeout=5 wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-867f67f55d-kvdl2 -n awoooi-prod --tail=50\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod | grep -E ''NAME|worker''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod | grep worker\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-5bdc5699bb-kcv9q -n awoooi-prod --tail=30\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy -n awoooi-prod -o wide\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod --show-labels | grep worker\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy allow-required-egress -n awoooi-prod -o yaml\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl patch networkpolicy allow-required-egress -n awoooi-prod --type=''json'' -p=''[{\"\"op\"\": \"\"replace\"\", \"\"path\"\": \"\"/spec/podSelector/matchLabels\"\", \"\"value\"\": {\"\"system\"\": \"\"awoooi\"\"}}]''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout restart deployment/awoooi-worker -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-5bdc5699bb-kcv9q -n awoooi-prod --tail=15\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=40\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod 2>&1 | grep -E ''signal_worker|redis_pool|INFO'' | tail -10\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/health\")",
"Bash(ssh wooo@192.168.0.120 'curl -s -X POST \"\"http://localhost:32334/api/v1/webhooks/signals\"\" -H \"\"Content-Type: application/json\"\" -d \"\"{:*)",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod | grep -E ''NAME|worker|api''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod && echo ''==='' && kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=30\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/incidents?limit=5\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/approvals/pending\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod 2>&1 | head -50\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/health | jq ''.components''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret -n awoooi-prod -o name\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''{.data.WEBHOOK_HMAC_SECRET}'' | base64 -d\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=20 2>&1 | grep -E ''signal|incident|telegram|INFO''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=30\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s ''http://localhost:32334/api/v1/incidents?limit=5''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod 2>&1 | grep -iE ''telegram|notification|send'' | tail -10\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s ''http://localhost:32334/api/v1/approvals/pending''\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s ''http://localhost:32334/api/v1/incidents?limit=2'' && echo ''---'' && curl -s ''http://localhost:32334/api/v1/approvals/pending''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod | grep worker && echo ''---'' && kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=30\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-6b8cc94d9c-xjdwr -n awoooi-prod --tail=40\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy allow-required-egress -n awoooi-prod -o jsonpath=''{.spec.podSelector}''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl patch networkpolicy allow-required-egress -n awoooi-prod --type=''json'' -p=''[{\"\"op\"\": \"\"replace\"\", \"\"path\"\": \"\"/spec/podSelector\"\", \"\"value\"\": {\"\"matchLabels\"\": {\"\"system\"\": \"\"awoooi\"\"}}}]''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl delete pod awoooi-worker-6b8cc94d9c-xjdwr -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-6b8cc94d9c-pmzj7 -n awoooi-prod --tail=30\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-6b8cc94d9c-pmzj7 -n awoooi-prod --tail=20\")",
"Bash(ls -la /Users/ogt/awoooi/apps/api/scripts/fire*.py)",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=50\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s ''http://localhost:32334/api/v1/incidents?limit=3''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod 2>&1 | grep -iE ''proposal|approval|llm|ai|ollama|generate'' | tail -20\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get deployment awoooi-worker -n awoooi-prod -o jsonpath=''{.spec.template.spec.containers[0].envFrom}''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get deployment awoooi-api -n awoooi-prod -o jsonpath=''{.spec.template.spec.containers[0].envFrom}''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get configmap awoooi-config -n awoooi-prod -o jsonpath=''''{.data}''''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''{.data}'' | tr '','' ''\\\\n''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl exec deployment/awoooi-api -n awoooi-prod -- python -c ''import os; print\\(os.getenv\\(\"\"DATABASE_URL\"\", \"\"NOT SET\"\"\\)[:50]\\)''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-75ffbfb88b-2htfh -n awoooi-prod --tail=50\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl exec awoooi-api-6687db5564-rv755 -n awoooi-prod -- env | grep DATABASE\")",
"Bash(ssh wooo@192.168.0.120 \"PGPASSWORD=''CHANGE_ME'' psql -h 192.168.0.188 -U awoooi -d awoooi_prod -c ''SELECT 1'' 2>&1 || echo ''Connection failed''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod\")",
"Bash(curl -sv http://192.168.0.120:32334/api/v1/health)",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-75ffbfb88b-2htfh -n awoooi-prod --tail=20 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-7fb7d5b55f-n48gk -n awoooi-prod --tail=20 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get rs -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl scale rs awoooi-api-75ffbfb88b -n awoooi-prod --replicas=0\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl scale rs awoooi-worker-7fb7d5b55f -n awoooi-prod --replicas=0\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=10\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get deploy -n awoooi-prod -o wide\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get deploy awoooi-api -n awoooi-prod -o jsonpath=''{.spec.replicas}''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get deploy awoooi-worker -n awoooi-prod -o jsonpath=''{.spec.replicas}''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=5s\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout history deployment/awoooi-api -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout undo deployment/awoooi-api -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout undo deployment/awoooi-worker -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=30s\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get rs awoooi-api-6687db5564 -n awoooi-prod -o jsonpath=''{.metadata.annotations.deployment\\\\.kubernetes\\\\.io/revision}''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl delete pod awoooi-api-7f487f7cbb-5f88g -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout undo deployment/awoooi-api -n awoooi-prod --to-revision=46\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=15\")",
"Bash(curl -s http://192.168.0.120:32334/api/v1/incidents?limit=3)",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --since=2m\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --since=2m | grep -i webhook\")",
"Bash(curl -sv -X POST http://192.168.0.120:32334/api/v1/webhooks/alertmanager -H \"Content-Type: application/json\" -d '{:*)",
"Bash(ssh wooo@192.168.0.120 \"kubectl get endpoints -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/health | jq ''{status}''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --since=30s\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-fc4744758-7wfv5 -n awoooi-prod --tail=30 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-6fc548887b-b9mtf -n awoooi-prod --tail=30 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get configmap awoooi-config -n awoooi-prod -o yaml\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''''{.data}''''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pod awoooi-worker-6fc548887b-b9mtf -n awoooi-prod -o jsonpath=''{.metadata.labels}''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy -n awoooi-prod -o yaml\")",
"Bash(ssh wooo@192.168.0.120 'kubectl patch networkpolicy allow-required-egress -n awoooi-prod --type=json -p=\"\"[{\\\\\"\"op\\\\\"\": \\\\\"\"replace\\\\\"\", \\\\\"\"path\\\\\"\": \\\\\"\"/spec/podSelector/matchLabels\\\\\"\", \\\\\"\"value\\\\\"\": {\\\\\"\"system\\\\\"\": \\\\\"\"awoooi\\\\\"\"}}]\"\"')",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout restart deployment/awoooi-api deployment/awoooi-worker -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-6c69b77894-d6jqq -n awoooi-prod --tail=20\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl run nc-test --rm -it --restart=Never --image=busybox -- nc -zv 192.168.0.188 5432\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod -o=custom-columns=''NAME:.metadata.name,IMAGE:.spec.containers[0].image''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl exec awoooi-api-6687db5564-rv755 -n awoooi-prod -- ls -la *.db 2>/dev/null || echo ''No SQLite files''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl exec awoooi-api-6687db5564-rv755 -n awoooi-prod -- env | grep -E ''MOCK|DATABASE|SQLITE''\")",
"Bash(curl -s \"http://192.168.0.120:32334/api/v1/approvals\")",
"Bash(python -m py_compile src/lewooogo_brain/engines/incident_engine.py src/lewooogo_brain/engines/proposal_engine.py src/lewooogo_brain/skills/loader.py)",
"Bash(python packages/lewooogo-brain/tests/test_skill_loader.py)",
"Bash(python packages/lewooogo-brain/tests/test_incident_engine.py)",
"Bash(python packages/lewooogo-brain/tests/test_guardrails.py)",
"Bash(python -m py_compile src/lewooogo_brain/engines/proposal_engine.py src/lewooogo_brain/engines/incident_engine.py src/lewooogo_brain/skills/loader.py)",
"Bash(PYTHONPATH=/Users/ogt/awoooi/packages/lewooogo-brain/src python -c \":*)",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8000/api/v1/health)",
"Bash(curl -s \"https://awoooi.wooo.work/api/v1/approvals/pending\")",
"Bash(curl -s \"https://awoooi.wooo.work/api/v1/approvals?status=pending\")",
"Bash(curl -s \"https://awoooi.wooo.work/api/v1/incidents\")",
"Bash(uv sync:*)",
"Bash(python -c \"from src.routers.proposals import router; print\\(''✅ Router 語法驗證通過''\\)\")",
"Bash(curl -s -X GET \"https://awoooi.wooo.work/api/v1/health\" --connect-timeout 10)",
"Bash(curl -s -X GET \"https://awoooi.wooo.work/api/v1/incidents\" --connect-timeout 10)",
"Bash(curl -s -o /dev/null -w \"%{http_code}\" \"https://awoooi.wooo.work\" --connect-timeout 10)",
"Bash(curl -s -o /dev/null -w \"%{http_code}\" -L \"https://awoooi.wooo.work\" --connect-timeout 10)",
"Bash(curl -s -X POST \"https://awoooi.wooo.work/api/v1/incidents/test-123/propose\" -H \"Content-Type: application/json\" -d '{\"\"require_dry_run\"\": true}' --connect-timeout 10)",
"Bash(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no ollama@192.168.0.120 \"kubectl get pods -n awoooi-prod -o wide\")",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs awoooi-api-64c8659cff-grslz -n awoooi-prod --tail=50)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data.DATABASE_URL}')",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl rollout restart deployment/awoooi-api -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n awoooi-prod -l app=awoooi-api)",
"Bash(curl -s \"https://awoooi.wooo.work/api/v1/health\" --connect-timeout 10)",
"Bash(curl -s -o /dev/null -w \"%{http_code}\" -L \"https://awoooi.wooo.work/zh-TW\" --connect-timeout 10)",
"Bash(python -c \"from src.routers.proposals import router; print\\(''✅ Router import successful''\\)\")",
"Bash(PGPASSWORD=postgres psql -h 192.168.0.188 -U awoooi -d awoooi_dev -c \"SELECT incident_id, status, severity FROM incidents LIMIT 5;\")",
"Bash(PGPASSWORD=AwoooiProd2026 psql -h 192.168.0.188 -U awoooi -d awoooi_prod -c \"SELECT incident_id, status, severity FROM incidents LIMIT 5;\")",
"Bash(curl -sf http://192.168.0.120:32334/api/v1/incidents)",
"Bash(curl -v \"http://192.168.0.120:32334/api/v1/incidents\")",
"Bash(export KUBECONFIG=/Users/ogt/.kube/config-120)",
"Bash(curl -sI \"http://awoooi.wooo.work/\")",
"Bash(openssl s_client -servername awoooi.wooo.work -connect awoooi.wooo.work:443)",
"Bash(openssl x509:*)",
"Bash(curl -s -X POST \"http://192.168.0.120:32334/api/v1/incidents/INC-20260323-7DE10B/propose\" -H \"Content-Type: application/json\" -d '{\"\"\"\"require_dry_run\"\"\"\": true}')",
"Bash(python -c \"from src.services.executor import execute_approved_proposal, get_executor, ActionExecutor; print\\(''✅ Import successful''\\)\")",
"Bash(curl -s https://awoooi.woooo.cc/api/v1/incidents)",
"Bash(curl -s https://awoooi.woooo.cc/api/v1/health)",
"Bash(curl -s --connect-timeout 10 https://awoooi.woooo.cc/api/v1/health)",
"Bash(ssh ogt@192.168.70.202 \"sudo kubectl get pods -n awoooi 2>/dev/null\")",
"Bash(curl -s --connect-timeout 5 http://192.168.70.200:8000/api/v1/health)",
"Bash(ssh ogt@192.168.70.202 \"sudo kubectl get pods -n awoooi-prod\")",
"Bash(ssh -o StrictHostKeyChecking=no ogt@192.168.70.202 \"sudo kubectl get pods -n awoooi-prod\")",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -A)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-worker-7479556d76-jbbps --tail 30)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod -l app=awoooi-api --tail 20)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod deployment/awoooi-api -- curl -s http://localhost:8000/api/v1/incidents)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod deployment/awoooi-api -- python -c \"import httpx; r = httpx.get\\(''http://localhost:8000/api/v1/incidents''\\); print\\(r.text\\)\")",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get ingress -n awoooi-prod -o wide)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get svc -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get deployment awoooi-worker -n awoooi-prod -o jsonpath='{.spec.template.spec.containers[0].env}')",
"Bash(curl -s --connect-timeout 5 http://192.168.70.202:32334/api/v1/health)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl describe deployment awoooi-worker -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get configmap -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl describe deployment awoooi-api -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get configmap awoooi-config -n awoooi-prod -o yaml)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get secrets -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data}')",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data.REDIS_URL}')",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl rollout restart deployment/awoooi-worker -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n awoooi-prod -l app=awoooi-worker)",
"Bash(curl -s --connect-timeout 5 https://awoooi.wooo.work/api/v1/health)",
"Bash(curl -s https://awoooi.wooo.work/api/v1/incidents)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod -l app=awoooi-worker --tail 10)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get svc -n wooo-aiops-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get svc -A)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-worker-76bdf9786d-rvtmz --tail 15)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod deployment/awoooi-api -- python -c \"import os; print\\(os.getenv\\(''REDIS_URL'', ''NOT_SET''\\)\\)\")",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get deployment awoooi-api -n awoooi-prod -o yaml)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl rollout restart deployment/awoooi-api deployment/awoooi-worker -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-api-865cdc97db-6mpzz --tail 20)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n wooo-aiops-prod -l app=redis)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n wooo-aiops-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n wooo-aiops-prod redis-6c6fcd64b8-8wznx -- redis-cli ping)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod awoooi-api-6445c76797-mrl7p -- python -c \"import redis; r=redis.Redis\\(host=''10.43.239.47'', port=6379, db=10\\); print\\(r.ping\\(\\)\\)\")",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get networkpolicy -A)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get networkpolicy allow-required-egress -n awoooi-prod -o yaml)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl patch networkpolicy allow-required-egress -n awoooi-prod --type='json' -p='[{\"\"op\"\": \"\"add\"\", \"\"path\"\": \"\"/spec/egress/0/ports/-\"\", \"\"value\"\": {\"\"port\"\": 6379, \"\"protocol\"\": \"\"TCP\"\"}}]')",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-api-5fcc484b85-qpwt6 --tail 15)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod awoooi-api-6445c76797-mrl7p -- python -c \"import os; print\\(''REDIS_URL:'', os.getenv\\(''REDIS_URL''\\)\\); import redis; r=redis.Redis.from_url\\(os.getenv\\(''REDIS_URL''\\)\\); print\\(''PING:'', r.ping\\(\\)\\)\")",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-worker-59d7588d75-p5tht --tail 20)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod -l app=awoooi-worker --tail 30)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get deployment awoooi-worker -n awoooi-prod -o yaml)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get networkpolicy -n awoooi-prod -o wide)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl apply -f -)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-worker-6cd7dcbc9-5mtfq --tail 15)",
"Bash(jq .incidents[0])",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get configmap awoooi-config -n awoooi-prod -o jsonpath='{.data.OPENCLAW_URL}')",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8088/health)",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8088/)",
"Bash(nc -zv 192.168.0.188 8088 -w 5)",
"Bash(ping -c 2 192.168.0.188)",
"Bash(ping -c 2 192.168.70.202)",
"Bash(grep -n \"mapToDualState\" /Users/ogt/awoooi/apps/web/src/app/[locale]/page.tsx -A 30)",
"Bash(head -40 /Users/ogt/awoooi/apps/web/src/app/[locale]/page.tsx)",
"Bash(ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps -a | grep -i claw; docker start openclaw 2>/dev/null || docker start clawbot 2>/dev/null || echo ''Container not found, listing all:'' && docker ps -a --format ''table {{.Names}}\\\\t{{.Status}}'' | head -10\")",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8089/health)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl rollout status deployment/awoooi-web -n awoooi-prod --timeout=60s)",
"Bash(grep -rn \"clawbot\\\\|ClawBot\" /Users/ogt/awoooi/ --include=*.yaml --include=*.yml --include=*.json)",
"Bash(grep -rn \"ClawBot\\\\|clawbot\" /Users/ogt/awoooi/apps/ --include=*.py --include=*.ts --include=*.tsx)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs deployment/awoooi-api -n awoooi-prod --tail=100)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200)",
"Bash(export KUBECONFIG=/Users/ogt/awoooi/k3s-prod.yaml)",
"Bash(ssh root@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200 2>&1 | grep -iE ''error|fail|exception|execute|background|parse'' | tail -40\")",
"Bash(curl -s https://awoooi.wooo.work/api/v1/approvals)",
"Bash(ssh k3s@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200 2>&1 | grep -iE ''error|fail|execute|background|parse'' | tail -40\")",
"Bash(ssh ubuntu@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200 2>&1 | grep -iE ''error|fail|execute|background|parse'' | tail -40\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200 2>&1 | grep -iE ''error|fail|execute|background|parse|skip'' | tail -50\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=500 2>&1 | grep -iE ''background_execution|approve_action|reject|k8s_executor'' | tail -30\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get deploy,sts -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=50 2>&1 | grep -iE ''background_execution|k8s_executor|parse'' | tail -10\")"
],
"additionalDirectories": [
"/Users/ogt/awoooi/docs",
"/Users/ogt/.claude/projects/-Users-ogt-awoooi/memory",
"/Users/ogt/awoooi/apps/web/src/app",
"/Users/ogt/awoooi/apps/api",
"/Users/ogt/awoooi/apps/api/http:/localhost:8000/api/v1",
"/Users/ogt/awoooi/apps/web/public",
"/Users/ogt/Downloads",
"/Users/ogt/awoooi/apps/web/test-results",
"/Users/ogt/awoooi",
"/Users/ogt/awoooi/apps/web/src/app/[locale]",
"/tmp"
]
}
}

View File

@@ -19,10 +19,14 @@
# 文件與腳本(不需要進 image)
# 注意: docs/runbooks/, docs/adr/, .agents/skills/ 供 RAG 索引 (ADR-067 Phase 33)
# scripts/ 大部分不需要進 image,但 CronJob 腳本需要
# scripts/ 大部分不需要進 image,僅白名單 production runtime/ops 種子腳本
# 2026-04-12 ogt (ADR-073 P2-1): 白名單允許 cron_km_vectorize.py
scripts
# 2026-05-13 codex: 白名單 T16 auto-repair canary PlayBook seed script
scripts/**
!scripts/
!scripts/cron_km_vectorize.py
!scripts/ops/
!scripts/ops/awooop-seed-auto-repair-canary-playbook.py
# Node 快取(monorepo 根目錄)
node_modules

View File

@@ -10,7 +10,7 @@ on:
jobs:
lint:
runs-on: self-hosted
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

View File

@@ -43,10 +43,19 @@ jobs:
├ 📝 ${{ steps.commit.outputs.message }}
├ 🔖 <code>${{ steps.commit.outputs.short_sha }}</code>
└ 🌿 dev branch"
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text@-"
if AWOOI_CICD_STATUS=running \
AWOOI_CICD_STAGE=dev-deploy \
AWOOI_CICD_JOB_NAME="[DEV] 部署開始" \
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
AWOOI_CICD_SUMMARY="${{ steps.commit.outputs.message }}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "Dev deploy start notification mirrored through AWOOI API"
else
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text@-"
fi
# API 測試 (同 prod CI,確保 dev 也通過)
- name: Run API Tests
@@ -78,11 +87,18 @@ jobs:
echo "✅ API 測試通過"
- name: Login to Harbor
uses: docker/login-action@v3
with:
registry: ${{ env.HARBOR }}
username: ${{ secrets.HARBOR_USERNAME }}
password: ${{ secrets.HARBOR_PASSWORD }}
run: |
HARBOR_USERNAME="$(cat <<'AWOOOI_SECRET_HARBOR_USERNAME'
${{ secrets.HARBOR_USERNAME }}
AWOOOI_SECRET_HARBOR_USERNAME
)"
HARBOR_PASSWORD="$(cat <<'AWOOOI_SECRET_HARBOR_PASSWORD'
${{ secrets.HARBOR_PASSWORD }}
AWOOOI_SECRET_HARBOR_PASSWORD
)"
printf '%s' "$HARBOR_PASSWORD" | docker login "${{ env.HARBOR }}" \
-u "$HARBOR_USERNAME" \
--password-stdin
# Dev API 鏡像:強制重建,不用 cache(確保 models.json 等配置文件更新)
- name: Build and Push API (Dev)
@@ -98,34 +114,57 @@ jobs:
# 注入 Dev K8s Secrets
- name: Inject Dev K8s Secrets
env:
SSH_PRIVATE_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
TG_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
TG_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
run: |
secret_b64() {
python3 -c 'import base64, sys; data=sys.stdin.buffer.read(); data=data[:-1] if data.endswith(b"\n") else data; sys.stdout.write(base64.b64encode(data).decode())'
}
write_deploy_key() {
mkdir -p ~/.ssh
umask 077
cat > ~/.ssh/deploy_key <<'AWOOOI_DEPLOY_KEY'
${{ secrets.DEPLOY_SSH_KEY }}
AWOOOI_DEPLOY_KEY
chmod 600 ~/.ssh/deploy_key
}
TG_BOT_TOKEN_B64="$(secret_b64 <<'AWOOOI_SECRET_TG_BOT_TOKEN'
${{ secrets.TELEGRAM_BOT_TOKEN }}
AWOOOI_SECRET_TG_BOT_TOKEN
)"
TG_CHAT_ID_B64="$(secret_b64 <<'AWOOOI_SECRET_TG_CHAT_ID'
${{ secrets.TELEGRAM_CHAT_ID }}
AWOOOI_SECRET_TG_CHAT_ID
)"
NVIDIA_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_NVIDIA_API_KEY'
${{ secrets.NVIDIA_API_KEY }}
AWOOOI_SECRET_NVIDIA_API_KEY
)"
GEMINI_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_GEMINI_API_KEY'
${{ secrets.GEMINI_API_KEY }}
AWOOOI_SECRET_GEMINI_API_KEY
)"
mkdir -p ~/.ssh
echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
chmod 600 ~/.ssh/deploy_key
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.121 << SECRETS
write_deploy_key
# 2026-05-05 Codex: kubectl runs on 120 control-plane. 121 is a
# worker and its local kubeconfig points at 127.0.0.1:6443.
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.120 << SECRETS
set -e
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
sudo kubectl patch secret awoooi-secrets -n awoooi-dev --type='json' -p='[
{"op":"replace","path":"/data/OPENCLAW_TG_BOT_TOKEN","value":"'"$(echo -n "${TG_BOT_TOKEN}" | base64 -w 0)"'"},
{"op":"replace","path":"/data/OPENCLAW_TG_CHAT_ID","value":"'"$(echo -n "${TG_CHAT_ID}" | base64 -w 0)"'"}
{"op":"replace","path":"/data/OPENCLAW_TG_BOT_TOKEN","value":"${TG_BOT_TOKEN_B64}"},
{"op":"replace","path":"/data/OPENCLAW_TG_CHAT_ID","value":"${TG_CHAT_ID_B64}"}
]' || echo "⚠️ Telegram Secrets patch 跳過"
if [ -n "${NVIDIA_API_KEY}" ]; then
if [ -n "${NVIDIA_API_KEY_B64}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-dev --type='json' -p='[
{"op":"replace","path":"/data/NVIDIA_API_KEY","value":"'"$(echo -n "${NVIDIA_API_KEY}" | base64 -w 0)"'"}
{"op":"replace","path":"/data/NVIDIA_API_KEY","value":"${NVIDIA_API_KEY_B64}"}
]' && echo "✅ NVIDIA_API_KEY 已注入 dev"
fi
if [ -n "${GEMINI_API_KEY}" ]; then
if [ -n "${GEMINI_API_KEY_B64}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-dev --type='json' -p='[
{"op":"replace","path":"/data/GEMINI_API_KEY","value":"'"$(echo -n "${GEMINI_API_KEY}" | base64 -w 0)"'"}
{"op":"replace","path":"/data/GEMINI_API_KEY","value":"${GEMINI_API_KEY_B64}"}
]' && echo "✅ GEMINI_API_KEY 已注入 dev"
fi
@@ -134,14 +173,12 @@ jobs:
# 部署到 awoooi-dev
- name: Deploy to Dev K8s
env:
SSH_PRIVATE_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
run: |
cat k8s/awoooi-dev/02-configmap.yaml | \
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.121 \
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.120 \
"export KUBECONFIG=/etc/rancher/k3s/k3s.yaml && sudo kubectl apply -f -"
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.121 << 'DEPLOY'
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.120 << 'DEPLOY'
set -e
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
@@ -182,10 +219,20 @@ jobs:
├ 🔖 <code>${{ steps.commit.outputs.short_sha }}</code>
├ ⏱️ 耗時: ${MINUTES}m ${SECONDS}s
└ 🩺 http://192.168.0.125:32344/api/v1/health"
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text@-"
if AWOOI_CICD_STATUS=success \
AWOOI_CICD_STAGE=dev-deploy \
AWOOI_CICD_JOB_NAME="[DEV] 部署完成" \
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
AWOOI_CICD_DURATION_SECONDS="${DURATION}" \
AWOOI_CICD_SUMMARY="${{ steps.commit.outputs.message }}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "Dev deploy success notification mirrored through AWOOI API"
else
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text@-"
fi
- name: Notify Dev Deploy Failure
if: failure()
@@ -194,7 +241,16 @@ jobs:
├ 📝 ${{ steps.commit.outputs.message }}
├ 🔖 <code>${{ steps.commit.outputs.short_sha }}</code>
└ 🔗 <a href=\"http://192.168.0.110:3001/wooo/awoooi/actions\">查看日誌</a>"
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text@-"
if AWOOI_CICD_STATUS=failed \
AWOOI_CICD_STAGE=dev-deploy \
AWOOI_CICD_JOB_NAME="[DEV] 部署失敗" \
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
AWOOI_CICD_SUMMARY="${{ steps.commit.outputs.message }}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "Dev deploy failure notification mirrored through AWOOI API"
else
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text@-"
fi

View File

@@ -17,6 +17,9 @@ on:
- 'apps/**'
- 'k8s/**'
- '.dockerignore'
# Dockerfile COPY scripts/ into the API image; keep production ops
# seed scripts deploy-coupled instead of repo-only.
- 'scripts/ops/awooop-seed-auto-repair-canary-playbook.py'
# Workflow-only changes do not rebuild runtime images. Use workflow_dispatch
# when an operator explicitly wants to test the CD pipeline itself.
# docs/、memory/、ADR 等不觸發
@@ -42,6 +45,15 @@ env:
OTEL_SERVICE_NAME: awoooi-cd
OTEL_RESOURCE_ATTRIBUTES: service.version=${{ github.sha }},deployment.environment=production
CI_IMAGE: 192.168.0.110:5000/awoooi/ci-runner:act-22.04
# 2026-05-06 Codex: deploy through the 120 control-plane node. After dirty
# reboots, 121 host-key prompts can block the non-interactive host runner.
# Both nodes support the sudo kubectl path, but 120 removes the extra hop.
K8S_SSH_HOST: 192.168.0.120
K8S_API_SERVER: https://192.168.0.120:6443
# 2026-05-05 Codex: health/smoke probes use the keepalived VIP instead of a
# fixed node. Kubectl still tunnels through K8S_SSH_HOST with --server=120.
API_HEALTH_URL: http://192.168.0.125:32334/api/v1/health
ALERT_CHAIN_API_URL: http://192.168.0.125:32334
jobs:
tests:
@@ -53,8 +65,20 @@ jobs:
# 2026-04-10 ogt: B5 改用 docker run 本地啟動,移除 services: 宣告
# Gitea act runner 的 services: container name 為空,導致 CI 失敗
steps:
- name: Bootstrap Host Runner Tools
# 2026-05-05 Codex: awoooi-host maps to the long-lived act-runner
# container. After dirty reboots it may not contain node/curl/git, and
# actions/checkout@v4 fails before tests can start.
run: |
if command -v apk >/dev/null 2>&1; then
apk add --no-cache nodejs npm git curl bash openssh-client docker-cli docker-cli-buildx
fi
- uses: actions/checkout@v4
- name: Guard Workflow Secret Surfaces
run: node scripts/ci/check-gitea-step-env-secrets.js
# 2026-03-31 ogt: 優化告警格式 - 提高可讀性
- name: Get Commit Info
id: commit
@@ -74,10 +98,20 @@ jobs:
MSG=$(printf '🚀 <b>AWOOOI 部署開始</b>\n├ 📝 <code>%s</code>\n├ 🔖 <code>%s</code>\n└ 👤 %s' "${COMMIT_ESC}" "${SHORT_SHA}" "${ACTOR}")
# 2026-05-02 Claude Opus 4.7 + 統帥 ogt: notify 失敗不該擋整條 CI(鐵證:
# curl 400 從 5/1 起連續炸 14 個 commit 的 build-and-deploy)— 對齊 line 922 既有 pattern
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
if AWOOI_CICD_STATUS=running \
AWOOI_CICD_STAGE=tests \
AWOOI_CICD_JOB_NAME="AWOOOI 部署開始" \
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
AWOOI_CICD_TRIGGERED_BY="${ACTOR}" \
AWOOI_CICD_SUMMARY="${COMMIT_MSG}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "✅ CI/CD start notification mirrored through AWOOI API"
else
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
fi
@@ -237,10 +271,20 @@ jobs:
ACTOR="${{ github.actor }}"
COMMIT_ESC=$(echo "$COMMIT_MSG" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g')
MSG=$(printf '❌ <b>AWOOOI 部署失敗</b>\n├ 📝 <code>%s</code>\n├ 🔖 <code>%s</code>\n├ 👤 %s\n├ 🧪 Stage: tests\n└ 🔗 http://192.168.0.110:3001/wooo/awoooi/actions' "${COMMIT_ESC}" "${SHORT_SHA}" "${ACTOR}")
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
if AWOOI_CICD_STATUS=failed \
AWOOI_CICD_STAGE=tests \
AWOOI_CICD_JOB_NAME="AWOOOI 部署失敗" \
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
AWOOI_CICD_TRIGGERED_BY="${ACTOR}" \
AWOOI_CICD_SUMMARY="${COMMIT_MSG}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "✅ CI/CD tests failure notification mirrored through AWOOI API"
else
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
fi
build-and-deploy:
# 2026-04-30 Codex: Docker builds run on the host runner. Long docker build
@@ -249,6 +293,14 @@ jobs:
timeout-minutes: 60
runs-on: awoooi-host
steps:
- name: Bootstrap Host Runner Tools
# 2026-05-05 Codex: keep the host-mode runner self-healing before
# actions/checkout@v4 and Telegram failure notifications run.
run: |
if command -v apk >/dev/null 2>&1; then
apk add --no-cache nodejs npm git curl bash openssh-client docker-cli docker-cli-buildx
fi
- uses: actions/checkout@v4
- name: Get Commit Info
@@ -274,6 +326,7 @@ jobs:
run: |
LOCK_NAME="awoooi-cd-docker-build-lock"
STALE_SECONDS=7200
EMPTY_LOCK_SECONDS=300
WAIT_ATTEMPTS=180
for attempt in $(seq 1 "$WAIT_ATTEMPTS"); do
@@ -297,9 +350,24 @@ jobs:
python3 -c "import sys, datetime, re; ts = re.sub(r'\\.\d+', '', sys.argv[1]); ts = re.sub(r'\\s+[A-Z]{2,4}$', '', ts.strip()); print(int(datetime.datetime.strptime(ts, '%Y-%m-%d %H:%M:%S %z').timestamp()))" \
"$CREATED_AT" 2>/dev/null || echo 0)
NOW_EPOCH=$(date +%s)
LOCK_AGE=$((NOW_EPOCH - CREATED_EPOCH))
# 2026-05-05 Codex: dirty reboot / cancelled Actions can leave
# the Docker-network lock behind with no active build or push.
# Waiting the full 30m CD timeout keeps deploys queued even
# though no job is protected, so clear empty locks after 5m.
# 2026-05-12 Codex: 用 bracket pattern 避免 lock-check shell 自己的
# grep/awk pattern 被誤判成 active docker work,導致 empty lock 永不自清。
ACTIVE_DOCKER_WORK=$(ps -eo pid,args | awk '$0 ~ /[d]ocker (build|push)|[b]uildx build/ {print}' || true)
if [ "$CREATED_EPOCH" -gt 0 ] && \
[ $((NOW_EPOCH - CREATED_EPOCH)) -gt "$STALE_SECONDS" ]; then
echo "⚠️ stale Docker build lock detected (age=$((NOW_EPOCH - CREATED_EPOCH))s > ${STALE_SECONDS}s), removing ${LOCK_NAME}"
[ "$LOCK_AGE" -gt "$EMPTY_LOCK_SECONDS" ] && \
[ -z "$ACTIVE_DOCKER_WORK" ]; then
echo "⚠️ empty Docker build lock detected (age=${LOCK_AGE}s > ${EMPTY_LOCK_SECONDS}s, no active docker build/push), removing ${LOCK_NAME}"
docker network rm "$LOCK_NAME" >/dev/null 2>&1 || true
continue
fi
if [ "$CREATED_EPOCH" -gt 0 ] && \
[ "$LOCK_AGE" -gt "$STALE_SECONDS" ]; then
echo "⚠️ stale Docker build lock detected (age=${LOCK_AGE}s > ${STALE_SECONDS}s), removing ${LOCK_NAME}"
docker network rm "$LOCK_NAME" >/dev/null 2>&1 || true
continue
fi
@@ -315,8 +383,8 @@ jobs:
# ── API 鏡像建置(含 Layer Cache 加速)──────────────────────────────
# 2026-04-01 ogt: CACHE_BUST=git_sha 確保 src/ 和 models.json 層每次重建
# deps 層 (pip install) 仍可 cache → 加速;代碼/配置層強制失效
# 首席架構師 Review C1 (2026-04-05 Claude Code): 補 DOCKER_BUILDKIT=1
# BUILDKIT_INLINE_CACHE=1 只有在 BuildKit 啟用時才有效
# 2026-05-05 Codex: host runner bootstrap installs docker-cli-buildx;
# keep BuildKit enabled because the web Dockerfile uses RUN --mount.
- name: Build and Push API
env:
DOCKER_BUILDKIT: "1"
@@ -338,7 +406,7 @@ jobs:
# 2026-04-01 Claude Code: CACHE_BUST=git_sha 取代 --no-cache
# - deps 層 (pnpm install) 仍可 cache → 節省 ~2-3 min
# - COPY . . 以下由 CACHE_BUST 強制失效 → 業務邏輯/CSRF 等變更正確進入 bundle
# 2026-04-12 ogt: 實測 --no-cache=10m50s,CACHE_BUST=5m50s,恢復此方案
# 2026-05-05 Codex: mirror API build mode; BuildKit required for cache mounts.
- name: Build and Push Web
env:
DOCKER_BUILDKIT: "1"
@@ -369,113 +437,204 @@ jobs:
# 2026-03-31 ogt: P0-1 Secrets 自動注入 (ADR-035 強制)
# 2026-03-31 ogt: 加入 AI API Keys (修復 mock_fallback 問題)
- name: Inject K8s Secrets
env:
SSH_PRIVATE_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
TG_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
TG_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
# 2026-04-01 Claude Code: Langfuse LLMOps keys (Phase 15.1 補齊 CD 注入)
LANGFUSE_PUBLIC_KEY: ${{ secrets.LANGFUSE_PUBLIC_KEY }}
LANGFUSE_SECRET_KEY: ${{ secrets.LANGFUSE_SECRET_KEY }}
# 2026-04-02 Claude Code: Telegram 白名單 (授權簽核用)
TG_USER_WHITELIST: ${{ secrets.OPENCLAW_TG_USER_WHITELIST }}
# Phase O-4.1 2026-04-02: Sentry API Token (Wave A.1 ADR-037)
SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
# ADR-059 2026-04-05: Gitea Webhook Secret (GITEA_ 前綴為保留字,改用 AWOOOI_ 前綴)
GITEA_WEBHOOK_SECRET: ${{ secrets.AWOOOI_GITEA_WEBHOOK_SECRET }}
# MCP Phase 3: ArgoCD API Token (2026-04-11 Claude Sonnet 4.6)
ARGOCD_API_TOKEN: ${{ secrets.ARGOCD_API_TOKEN }}
# 2026-04-18 ogt + Claude Opus 4.7: ADR-090-B L3-only 升級 L2永久連線串 + 應用 secret
DATABASE_URL: ${{ secrets.DATABASE_URL }}
MIGRATION_DATABASE_URL: ${{ secrets.MIGRATION_DATABASE_URL }}
REDIS_URL: ${{ secrets.REDIS_URL }}
JWT_SECRET: ${{ secrets.JWT_SECRET }}
JWT_ALGORITHM: ${{ secrets.JWT_ALGORITHM }}
WEBHOOK_HMAC_SECRET: ${{ secrets.WEBHOOK_HMAC_SECRET }}
SENTRY_DSN: ${{ secrets.SENTRY_DSN }}
CLAUDE_API_KEY: ${{ secrets.CLAUDE_API_KEY }}
# AWOOOI_ 前綴避開 Gitea 保留字(同 AWOOOI_GITEA_WEBHOOK_SECRET 模式)
GITEA_API_TOKEN: ${{ secrets.AWOOOI_GITEA_API_TOKEN }}
NEMOTRON_BOT_TOKEN: ${{ secrets.NEMOTRON_BOT_TOKEN }}
OPENCLAW_BOT_TOKEN: ${{ secrets.OPENCLAW_BOT_TOKEN }}
SMTP_HOST: ${{ secrets.SMTP_HOST }}
SRE_GROUP_CHAT_ID: ${{ secrets.SRE_GROUP_CHAT_ID }}
run: |
# 2026-05-18 Codex: 不把 secrets 放進 step-level env。
# Gitea/act_runner 的 job log 可能展開 env,這裡只在 shell 內短暫轉
# base64,並避免輸出原值。
secret_b64() {
if command -v python3.11 >/dev/null 2>&1; then
python3.11 -c 'import base64, sys; data=sys.stdin.buffer.read(); data=data[:-1] if data.endswith(b"\n") else data; sys.stdout.write(base64.b64encode(data).decode())'
elif command -v python3 >/dev/null 2>&1; then
python3 -c 'import base64, sys; data=sys.stdin.buffer.read(); data=data[:-1] if data.endswith(b"\n") else data; sys.stdout.write(base64.b64encode(data).decode())'
else
secret_value="$(cat)"
printf '%s' "${secret_value}" | base64 | tr -d '\n'
fi
}
write_deploy_key() {
mkdir -p "${HOME}/.ssh"
umask 077
cat > "${HOME}/.ssh/deploy_key" <<'AWOOOI_DEPLOY_KEY'
${{ secrets.DEPLOY_SSH_KEY }}
AWOOOI_DEPLOY_KEY
chmod 600 "${HOME}/.ssh/deploy_key"
}
TG_BOT_TOKEN_B64="$(secret_b64 <<'AWOOOI_SECRET_TG_BOT_TOKEN'
${{ secrets.TELEGRAM_BOT_TOKEN }}
AWOOOI_SECRET_TG_BOT_TOKEN
)"
TG_CHAT_ID_B64="$(secret_b64 <<'AWOOOI_SECRET_TG_CHAT_ID'
${{ secrets.TELEGRAM_CHAT_ID }}
AWOOOI_SECRET_TG_CHAT_ID
)"
NVIDIA_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_NVIDIA_API_KEY'
${{ secrets.NVIDIA_API_KEY }}
AWOOOI_SECRET_NVIDIA_API_KEY
)"
GEMINI_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_GEMINI_API_KEY'
${{ secrets.GEMINI_API_KEY }}
AWOOOI_SECRET_GEMINI_API_KEY
)"
LANGFUSE_PUBLIC_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_LANGFUSE_PUBLIC_KEY'
${{ secrets.LANGFUSE_PUBLIC_KEY }}
AWOOOI_SECRET_LANGFUSE_PUBLIC_KEY
)"
LANGFUSE_SECRET_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_LANGFUSE_SECRET_KEY'
${{ secrets.LANGFUSE_SECRET_KEY }}
AWOOOI_SECRET_LANGFUSE_SECRET_KEY
)"
TG_USER_WHITELIST_B64="$(secret_b64 <<'AWOOOI_SECRET_TG_USER_WHITELIST'
${{ secrets.OPENCLAW_TG_USER_WHITELIST }}
AWOOOI_SECRET_TG_USER_WHITELIST
)"
SENTRY_AUTH_TOKEN_B64="$(secret_b64 <<'AWOOOI_SECRET_SENTRY_AUTH_TOKEN'
${{ secrets.SENTRY_AUTH_TOKEN }}
AWOOOI_SECRET_SENTRY_AUTH_TOKEN
)"
GITEA_WEBHOOK_SECRET_B64="$(secret_b64 <<'AWOOOI_SECRET_GITEA_WEBHOOK_SECRET'
${{ secrets.AWOOOI_GITEA_WEBHOOK_SECRET }}
AWOOOI_SECRET_GITEA_WEBHOOK_SECRET
)"
ARGOCD_API_TOKEN_B64="$(secret_b64 <<'AWOOOI_SECRET_ARGOCD_API_TOKEN'
${{ secrets.ARGOCD_API_TOKEN }}
AWOOOI_SECRET_ARGOCD_API_TOKEN
)"
DATABASE_URL_B64="$(secret_b64 <<'AWOOOI_SECRET_DATABASE_URL'
${{ secrets.DATABASE_URL }}
AWOOOI_SECRET_DATABASE_URL
)"
MIGRATION_DATABASE_URL_B64="$(secret_b64 <<'AWOOOI_SECRET_MIGRATION_DATABASE_URL'
${{ secrets.MIGRATION_DATABASE_URL }}
AWOOOI_SECRET_MIGRATION_DATABASE_URL
)"
REDIS_URL_B64="$(secret_b64 <<'AWOOOI_SECRET_REDIS_URL'
${{ secrets.REDIS_URL }}
AWOOOI_SECRET_REDIS_URL
)"
JWT_SECRET_B64="$(secret_b64 <<'AWOOOI_SECRET_JWT_SECRET'
${{ secrets.JWT_SECRET }}
AWOOOI_SECRET_JWT_SECRET
)"
JWT_ALGORITHM_B64="$(secret_b64 <<'AWOOOI_SECRET_JWT_ALGORITHM'
${{ secrets.JWT_ALGORITHM }}
AWOOOI_SECRET_JWT_ALGORITHM
)"
WEBHOOK_HMAC_SECRET_B64="$(secret_b64 <<'AWOOOI_SECRET_WEBHOOK_HMAC_SECRET'
${{ secrets.WEBHOOK_HMAC_SECRET }}
AWOOOI_SECRET_WEBHOOK_HMAC_SECRET
)"
AWOOOP_OPERATOR_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_AWOOOP_OPERATOR_API_KEY'
${{ secrets.AWOOOP_OPERATOR_API_KEY }}
AWOOOI_SECRET_AWOOOP_OPERATOR_API_KEY
)"
SENTRY_DSN_B64="$(secret_b64 <<'AWOOOI_SECRET_SENTRY_DSN'
${{ secrets.SENTRY_DSN }}
AWOOOI_SECRET_SENTRY_DSN
)"
CLAUDE_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_CLAUDE_API_KEY'
${{ secrets.CLAUDE_API_KEY }}
AWOOOI_SECRET_CLAUDE_API_KEY
)"
GITEA_API_TOKEN_B64="$(secret_b64 <<'AWOOOI_SECRET_GITEA_API_TOKEN'
${{ secrets.AWOOOI_GITEA_API_TOKEN }}
AWOOOI_SECRET_GITEA_API_TOKEN
)"
NEMOTRON_BOT_TOKEN_B64="$(secret_b64 <<'AWOOOI_SECRET_NEMOTRON_BOT_TOKEN'
${{ secrets.NEMOTRON_BOT_TOKEN }}
AWOOOI_SECRET_NEMOTRON_BOT_TOKEN
)"
OPENCLAW_BOT_TOKEN_B64="$(secret_b64 <<'AWOOOI_SECRET_OPENCLAW_BOT_TOKEN'
${{ secrets.OPENCLAW_BOT_TOKEN }}
AWOOOI_SECRET_OPENCLAW_BOT_TOKEN
)"
SMTP_HOST_B64="$(secret_b64 <<'AWOOOI_SECRET_SMTP_HOST'
${{ secrets.SMTP_HOST }}
AWOOOI_SECRET_SMTP_HOST
)"
SRE_GROUP_CHAT_ID_B64="$(secret_b64 <<'AWOOOI_SECRET_SRE_GROUP_CHAT_ID'
${{ secrets.SRE_GROUP_CHAT_ID }}
AWOOOI_SECRET_SRE_GROUP_CHAT_ID
)"
# S1/S2: 統一命名 deploy_key,改用 ssh-keyscan(比 StrictHostKeyChecking=no 更安全)
mkdir -p ~/.ssh
echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
chmod 600 ~/.ssh/deploy_key
ssh-keyscan 192.168.0.121 >> ~/.ssh/known_hosts 2>/dev/null
ssh -i ~/.ssh/deploy_key wooo@192.168.0.121 << SECRETS
write_deploy_key
# 2026-05-13 Codex: keyscan must include ED25519 explicitly. Some
# OpenSSH builds otherwise record only RSA/ECDSA, then strict deploy
# SSH fails with "No ED25519 host key is known" after image push.
ssh-keyscan -T 5 -t ed25519,rsa,ecdsa "${K8S_SSH_HOST}" > "${HOME}/.ssh/known_hosts" 2>/dev/null
test -s "${HOME}/.ssh/known_hosts" || { echo "❌ K8S host keyscan failed: ${K8S_SSH_HOST}"; exit 1; }
SSH_OPTS="-i ${HOME}/.ssh/deploy_key -o BatchMode=yes -o StrictHostKeyChecking=yes -o UserKnownHostsFile=${HOME}/.ssh/known_hosts -o ConnectTimeout=10"
ssh $SSH_OPTS "wooo@${{ env.K8S_SSH_HOST }}" << SECRETS
set -e
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
K8S_API_SERVER="${{ env.K8S_API_SERVER }}"
KUBECTL="sudo kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml --server=\${K8S_API_SERVER}"
# 注入 Telegram Secrets (ADR-035 鐵律)
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/OPENCLAW_TG_BOT_TOKEN","value":"'$(echo -n "${TG_BOT_TOKEN}" | base64 -w 0)'"},
{"op":"add","path":"/data/OPENCLAW_TG_CHAT_ID","value":"'$(echo -n "${TG_CHAT_ID}" | base64 -w 0)'"}
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/OPENCLAW_TG_BOT_TOKEN","value":"${TG_BOT_TOKEN_B64}"},
{"op":"add","path":"/data/OPENCLAW_TG_CHAT_ID","value":"${TG_CHAT_ID_B64}"}
]' || { echo "❌ Telegram Secrets patch 失敗 — ADR-035 鐵律"; exit 1; }
# 2026-03-31 ogt: 注入 AI API Keys (修復 NVIDIA/Gemini mock_fallback)
# 2026-04-01 Claude Code: base64 -w 0 防止長 key 換行破壞 JSON
# NVIDIA NIM (免費 tier)
if [ -n "${NVIDIA_API_KEY}" ] && [ "${NVIDIA_API_KEY}" != "" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/NVIDIA_API_KEY","value":"'$(echo -n "${NVIDIA_API_KEY}" | base64 -w 0)'"}
if [ -n "${NVIDIA_API_KEY_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/NVIDIA_API_KEY","value":"${NVIDIA_API_KEY_B64}"}
]' && echo "✅ NVIDIA_API_KEY 已注入" || echo "⚠️ NVIDIA_API_KEY patch 失敗"
else
echo "⚠️ NVIDIA_API_KEY 未設定,跳過"
fi
# Gemini (備援)
if [ -n "${GEMINI_API_KEY}" ] && [ "${GEMINI_API_KEY}" != "" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/GEMINI_API_KEY","value":"'$(echo -n "${GEMINI_API_KEY}" | base64 -w 0)'"}
if [ -n "${GEMINI_API_KEY_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/GEMINI_API_KEY","value":"${GEMINI_API_KEY_B64}"}
]' && echo "✅ GEMINI_API_KEY 已注入" || echo "⚠️ GEMINI_API_KEY patch 失敗"
else
echo "⚠️ GEMINI_API_KEY 未設定,跳過"
fi
# 2026-04-01 Claude Code: Langfuse LLMOps keys (補齊 CD 注入,之前只有手動設定)
if [ -n "${LANGFUSE_PUBLIC_KEY}" ] && [ -n "${LANGFUSE_SECRET_KEY}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/LANGFUSE_PUBLIC_KEY","value":"'$(echo -n "${LANGFUSE_PUBLIC_KEY}" | base64 -w 0)'"},
{"op":"add","path":"/data/LANGFUSE_SECRET_KEY","value":"'$(echo -n "${LANGFUSE_SECRET_KEY}" | base64 -w 0)'"}
if [ -n "${LANGFUSE_PUBLIC_KEY_B64}" ] && [ -n "${LANGFUSE_SECRET_KEY_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/LANGFUSE_PUBLIC_KEY","value":"${LANGFUSE_PUBLIC_KEY_B64}"},
{"op":"add","path":"/data/LANGFUSE_SECRET_KEY","value":"${LANGFUSE_SECRET_KEY_B64}"}
]' && echo "✅ LANGFUSE keys 已注入" || echo "⚠️ LANGFUSE keys patch 失敗"
else
echo "⚠️ LANGFUSE_PUBLIC_KEY/SECRET_KEY 未設定,跳過 (現有 K8s secret 值維持不變)"
fi
# 2026-04-02 Claude Code: Telegram Whitelist (授權簽核用戶 ID)
if [ -n "${TG_USER_WHITELIST}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/OPENCLAW_TG_USER_WHITELIST","value":"'$(echo -n "${TG_USER_WHITELIST}" | base64 -w 0)'"}
if [ -n "${TG_USER_WHITELIST_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/OPENCLAW_TG_USER_WHITELIST","value":"${TG_USER_WHITELIST_B64}"}
]' && echo "✅ TG_USER_WHITELIST 已注入" || echo "⚠️ TG_USER_WHITELIST patch 失敗"
fi
# Phase O-4.1 2026-04-02: Sentry Auth Token (Wave A.1 ADR-037)
if [ -n "${SENTRY_AUTH_TOKEN}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/SENTRY_AUTH_TOKEN","value":"'$(echo -n "${SENTRY_AUTH_TOKEN}" | base64 -w 0)'"}
if [ -n "${SENTRY_AUTH_TOKEN_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/SENTRY_AUTH_TOKEN","value":"${SENTRY_AUTH_TOKEN_B64}"}
]' && echo "✅ SENTRY_AUTH_TOKEN 已注入" || echo "⚠️ SENTRY_AUTH_TOKEN patch 失敗"
else
echo "⚠️ SENTRY_AUTH_TOKEN 未設定Sentry Comment API 將跳過"
fi
# ADR-059 2026-04-05 Claude Code: Gitea Webhook Secret
if [ -n "${GITEA_WEBHOOK_SECRET}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/GITEA_WEBHOOK_SECRET","value":"'$(echo -n "${GITEA_WEBHOOK_SECRET}" | base64 -w 0)'"}
if [ -n "${GITEA_WEBHOOK_SECRET_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/GITEA_WEBHOOK_SECRET","value":"${GITEA_WEBHOOK_SECRET_B64}"}
]' && echo "✅ GITEA_WEBHOOK_SECRET 已注入" || echo "⚠️ GITEA_WEBHOOK_SECRET patch 失敗"
else
echo "⚠️ GITEA_WEBHOOK_SECRET 未設定Gitea Webhook 簽章驗證將在 prod 失效"
fi
# MCP Phase 3: ArgoCD API Token (2026-04-11 Claude Sonnet 4.6)
if [ -n "${ARGOCD_API_TOKEN}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/ARGOCD_API_TOKEN","value":"'$(echo -n "${ARGOCD_API_TOKEN}" | base64 -w 0)'"}
if [ -n "${ARGOCD_API_TOKEN_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/ARGOCD_API_TOKEN","value":"${ARGOCD_API_TOKEN_B64}"}
]' && echo "✅ ARGOCD_API_TOKEN 已注入" || echo "⚠️ ARGOCD_API_TOKEN patch 失敗"
else
echo "⚠️ ARGOCD_API_TOKEN 未設定ArgoCD MCP 將使用空 token"
@@ -488,91 +647,98 @@ jobs:
# 注意: 每個 block 與上方維持相同結構(if guard + base64 -w 0 + json patch)
# DATABASE_URL — PG 應用連線串(2026-04-18 輪替)
if [ -n "${DATABASE_URL}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/DATABASE_URL","value":"'$(echo -n "${DATABASE_URL}" | base64 -w 0)'"}
if [ -n "${DATABASE_URL_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/DATABASE_URL","value":"${DATABASE_URL_B64}"}
]' && echo "✅ DATABASE_URL 已注入" || echo "⚠️ DATABASE_URL patch 失敗"
else
echo "⚠️ DATABASE_URL 未設定awoooi-api 將無法連 PG"
fi
# MIGRATION_DATABASE_URL — CI migration 用 awoooi_migrator 限權帳號(ADR-090-B)
if [ -n "${MIGRATION_DATABASE_URL}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/MIGRATION_DATABASE_URL","value":"'$(echo -n "${MIGRATION_DATABASE_URL}" | base64 -w 0)'"}
if [ -n "${MIGRATION_DATABASE_URL_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/MIGRATION_DATABASE_URL","value":"${MIGRATION_DATABASE_URL_B64}"}
]' && echo "✅ MIGRATION_DATABASE_URL 已注入" || echo "⚠️ MIGRATION_DATABASE_URL patch 失敗"
fi
# REDIS_URL — Redis 連線(6380 on 188)
if [ -n "${REDIS_URL}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/REDIS_URL","value":"'$(echo -n "${REDIS_URL}" | base64 -w 0)'"}
if [ -n "${REDIS_URL_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/REDIS_URL","value":"${REDIS_URL_B64}"}
]' && echo "✅ REDIS_URL 已注入" || echo "⚠️ REDIS_URL patch 失敗"
else
echo "⚠️ REDIS_URL 未設定"
fi
# JWT_SECRET / JWT_ALGORITHM — API 認證
if [ -n "${JWT_SECRET}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/JWT_SECRET","value":"'$(echo -n "${JWT_SECRET}" | base64 -w 0)'"}
if [ -n "${JWT_SECRET_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/JWT_SECRET","value":"${JWT_SECRET_B64}"}
]' && echo "✅ JWT_SECRET 已注入" || echo "⚠️ JWT_SECRET patch 失敗"
fi
if [ -n "${JWT_ALGORITHM}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/JWT_ALGORITHM","value":"'$(echo -n "${JWT_ALGORITHM}" | base64 -w 0)'"}
if [ -n "${JWT_ALGORITHM_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/JWT_ALGORITHM","value":"${JWT_ALGORITHM_B64}"}
]' && echo "✅ JWT_ALGORITHM 已注入" || echo "⚠️ JWT_ALGORITHM patch 失敗"
fi
# WEBHOOK_HMAC_SECRET — Alertmanager webhook HMAC 簽章
if [ -n "${WEBHOOK_HMAC_SECRET}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/WEBHOOK_HMAC_SECRET","value":"'$(echo -n "${WEBHOOK_HMAC_SECRET}" | base64 -w 0)'"}
if [ -n "${WEBHOOK_HMAC_SECRET_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/WEBHOOK_HMAC_SECRET","value":"${WEBHOOK_HMAC_SECRET_B64}"}
]' && echo "✅ WEBHOOK_HMAC_SECRET 已注入" || echo "⚠️ WEBHOOK_HMAC_SECRET patch 失敗"
fi
# AWOOOP_OPERATOR_API_KEY — AwoooP Operator mutation endpoints
if [ -n "${AWOOOP_OPERATOR_API_KEY_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/AWOOOP_OPERATOR_API_KEY","value":"${AWOOOP_OPERATOR_API_KEY_B64}"}
]' && echo "✅ AWOOOP_OPERATOR_API_KEY 已注入" || echo "⚠️ AWOOOP_OPERATOR_API_KEY patch 失敗"
fi
# SENTRY_DSN — Sentry 錯誤追蹤(不是 auth token)
if [ -n "${SENTRY_DSN}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/SENTRY_DSN","value":"'$(echo -n "${SENTRY_DSN}" | base64 -w 0)'"}
if [ -n "${SENTRY_DSN_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/SENTRY_DSN","value":"${SENTRY_DSN_B64}"}
]' && echo "✅ SENTRY_DSN 已注入" || echo "⚠️ SENTRY_DSN patch 失敗"
fi
# CLAUDE_API_KEY — Claude 備援 LLM
if [ -n "${CLAUDE_API_KEY}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/CLAUDE_API_KEY","value":"'$(echo -n "${CLAUDE_API_KEY}" | base64 -w 0)'"}
if [ -n "${CLAUDE_API_KEY_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/CLAUDE_API_KEY","value":"${CLAUDE_API_KEY_B64}"}
]' && echo "✅ CLAUDE_API_KEY 已注入" || echo "⚠️ CLAUDE_API_KEY patch 失敗"
fi
# GITEA_API_TOKEN — Gitea API Token(從 AWOOOI_GITEA_API_TOKEN 映射)
if [ -n "${GITEA_API_TOKEN}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/GITEA_API_TOKEN","value":"'$(echo -n "${GITEA_API_TOKEN}" | base64 -w 0)'"}
if [ -n "${GITEA_API_TOKEN_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/GITEA_API_TOKEN","value":"${GITEA_API_TOKEN_B64}"}
]' && echo "✅ GITEA_API_TOKEN 已注入" || echo "⚠️ GITEA_API_TOKEN patch 失敗"
fi
# NEMOTRON_BOT_TOKEN / OPENCLAW_BOT_TOKEN — 多 Bot 架構
if [ -n "${NEMOTRON_BOT_TOKEN}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/NEMOTRON_BOT_TOKEN","value":"'$(echo -n "${NEMOTRON_BOT_TOKEN}" | base64 -w 0)'"}
if [ -n "${NEMOTRON_BOT_TOKEN_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/NEMOTRON_BOT_TOKEN","value":"${NEMOTRON_BOT_TOKEN_B64}"}
]' && echo "✅ NEMOTRON_BOT_TOKEN 已注入" || echo "⚠️ NEMOTRON_BOT_TOKEN patch 失敗"
fi
if [ -n "${OPENCLAW_BOT_TOKEN}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/OPENCLAW_BOT_TOKEN","value":"'$(echo -n "${OPENCLAW_BOT_TOKEN}" | base64 -w 0)'"}
if [ -n "${OPENCLAW_BOT_TOKEN_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/OPENCLAW_BOT_TOKEN","value":"${OPENCLAW_BOT_TOKEN_B64}"}
]' && echo "✅ OPENCLAW_BOT_TOKEN 已注入" || echo "⚠️ OPENCLAW_BOT_TOKEN patch 失敗"
fi
# SMTP_HOST / SRE_GROUP_CHAT_ID
if [ -n "${SMTP_HOST}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/SMTP_HOST","value":"'$(echo -n "${SMTP_HOST}" | base64 -w 0)'"}
if [ -n "${SMTP_HOST_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/SMTP_HOST","value":"${SMTP_HOST_B64}"}
]' && echo "✅ SMTP_HOST 已注入" || echo "⚠️ SMTP_HOST patch 失敗"
fi
if [ -n "${SRE_GROUP_CHAT_ID}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/SRE_GROUP_CHAT_ID","value":"'$(echo -n "${SRE_GROUP_CHAT_ID}" | base64 -w 0)'"}
if [ -n "${SRE_GROUP_CHAT_ID_B64}" ]; then
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
{"op":"add","path":"/data/SRE_GROUP_CHAT_ID","value":"${SRE_GROUP_CHAT_ID_B64}"}
]' && echo "✅ SRE_GROUP_CHAT_ID 已注入" || echo "⚠️ SRE_GROUP_CHAT_ID patch 失敗"
fi
@@ -590,26 +756,27 @@ jobs:
EXPECTED_HOSTS=4
PRESENT=0
for ip in 192.168.0.110 192.168.0.120 192.168.0.121 192.168.0.188; do
if grep -qE "^${ip}[[:space:]]" /tmp/known_hosts_repair 2>/dev/null; then
PRESENT=$((PRESENT + 1))
if grep -qE "^\${ip}[[:space:]]" /tmp/known_hosts_repair 2>/dev/null; then
PRESENT=\$((PRESENT + 1))
else
echo "⚠️ ssh-keyscan 缺主機 ${ip}"
echo "⚠️ ssh-keyscan 缺主機 \${ip}"
fi
done
if [ "$PRESENT" -eq "$EXPECTED_HOSTS" ]; then
sudo kubectl create secret generic awoooi-repair-known-hosts \
if [ "\$PRESENT" -eq "\$EXPECTED_HOSTS" ]; then
\$KUBECTL create secret generic awoooi-repair-known-hosts \
-n awoooi-prod \
--from-file=known_hosts=/tmp/known_hosts_repair \
--dry-run=client -o yaml | sudo kubectl apply -f - \
--dry-run=client -o yaml | \$KUBECTL apply -f - \
&& echo "✅ awoooi-repair-known-hosts Secret 已建立/更新" \
|| echo "⚠️ awoooi-repair-known-hosts Secret 建立失敗 (非致命)"
sudo kubectl patch secret ssh-mcp-key -n awoooi-prod --type=merge \
-p='{"data":{"known_hosts":"'$(base64 -w 0 /tmp/known_hosts_repair)'"}}' \
KNOWN_HOSTS_B64=\$(base64 -w 0 /tmp/known_hosts_repair)
\$KUBECTL patch secret ssh-mcp-key -n awoooi-prod --type=merge \
-p="{\"data\":{\"known_hosts\":\"\${KNOWN_HOSTS_B64}\"}}" \
&& echo "✅ ssh-mcp-key known_hosts 已更新4 台主機完整)" \
|| echo "⚠️ ssh-mcp-key known_hosts 更新失敗 (非致命)"
rm -f /tmp/known_hosts_repair /tmp/known_hosts_scan_err
else
echo "❌ ssh-keyscan 只抓到 ${PRESENT}/${EXPECTED_HOSTS} 台主機,跳過 patch保留現有 secret"
echo "❌ ssh-keyscan 只抓到 \${PRESENT}/\${EXPECTED_HOSTS} 台主機,跳過 patch保留現有 secret"
cat /tmp/known_hosts_scan_err 2>/dev/null | head -10
rm -f /tmp/known_hosts_repair /tmp/known_hosts_scan_err
fi
@@ -627,27 +794,36 @@ jobs:
# 4. 等待 ArgoCD sync + rollout 完成
# 5. Health Check
- name: Deploy to K8s (ArgoCD GitOps)
env:
SSH_PRIVATE_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
GITEA_TOKEN: ${{ secrets.CD_PUSH_TOKEN }}
run: |
# Write the deploy SSH private key to ${HOME}/.ssh/deploy_key with owner-only
# permissions. The key text comes from the DEPLOY_SSH_KEY secret, which the CI
# template engine substitutes into the heredoc body before this script runs.
write_deploy_key() {
mkdir -p "${HOME}/.ssh"
# Restrictive umask so the key file is created without group/other access.
# NOTE(review): this runs after mkdir, so it does not affect the mode of a
# freshly created .ssh directory, and because the function body uses { } the
# umask stays in effect for the rest of the calling shell.
umask 077
# Quoted heredoc delimiter: the shell copies the body verbatim, so characters
# in the key are never subject to parameter or command expansion.
cat > "${HOME}/.ssh/deploy_key" <<'AWOOOI_DEPLOY_KEY'
${{ secrets.DEPLOY_SSH_KEY }}
AWOOOI_DEPLOY_KEY
# Explicit chmod also covers the case where the file already existed with
# looser permissions (umask only applies at creation time).
chmod 600 "${HOME}/.ssh/deploy_key"
}
mkdir -p ~/.ssh
echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
chmod 600 ~/.ssh/deploy_key
ssh-keyscan 192.168.0.121 >> ~/.ssh/known_hosts 2>/dev/null
write_deploy_key
# 2026-05-13 Codex: mirror Inject K8s Secrets host-key handling so the
# deploy job never reaches SSH with a known_hosts file missing ED25519.
ssh-keyscan -T 5 -t ed25519,rsa,ecdsa "${K8S_SSH_HOST}" > "${HOME}/.ssh/known_hosts" 2>/dev/null
test -s "${HOME}/.ssh/known_hosts" || { echo "❌ K8S host keyscan failed: ${K8S_SSH_HOST}"; exit 1; }
SSH_OPTS="-i ${HOME}/.ssh/deploy_key -o BatchMode=yes -o StrictHostKeyChecking=yes -o UserKnownHostsFile=${HOME}/.ssh/known_hosts -o ConnectTimeout=10"
IMAGE_TAG="${{ github.sha }}"
HARBOR=192.168.0.110:5000
# ─── Step 1: Apply ConfigMap + ServiceRegistry (ArgoCD 管的是 Deployment,ConfigMap 仍直接 apply) ───
cat k8s/awoooi-prod/04-configmap.yaml | \
ssh -i ~/.ssh/deploy_key wooo@192.168.0.121 \
"export KUBECONFIG=/etc/rancher/k3s/k3s.yaml && sudo kubectl apply -f -"
ssh $SSH_OPTS "wooo@${{ env.K8S_SSH_HOST }}" \
"KUBECTL='sudo kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml --server=${{ env.K8S_API_SERVER }}'; \$KUBECTL apply -f -"
echo "✅ ConfigMap 已更新"
cat k8s/awoooi-prod/15-service-registry-configmap.yaml | \
ssh -i ~/.ssh/deploy_key wooo@192.168.0.121 \
"export KUBECONFIG=/etc/rancher/k3s/k3s.yaml && sudo kubectl apply -f -"
ssh $SSH_OPTS "wooo@${{ env.K8S_SSH_HOST }}" \
"KUBECTL='sudo kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml --server=${{ env.K8S_API_SERVER }}'; \$KUBECTL apply -f -"
echo "✅ Service Registry ConfigMap 已更新"
# ─── Step 2: 更新 kustomization.yaml image tag ───
@@ -677,7 +853,7 @@ jobs:
git commit -m "chore(cd): deploy ${IMAGE_TAG::7} [skip ci]"
# 用 token 推送(避免 SSH key 需要額外設定 push 權限)
git remote remove gitea 2>/dev/null || true
git remote add gitea http://wooo:${GITEA_TOKEN}@192.168.0.110:3001/wooo/awoooi.git
git remote add gitea "http://wooo:${{ secrets.CD_PUSH_TOKEN }}@192.168.0.110:3001/wooo/awoooi.git"
# 先 rebase 避免 non-fast-forward (其他 commit 在 CI 期間已推入)
# 2026-04-17 ogt: -X theirs — kustomization.yaml 衝突時採用當次部署的 image tag
git fetch gitea main
@@ -688,23 +864,24 @@ jobs:
}
# ─── Step 4: 等待 ArgoCD sync + rollout ───
ssh -i ~/.ssh/deploy_key wooo@192.168.0.121 \
ssh $SSH_OPTS "wooo@${{ env.K8S_SSH_HOST }}" \
"EXPECTED_REVISION='${DEPLOY_REVISION}' bash -s" << 'ARGOCD_WAIT'
set -e
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
K8S_API_SERVER="${{ env.K8S_API_SERVER }}"
KUBECTL="sudo kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml --server=${K8S_API_SERVER}"
# 等待 ArgoCD Application Synced(最多 180s)。只看
# Synced/Healthy 可能誤判成上一個 revision 已同步,因此有
# deploy commit 時必須同時確認 status.sync.revision。
echo "⏳ 等待 ArgoCD sync..."
sudo kubectl annotate application awoooi-prod -n argocd \
$KUBECTL annotate application awoooi-prod -n argocd \
argocd.argoproj.io/refresh=hard --overwrite >/dev/null 2>&1 || true
for i in $(seq 1 36); do
SYNC=$(sudo kubectl get application awoooi-prod -n argocd \
SYNC=$($KUBECTL get application awoooi-prod -n argocd \
-o jsonpath='{.status.sync.status}' 2>/dev/null || echo "Unknown")
HEALTH=$(sudo kubectl get application awoooi-prod -n argocd \
HEALTH=$($KUBECTL get application awoooi-prod -n argocd \
-o jsonpath='{.status.health.status}' 2>/dev/null || echo "Unknown")
REVISION=$(sudo kubectl get application awoooi-prod -n argocd \
REVISION=$($KUBECTL get application awoooi-prod -n argocd \
-o jsonpath='{.status.sync.revision}' 2>/dev/null || echo "Unknown")
SHORT_REVISION=$(echo "$REVISION" | cut -c1-8)
SHORT_EXPECTED=$(echo "$EXPECTED_REVISION" | cut -c1-8)
@@ -723,15 +900,15 @@ jobs:
done
# 確認 rollout 完成
sudo kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s
sudo kubectl rollout status deployment/awoooi-web -n awoooi-prod --timeout=120s
sudo kubectl rollout status deployment/awoooi-worker -n awoooi-prod --timeout=120s
$KUBECTL rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s
$KUBECTL rollout status deployment/awoooi-web -n awoooi-prod --timeout=120s
$KUBECTL rollout status deployment/awoooi-worker -n awoooi-prod --timeout=120s
echo "✅ 部署完成"
# Health Check
HEALTH_PASS=0
for i in 1 2 3; do
HTTP_CODE=$(curl -s -w "%{http_code}" -o /dev/null --connect-timeout 10 "http://localhost:32334/api/v1/health")
HTTP_CODE=$(curl -s -w "%{http_code}" -o /dev/null --connect-timeout 10 "${{ env.API_HEALTH_URL }}")
if [ "$HTTP_CODE" = "200" ]; then
echo "✅ API 健康檢查通過"
HEALTH_PASS=1
@@ -747,36 +924,18 @@ jobs:
ARGOCD_WAIT
# 2026-04-09 Claude Sonnet 4.6: Sprint 5.2 — 同步 ops 腳本到 188 (ollama user)
# DEPLOY_SSH_KEY_188 = gitea-cd-deploy-188 (ed25519,只有 188 authorized_keys)
# 腳本: docker-health-monitor.sh + pg-backup.sh (感知層 + 備份)
# 188 deploy key is rotated and must not be read by this disabled step.
# 腳本: docker-health-monitor.sh + pg-backup.sh + notify-awoooi-ops.sh
# 感知層與備份通知都先走 AWOOI API/AwoooP,Telegram 直發只保留 API 離線 fallback。
- name: Sync Ops Scripts to 188
# 2026-05-13 Codex T14e/P0:
# Disabled until the 188 ops sync path is moved to a file-secret or
# Ansible-controlled channel. Gitea Actions logs step env values, and
# multiline SSH secrets must not be exposed through CD logs.
if: ${{ false }}
continue-on-error: true
env:
SSH_KEY_188: ${{ secrets.DEPLOY_SSH_KEY_188 }}
run: |
mkdir -p ~/.ssh
echo "$SSH_KEY_188" > ~/.ssh/deploy_key_188
chmod 600 ~/.ssh/deploy_key_188
ssh-keyscan 192.168.0.188 >> ~/.ssh/known_hosts 2>/dev/null
# 同步 docker-health-monitor.sh
scp -i ~/.ssh/deploy_key_188 \
scripts/ops/docker-health-monitor.sh \
ollama@192.168.0.188:~/awoooi-ops/docker-health-monitor.sh \
&& echo "✅ docker-health-monitor.sh 已同步" \
|| echo "⚠️ docker-health-monitor.sh 同步失敗"
# 同步 pg-backup.sh
scp -i ~/.ssh/deploy_key_188 \
scripts/ops/pg-backup.sh \
ollama@192.168.0.188:~/awoooi-ops/pg-backup.sh \
&& echo "✅ pg-backup.sh 已同步" \
|| echo "⚠️ pg-backup.sh 同步失敗"
# 確保執行權限
ssh -i ~/.ssh/deploy_key_188 ollama@192.168.0.188 \
"chmod +x ~/awoooi-ops/docker-health-monitor.sh ~/awoooi-ops/pg-backup.sh && echo '✅ 權限設定完成'" \
|| echo "⚠️ 權限設定失敗"
echo "188 ops script sync disabled pending secure key rotation path"
- name: Notify Pipeline Failure
if: failure()
@@ -786,10 +945,20 @@ jobs:
ACTOR="${{ github.actor }}"
COMMIT_ESC=$(echo "$COMMIT_MSG" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g')
MSG=$(printf '❌ <b>AWOOOI 部署失敗</b>\n├ 📝 <code>%s</code>\n├ 🔖 <code>%s</code>\n├ 👤 %s\n├ 🏗️ Stage: build-and-deploy\n└ 🔗 http://192.168.0.110:3001/wooo/awoooi/actions' "${COMMIT_ESC}" "${SHORT_SHA}" "${ACTOR}")
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
if AWOOI_CICD_STATUS=failed \
AWOOI_CICD_STAGE=build-and-deploy \
AWOOI_CICD_JOB_NAME="AWOOOI 部署失敗" \
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
AWOOI_CICD_TRIGGERED_BY="${ACTOR}" \
AWOOI_CICD_SUMMARY="${COMMIT_MSG}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "✅ CI/CD build failure notification mirrored through AWOOI API"
else
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
fi
post-deploy-checks:
needs: build-and-deploy
@@ -798,6 +967,14 @@ jobs:
# install-deps can also kill the act-managed job container with RWLayer=nil.
runs-on: awoooi-host
steps:
- name: Bootstrap Host Runner Tools
# 2026-05-05 Codex: post-deploy also uses checkout and curl-based
# notifications, so it needs the same runner bootstrap as earlier jobs.
run: |
if command -v apk >/dev/null 2>&1; then
apk add --no-cache nodejs npm git curl bash openssh-client docker-cli docker-cli-buildx
fi
- uses: actions/checkout@v4
- name: Get Commit Info
@@ -814,7 +991,7 @@ jobs:
- name: Alert Chain Smoke Test
id: alert_chain_smoke
run: |
# 2026-04-05 Claude Code: 使用真實 API 地址(192.168.0.121:32334 NodePort)
# 2026-05-05 Codex: use the keepalived VIP instead of a fixed node.
# Host runner launches the CI image explicitly to avoid act RWLayer=nil.
if docker run --rm \
--name "awoooi-cd-${GITHUB_RUN_ID:-manual}-${GITHUB_RUN_ATTEMPT:-1}-alert-smoke" \
@@ -824,7 +1001,7 @@ jobs:
-v awoooi-api-venv-cache:/opt/api-venv \
-w /workspace \
"${{ env.CI_IMAGE }}" \
bash -lc 'source /opt/api-venv/bin/activate && python3 scripts/alert_chain_smoke_test.py --api-url http://192.168.0.121:32334 --json | tee /tmp/alert_chain_result.json'; then
bash -lc 'source /opt/api-venv/bin/activate && python3 scripts/alert_chain_smoke_test.py --api-url ${{ env.ALERT_CHAIN_API_URL }} --json | tee /tmp/alert_chain_result.json'; then
echo "alert_chain_status=pass" >> $GITHUB_OUTPUT
else
echo "alert_chain_status=fail" >> $GITHUB_OUTPUT
@@ -941,9 +1118,19 @@ jobs:
COMMIT_MSG="${{ steps.commit.outputs.message }}"
SHORT_SHA="${{ steps.commit.outputs.short_sha }}"
TG_MSG="✅ AWOOOI 部署完成\n├ 📝 ${COMMIT_MSG}\n├ 🔖 ${SHORT_SHA}\n├ ⏱️ 耗時: ${MINUTES}m ${SECONDS}s\n├ 📦 API: ✅ Web: ✅\n├ 🩺 Health: ✅\n├ 🔗 Alert Chain: ${ALERT_CHAIN_RESULT}\n├ 📊 Monitoring: ${MONITORING_RESULT}\n└ 🎭 Smoke: ${SMOKE_RESULT}"
printf '%b' "$TG_MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
--data-urlencode "text@-" || echo "TG notify warning (non-fatal)"
if AWOOI_CICD_STATUS=success \
AWOOI_CICD_STAGE=post-deploy \
AWOOI_CICD_JOB_NAME="AWOOOI 部署完成" \
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
AWOOI_CICD_DURATION_SECONDS="${DURATION}" \
AWOOI_CICD_SUMMARY="API=✅; Web=✅; AlertChain=${ALERT_CHAIN_RESULT}; Monitoring=${MONITORING_RESULT}; Smoke=${SMOKE_RESULT}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "✅ CI/CD success notification mirrored through AWOOI API"
else
printf '%b' "$TG_MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
--data-urlencode "text@-" || echo "TG notify warning (non-fatal)"
fi
- name: Notify Pipeline Failure
# 2026-04-16 ogt + Claude Sonnet 4.6: 改用 HTML 結構化格式
@@ -954,7 +1141,17 @@ jobs:
ACTOR="${{ github.actor }}"
COMMIT_ESC=$(echo "$COMMIT_MSG" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g')
MSG=$(printf '❌ <b>AWOOOI 部署失敗</b>\n├ 📝 <code>%s</code>\n├ 🔖 <code>%s</code>\n├ 👤 %s\n├ 🩺 Stage: post-deploy-checks\n└ 🔗 http://192.168.0.110:3001/wooo/awoooi/actions' "${COMMIT_ESC}" "${SHORT_SHA}" "${ACTOR}")
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
if AWOOI_CICD_STATUS=failed \
AWOOI_CICD_STAGE=post-deploy-checks \
AWOOI_CICD_JOB_NAME="AWOOOI 部署失敗" \
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
AWOOI_CICD_TRIGGERED_BY="${ACTOR}" \
AWOOI_CICD_SUMMARY="${COMMIT_MSG}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "✅ CI/CD post-deploy failure notification mirrored through AWOOI API"
else
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
fi

View File

@@ -30,6 +30,9 @@ jobs:
with:
fetch-depth: 50
- name: Guard Workflow Secret Surfaces
run: node scripts/ci/check-gitea-step-env-secrets.js
- name: Skip Stale Main Push
id: stale
run: |
@@ -102,7 +105,6 @@ jobs:
- name: Notify Code Review Start
if: steps.stale.outputs.skip != 'true'
env:
TG_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
TG_CHAT_ID: ${{ env.TELEGRAM_ALERT_CHAT_ID }}
SHORT_SHA: ${{ steps.ctx.outputs.short_sha }}
BRANCH: ${{ steps.ctx.outputs.branch }}
@@ -110,18 +112,33 @@ jobs:
FILES_DISPLAY: ${{ steps.ctx.outputs.files_display }}
run: |
set -euo pipefail
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${TG_CHAT_ID:-}" ]; then
echo "Telegram secret missing; skip start notification"
exit 0
fi
TG_BOT_TOKEN="$(cat <<'AWOOOI_SECRET_TG_BOT_TOKEN'
${{ secrets.TELEGRAM_BOT_TOKEN }}
AWOOOI_SECRET_TG_BOT_TOKEN
)"
# Stdin-to-stdout filter that replaces &, < and > with HTML entities.
# '&' is rewritten first so the entities produced for '<' and '>' are not
# escaped a second time.
html_escape() { sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g'; }
COMMIT_ESC="$(printf '%s' "$COMMIT_MSG" | html_escape)"
FILES_ESC="$(printf '%s\n' "$FILES_DISPLAY" | html_escape)"
MSG="$(printf '🔍 <b>Code Review 啟動</b>\n──────────────────────\n📦 Commit <code>%s</code> 🌿 <code>%s</code>\n📝 <code>%s</code>\n📁 <b>變更檔案:</b>\n%s\n──────────────────────\n🤖 <b>Hermes → OpenClaw → Elephant Alpha → NemoTron</b>\n📊 即時進度:<a href=\"%s\">%s</a>' "$SHORT_SHA" "$BRANCH" "$COMMIT_ESC" "$FILES_ESC" "$REPORT_URL" "$REPORT_URL")"
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
-H "Content-Type: application/json" \
-d "$(jq -n --arg c "$TG_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
>/dev/null
if AWOOI_CICD_STATUS=running \
AWOOI_CICD_STAGE=code-review \
AWOOI_CICD_JOB_NAME="Code Review 啟動" \
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
AWOOI_CICD_TRIGGERED_BY="${GITHUB_ACTOR:-CI}" \
AWOOI_CICD_SUMMARY="${COMMIT_MSG}" \
AWOOI_CICD_WORKFLOW_URL="${REPORT_URL}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "Code review start notification mirrored through AWOOI API"
else
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${TG_CHAT_ID:-}" ]; then
echo "Telegram secret missing and AWOOI API notify failed; skip start notification"
exit 0
fi
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
-H "Content-Type: application/json" \
-d "$(jq -n --arg c "$TG_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
>/dev/null
fi
- name: Run Deterministic Review
if: steps.stale.outputs.skip != 'true'
@@ -139,15 +156,14 @@ jobs:
- name: Notify Code Review Completion
if: always() && steps.stale.outputs.skip != 'true'
env:
TG_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
TG_CHAT_ID: ${{ env.TELEGRAM_ALERT_CHAT_ID }}
SHORT_SHA: ${{ steps.ctx.outputs.short_sha }}
run: |
set -euo pipefail
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${TG_CHAT_ID:-}" ]; then
echo "Telegram secret missing; skip completion notification"
exit 0
fi
TG_BOT_TOKEN="$(cat <<'AWOOOI_SECRET_TG_BOT_TOKEN'
${{ secrets.TELEGRAM_BOT_TOKEN }}
AWOOOI_SECRET_TG_BOT_TOKEN
)"
REPORT=/tmp/code-review-report.json
if [ ! -s "$REPORT" ]; then
cat > "$REPORT" <<'JSON'
@@ -180,7 +196,25 @@ jobs:
TOP_ESC="$(printf '%s' "$TOP_ISSUE" | html_escape)"
MSG="$(printf '%s <b>Code Review 完成・%s</b>\n──────────────────────\n🔴 CRITICAL <code>%s</code> 🟠 HIGH <code>%s</code> 🟡 MEDIUM <code>%s</code> 🟢 LOW <code>%s</code>\n──────────────────────\n⚠ <b>主要問題</b>\n%s\n\n🔍 <b>整體風險等級</b>\n%s%s\n\n⚠ <b>最高關注問題</b>\n1. %s\n──────────────────────\n🤖 Elephant Alpha<b>%s</b> ✅ %s\n📊 完整報告:<a href=\"%s\">%s</a>' "$STATUS" "$SHORT_SHA" "$CRITICAL" "$HIGH" "$MEDIUM" "$LOW" "$ISSUE_LINE" "$RISK" "$SUMMARY_ESC" "$TOP_ESC" "$RISK" "$ACTION_ESC" "$REPORT_URL" "$REPORT_URL")"
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
-H "Content-Type: application/json" \
-d "$(jq -n --arg c "$TG_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
>/dev/null
CICD_STATUS=success
if [ "$RISK" = "MEDIUM" ]; then CICD_STATUS=pending; fi
if [ "$RISK" = "HIGH" ] || [ "$RISK" = "CRITICAL" ]; then CICD_STATUS=failed; fi
if AWOOI_CICD_STATUS="${CICD_STATUS}" \
AWOOI_CICD_STAGE=code-review \
AWOOI_CICD_JOB_NAME="Code Review 完成・${RISK}" \
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
AWOOI_CICD_TRIGGERED_BY="${GITHUB_ACTOR:-CI}" \
AWOOI_CICD_SUMMARY="CRITICAL=${CRITICAL}; HIGH=${HIGH}; MEDIUM=${MEDIUM}; LOW=${LOW}; ${SUMMARY}" \
AWOOI_CICD_WORKFLOW_URL="${REPORT_URL}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "Code review completion notification mirrored through AWOOI API"
else
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${TG_CHAT_ID:-}" ]; then
echo "Telegram secret missing and AWOOI API notify failed; skip completion notification"
exit 0
fi
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
-H "Content-Type: application/json" \
-d "$(jq -n --arg c "$TG_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
>/dev/null
fi

View File

@@ -1,7 +1,7 @@
# =============================================================================
# Deploy Prometheus Alert Rules (獨立 workflow)
# 2026-04-05 Claude Code (ADR-039 I3): 從 cd.yaml 分離
# 觸發條件: ops/monitoring/alerts-unified.yml 有變更 或 workflow_dispatch
# 觸發條件: ops/monitoring/alerts-unified.yml / slo-rules.yml 有變更 或 workflow_dispatch
# 說明: 告警規則部署不依賴應用構建,獨立觸發以加快響應速度
# =============================================================================
@@ -12,6 +12,8 @@ on:
branches: [main]
paths:
- 'ops/monitoring/alerts-unified.yml'
- 'ops/monitoring/slo-rules.yml'
- 'scripts/ops/deploy-alerts.sh'
workflow_dispatch:
env:
@@ -30,11 +32,15 @@ jobs:
run: |
pip3 install -q pyyaml 2>/dev/null || pip install -q pyyaml
python3 -c "import yaml; yaml.safe_load(open('ops/monitoring/alerts-unified.yml')); print('YAML OK')"
python3 -c "import yaml; yaml.safe_load(open('ops/monitoring/slo-rules.yml')); print('SLO YAML OK')"
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.DEPLOY_SSH_KEY }}" > ~/.ssh/id_ed25519
umask 077
cat > ~/.ssh/id_ed25519 <<'AWOOOI_DEPLOY_KEY'
${{ secrets.DEPLOY_SSH_KEY }}
AWOOOI_DEPLOY_KEY
chmod 600 ~/.ssh/id_ed25519
ssh-keyscan 192.168.0.110 >> ~/.ssh/known_hosts
@@ -50,6 +56,17 @@ jobs:
SHORT_SHA="${{ github.sha }}"
SHORT_SHA="${SHORT_SHA:0:7}"
MSG="${EMOJI} Prometheus 告警規則部署 ${STATUS} (${SHORT_SHA})"
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
--data-urlencode "text=${MSG}" || true
CICD_STATUS="success"
[ "$STATUS" != "success" ] && CICD_STATUS="failed"
if AWOOI_CICD_STATUS="${CICD_STATUS}" \
AWOOI_CICD_STAGE=deploy-alerts \
AWOOI_CICD_JOB_NAME="Prometheus 告警規則部署" \
AWOOI_CICD_COMMIT_SHA="${{ github.sha }}" \
AWOOI_CICD_SUMMARY="${MSG}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "Alert rule deploy notification mirrored through AWOOI API"
else
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
--data-urlencode "text=${MSG}" || true
fi

View File

@@ -54,7 +54,17 @@ jobs:
- name: Notify Telegram on Failure
if: failure()
run: |
curl -s -X POST "https://api.telegram.org/bot${{ secrets.OPENCLAW_TG_BOT_TOKEN }}/sendMessage" \
-d chat_id="${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d parse_mode="HTML" \
-d text="🔴 <b>[E2E Health Check]</b> 失敗%0A%0A📅 $(TZ=Asia/Taipei date '+%Y-%m-%d %H:%M')%0A🔗 API 健康檢查未通過%0A%0A請檢查 K3s 叢集狀態"
MSG="E2E Health Check 失敗API 健康檢查未通過"
if AWOOI_CICD_STATUS=failed \
AWOOI_CICD_STAGE=e2e-health \
AWOOI_CICD_JOB_NAME="E2E Health Check" \
AWOOI_CICD_COMMIT_SHA="${{ github.sha }}" \
AWOOI_CICD_SUMMARY="${MSG}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "E2E failure notification mirrored through AWOOI API"
else
curl -s -X POST "https://api.telegram.org/bot${{ secrets.OPENCLAW_TG_BOT_TOKEN }}/sendMessage" \
-d chat_id="${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d parse_mode="HTML" \
-d text="🔴 <b>[E2E Health Check]</b> 失敗%0A%0A📅 $(TZ=Asia/Taipei date '+%Y-%m-%d %H:%M')%0A🔗 API 健康檢查未通過%0A%0A請檢查 K3s 叢集狀態"
fi

View File

@@ -17,6 +17,7 @@ on:
branches: [main]
paths:
- 'apps/api/migrations/*.sql'
workflow_dispatch:
env:
TELEGRAM_ALERT_CHAT_ID: "-1003711974679"
@@ -56,45 +57,101 @@ jobs:
- name: Identify new migrations
id: diff
run: |
NEW_FILES=$(git diff --name-only --diff-filter=A HEAD~1 HEAD -- 'apps/api/migrations/*.sql' || true)
ALL_NEW_FILES=$(git diff --no-renames --name-only --diff-filter=A HEAD~1 HEAD -- 'apps/api/migrations/*.sql' || true)
NEW_FILES=$(echo "$ALL_NEW_FILES" | grep -Ev '(_down|rollback)\.sql$' || true)
SKIPPED_ROLLBACK_FILES=$(echo "$ALL_NEW_FILES" | grep -E '(_down|rollback)\.sql$' || true)
echo "new_files<<EOF" >> $GITHUB_OUTPUT
echo "$NEW_FILES" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
echo "=== New migration files ==="
echo "$NEW_FILES"
if [ -n "$SKIPPED_ROLLBACK_FILES" ]; then
echo "=== Rollback/down migrations skipped by design ==="
echo "$SKIPPED_ROLLBACK_FILES"
fi
- name: Apply new migrations
if: steps.diff.outputs.new_files != ''
env:
# 從 Gitea secrets 取,不直接明碼
PGURL: ${{ secrets.MIGRATION_DATABASE_URL }}
run: |
set -euo pipefail
# 從 Gitea secrets 取,不放 step-level env避免 runner log 展開。
# MIGRATION_DATABASE_URL 是限權帳號DATABASE_URL 只在 PostgreSQL
# 明確回報「必須是 table owner」時作為受控 fallback。
PGURL="$(cat <<'AWOOOI_SECRET_MIGRATION_DATABASE_URL'
${{ secrets.MIGRATION_DATABASE_URL }}
AWOOOI_SECRET_MIGRATION_DATABASE_URL
)"
OWNER_PGURL="$(cat <<'AWOOOI_SECRET_DATABASE_URL'
${{ secrets.DATABASE_URL }}
AWOOOI_SECRET_DATABASE_URL
)"
if [ -z "$PGURL" ]; then
echo "::error::MIGRATION_DATABASE_URL secret not set in Gitea"
exit 1
fi
PGURL_PSQL="${PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
OWNER_PGURL_PSQL="${OWNER_PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
apply_migration() {
local url="$1"
local file="$2"
psql "$url" \
-v ON_ERROR_STOP=1 \
--single-transaction \
-f "$file"
}
# 套用每個新檔 (single transaction per file)
echo "${{ steps.diff.outputs.new_files }}" | while IFS= read -r file; do
[ -z "$file" ] && continue
echo "=== Applying: $file ==="
psql "$PGURL_PSQL" \
-v ON_ERROR_STOP=1 \
--single-transaction \
-f "$file"
migration_err="$(mktemp)"
if ! apply_migration "$PGURL_PSQL" "$file" 2>"$migration_err"; then
if grep -Eq "(must be owner of table|permission denied for table)" "$migration_err"; then
if [ -z "$OWNER_PGURL_PSQL" ]; then
cat "$migration_err" >&2
echo "::error::migration requires table owner but DATABASE_URL secret is not set"
exit 1
fi
echo "::warning::migration requires table owner; retrying with owner connection"
apply_migration "$OWNER_PGURL_PSQL" "$file"
else
cat "$migration_err" >&2
exit 1
fi
fi
rm -f "$migration_err"
echo "=== OK: $file ==="
done
- name: Seed asset_discovery_run (audit)
if: steps.diff.outputs.new_files != ''
env:
PGURL: ${{ secrets.MIGRATION_DATABASE_URL }}
run: |
set -euo pipefail
PGURL="$(cat <<'AWOOOI_SECRET_MIGRATION_DATABASE_URL'
${{ secrets.MIGRATION_DATABASE_URL }}
AWOOOI_SECRET_MIGRATION_DATABASE_URL
)"
OWNER_PGURL="$(cat <<'AWOOOI_SECRET_DATABASE_URL'
${{ secrets.DATABASE_URL }}
AWOOOI_SECRET_DATABASE_URL
)"
if [ -z "$PGURL" ]; then
echo "::error::MIGRATION_DATABASE_URL secret not set in Gitea"
exit 1
fi
PGURL_PSQL="${PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
OWNER_PGURL_PSQL="${OWNER_PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
FILES_JSON=$(echo "${{ steps.diff.outputs.new_files }}" | jq -Rn '[inputs | select(length > 0)]')
psql "$PGURL_PSQL" -c "
SUMMARY_JSON=$(jq -cn \
--arg commit_sha "${{ github.sha }}" \
--argjson files "$FILES_JSON" \
'{type: "ci_migration", commit_sha: $commit_sha, files: $files}')
SUMMARY_JSON_SQL=${SUMMARY_JSON//\'/\'\'}
seed_audit() {
local url="$1"
psql "$url" -v ON_ERROR_STOP=1 <<SQL
INSERT INTO asset_discovery_run (
run_id, triggered_by, scope, scan_depth, status,
started_at, ended_at, tools_used, summary
@@ -106,23 +163,51 @@ jobs:
'success',
NOW(),
NOW(),
'{\"psql\": 1, \"gitea_ci\": 1}'::jsonb,
jsonb_build_object(
'type', 'ci_migration',
'commit_sha', '${{ github.sha }}',
'files', $FILES_JSON
)
'{"psql": 1, "gitea_ci": 1}'::jsonb,
'${SUMMARY_JSON_SQL}'::jsonb
);
"
SQL
}
audit_err="$(mktemp)"
if ! seed_audit "$PGURL_PSQL" 2>"$audit_err"; then
if grep -q "permission denied for table asset_discovery_run" "$audit_err"; then
if [ -z "$OWNER_PGURL_PSQL" ]; then
cat "$audit_err" >&2
echo "::error::audit requires table insert privilege but DATABASE_URL secret is not set"
exit 1
fi
echo "::warning::audit requires owner connection; retrying with owner connection"
seed_audit "$OWNER_PGURL_PSQL"
else
cat "$audit_err" >&2
exit 1
fi
fi
rm -f "$audit_err"
- name: Notify Telegram (if configured)
if: always()
env:
TG_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
TG_CHAT: ${{ env.TELEGRAM_ALERT_CHAT_ID }}
run: |
TG_TOKEN="$(cat <<'AWOOOI_SECRET_TG_TOKEN'
${{ secrets.TELEGRAM_BOT_TOKEN }}
AWOOOI_SECRET_TG_TOKEN
)"
STATUS="${{ job.status }}"
CICD_STATUS="success"
[ "$STATUS" != "success" ] && CICD_STATUS="failed"
if AWOOI_CICD_STATUS="${CICD_STATUS}" \
AWOOI_CICD_STAGE=run-migration \
AWOOI_CICD_JOB_NAME="Migration CI" \
AWOOI_CICD_COMMIT_SHA="${{ github.sha }}" \
AWOOI_CICD_SUMMARY="Migration CI: ${STATUS}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "Migration notification mirrored through AWOOI API"
exit 0
fi
if [ -n "$TG_TOKEN" ] && [ -n "$TG_CHAT" ]; then
STATUS="${{ job.status }}"
MSG="🗄️ Migration CI: \`${STATUS}\` — commit ${{ github.sha }}"
curl -s -X POST "https://api.telegram.org/bot${TG_TOKEN}/sendMessage" \
-d chat_id="${TG_CHAT}" \

View File

@@ -13,12 +13,10 @@
name: CD
# 2026-05-12 Codex: GitHub 僅保留唯讀備份;生產 CI/CD 只能從 Gitea 執行。
# 本 workflow 曾可 push / workflow_dispatch 後 build、patch secret、kubectl apply
# 會和 `.gitea/workflows/cd.yaml` 競爭 K3s production 狀態,因此硬停用。
on:
push:
branches: [main]
paths-ignore:
- 'docs/**'
- '*.md'
workflow_dispatch:
inputs:
force_deploy:
@@ -60,6 +58,7 @@ jobs:
# ==================== Pre-flight Check (10s Fail-Fast) ====================
pre-flight-check:
name: "Pre-flight Check"
if: ${{ false }}
runs-on: [self-hosted, harbor, k8s]
timeout-minutes: 1
steps:
@@ -133,6 +132,7 @@ jobs:
# 2026-03-29 Claude Code: 確保監控覆蓋率 >= 90%
monitoring-coverage:
name: "Monitoring Coverage"
if: ${{ false }}
runs-on: [self-hosted, harbor, k8s]
needs: pre-flight-check
timeout-minutes: 2
@@ -152,6 +152,7 @@ jobs:
# ==================== 路徑偵測 (使用 dorny/paths-filter) ====================
detect-changes:
name: Detect Changes
if: ${{ false }}
runs-on: [self-hosted, harbor, k8s]
needs: [pre-flight-check, monitoring-coverage]
timeout-minutes: 1
@@ -197,11 +198,7 @@ jobs:
runs-on: [self-hosted, harbor, k8s]
needs: [detect-changes, build-web]
timeout-minutes: 20
if: |
!inputs.skip_api && (
needs.detect-changes.outputs.api == 'true' ||
(needs.detect-changes.outputs.api == 'false' && needs.detect-changes.outputs.web == 'false')
)
if: ${{ false }}
outputs:
image_tag: ${{ steps.tag.outputs.tag }}
steps:
@@ -238,11 +235,7 @@ jobs:
runs-on: [self-hosted, harbor, k8s]
needs: detect-changes
timeout-minutes: 20
if: |
!inputs.skip_web && (
needs.detect-changes.outputs.web == 'true' ||
(needs.detect-changes.outputs.api == 'false' && needs.detect-changes.outputs.web == 'false')
)
if: ${{ false }}
outputs:
image_tag: ${{ steps.tag.outputs.tag }}
steps:
@@ -293,7 +286,7 @@ jobs:
concurrency:
group: runner-awoooi-cd-mutex
cancel-in-progress: false
if: always() && (needs.build-api.result == 'success' || needs.build-api.result == 'skipped') && (needs.build-web.result == 'success' || needs.build-web.result == 'skipped')
if: ${{ false }}
environment: production
steps:
# 2026-03-29: Runner 診斷檔案清理 (防止並行衝突)

View File

@@ -14,15 +14,10 @@
name: Deploy to Production
# 2026-05-12 Codex: GitHub 是唯讀備份production deploy 只能從 Gitea 進入。
# 這份歷史 workflow 仍含 Harbor build/push 與 kubectl apply/rollout會和 Gitea CD 競爭。
# 保留檔案供稽核,但停用所有 job。
on:
push:
branches:
- main
paths:
- 'apps/api/**'
- 'apps/web/**'
- 'k8s/awoooi-prod/**'
- '.github/workflows/deploy-prod.yml'
workflow_dispatch:
inputs:
deploy_api:
@@ -70,6 +65,7 @@ jobs:
# ===========================================================================
build:
name: "Build Images"
if: ${{ false }}
runs-on: [self-hosted, harbor, k8s]
outputs:
image_tag: ${{ steps.meta.outputs.tag }}
@@ -138,6 +134,7 @@ jobs:
deploy:
name: "Deploy to K3s"
needs: build
if: ${{ false }}
runs-on: [self-hosted, harbor, k8s]
steps:
@@ -210,7 +207,7 @@ jobs:
smoke-test:
name: "Smoke Tests"
needs: deploy
if: ${{ !inputs.skip_tests }}
if: ${{ false }}
runs-on: [self-hosted, harbor, k8s]
steps:
@@ -248,7 +245,7 @@ jobs:
notify:
name: "Send Notification"
needs: [build, deploy, smoke-test]
if: always()
if: ${{ false }}
runs-on: [self-hosted, harbor, k8s]
steps:

1
.gitignore vendored
View File

@@ -93,3 +93,4 @@ tsconfig.tsbuildinfo
!.aiderignore
.claude/settings.local.json
.claude/settings.json
.claude/settings.json.bak*

View File

@@ -31,6 +31,9 @@
## 🔴 絕對禁止 → [HARD_RULES.md](docs/HARD_RULES.md)
## 🔴 文件語言鐵律 → [文件語言規範](docs/HARD_RULES.md#文件語言規範)
Markdown、ADR、LOGBOOK、Runbook、交接文件與計畫文件一律使用繁體中文;程式符號、API、指令、錯誤碼、服務名稱與原始 log 可保留英文。
## 🔴 紅區治理 → [RED_ZONES.md](docs/RED_ZONES.md)
Tier 3 核心檔案 (decision_manager, trust_engine, config 等) 修改需首席架構師授權

View File

@@ -0,0 +1,49 @@
-- ADR-090: widen the capacity_violation_event.violation_type allowlist.
-- Date: 2026-05-07 (Taipei)
--
-- Purpose:
--   Let the fine-grained cpu/mem/swap violations written by
--   capacity_scanner_job.py satisfy the table's CHECK constraint.
--
-- Background:
--   capacity_scanner_job.py writes cpu_over_threshold, mem_over_threshold and
--   swap_over_threshold, but the original ADR-090 DDL only allowed the coarser
--   host_saturation value. Production therefore hit
--   capacity_violation_event_type_valid check violations and capacity
--   governance events went unrecorded.
--   The new allowlist below also admits load_over_threshold.
BEGIN;

-- Replace the constraint in one statement: drop the old definition (if it is
-- present) and install the widened allowlist.
ALTER TABLE capacity_violation_event
    DROP CONSTRAINT IF EXISTS capacity_violation_event_type_valid,
    ADD CONSTRAINT capacity_violation_event_type_valid
        CHECK (violation_type IN (
            'no_limit_set',
            'over_request',
            'over_limit',
            'host_saturation',
            'over_sla_budget',
            'unauthorized_new_deploy',
            'cpu_over_threshold',
            'mem_over_threshold',
            'swap_over_threshold',
            'load_over_threshold'
        ));

COMMIT;

-- Rollback (run manually, only after human confirmation):
-- BEGIN;
-- ALTER TABLE capacity_violation_event
--     DROP CONSTRAINT IF EXISTS capacity_violation_event_type_valid,
--     ADD CONSTRAINT capacity_violation_event_type_valid
--         CHECK (violation_type IN (
--             'no_limit_set',
--             'over_request',
--             'over_limit',
--             'host_saturation',
--             'over_sla_budget',
--             'unauthorized_new_deploy'
--         ));
-- COMMIT;

View File

@@ -0,0 +1,36 @@
-- ADR-090-D: automation_operation_log.operation_type adds Ansible executor audit states
-- Created: 2026-05-12 Taipei
--
-- Purpose:
--   T3 Ansible declarative executor visibility. These operation types allow
--   the AI automation truth chain to record that Ansible was matched,
--   check-mode executed, applied, rolled back, or explicitly skipped.
--
-- Safety:
--   This migration only expands the CHECK allowlist. It does not execute
--   Ansible, change approval behavior, or create auto-remediation rows.
--
-- Atomicity:
--   The old constraint is dropped and the new one is added by a single
--   ALTER TABLE statement. If the ADD fails (for example because existing rows
--   violate the allowlist) the DROP is undone with it, so the table is never
--   left without its CHECK constraint, even when this file is run outside a
--   wrapping transaction.
ALTER TABLE automation_operation_log
    DROP CONSTRAINT IF EXISTS automation_operation_log_type_valid,
    ADD CONSTRAINT automation_operation_log_type_valid CHECK (operation_type IN (
        'monitor_configured','monitor_removed',
        'alert_fired','alert_suppressed','alert_routed',
        'rule_created','rule_updated','rule_matched','rule_rejected','rule_deprecated',
        'playbook_generated','playbook_updated','playbook_executed',
        'remediation_executed','remediation_verified','remediation_rolled_back',
        'self_correction_attempted',
        'km_created','km_updated','km_linked',
        'asset_discovered','coverage_recalculated',
        'capacity_recommendation','quota_enforced',
        'notification_formatted',
        -- New in ADR-090-D: first-class Ansible executor audit states.
        'ansible_candidate_matched',
        'ansible_check_mode_executed',
        'ansible_apply_executed',
        'ansible_rollback_executed',
        'ansible_execution_skipped'
    ));

COMMENT ON CONSTRAINT automation_operation_log_type_valid ON automation_operation_log IS
'ADR-090-D: allow first-class Ansible executor audit states for AwoooP truth-chain visibility.';

View File

@@ -0,0 +1,19 @@
-- ADR-090-D rollback: remove Ansible executor audit states from operation_type allowlist.
-- Only apply after confirming no automation_operation_log rows use ansible_* operation types.
--
-- The DROP and ADD are issued as one ALTER TABLE statement on purpose: if any
-- row still carries an ansible_* operation_type the ADD fails validation, and
-- because both actions belong to the same statement the DROP is undone too.
-- The existing (wider) constraint therefore stays in place instead of the
-- table being left with no CHECK constraint at all.
ALTER TABLE automation_operation_log
    DROP CONSTRAINT IF EXISTS automation_operation_log_type_valid,
    ADD CONSTRAINT automation_operation_log_type_valid CHECK (operation_type IN (
        'monitor_configured','monitor_removed',
        'alert_fired','alert_suppressed','alert_routed',
        'rule_created','rule_updated','rule_matched','rule_rejected','rule_deprecated',
        'playbook_generated','playbook_updated','playbook_executed',
        'remediation_executed','remediation_verified','remediation_rolled_back',
        'self_correction_attempted',
        'km_created','km_updated','km_linked',
        'asset_discovered','coverage_recalculated',
        'capacity_recommendation','quota_enforced',
        'notification_formatted'
    ));

View File

@@ -0,0 +1,164 @@
-- T9: approved SSH execution MCP Gateway seed
-- Purpose: let SSH remediation actions that were already approved through
-- Telegram/Approval pass the five gates of the AwoooP Gateway.
-- Boundary: only approval_executor is authorized; write/admin scopes still
-- require a short-lived Gate 5 approval key.
--
-- Layout: (1) set the project context, (2) publish the approval_executor agent
-- contract and point the active revision at it, (3) register the SSH tools and
-- grant them to approval_executor. Every INSERT uses ON CONFLICT so the file
-- can be re-applied.

-- Third argument FALSE = the setting lasts for the session, not only for the
-- current transaction.
-- NOTE(review): presumably app.project_id drives tenant-scoped policies on the
-- awooop_* tables — confirm.
SELECT set_config('app.project_id', 'awoooi', FALSE);
-- Statement 2: agent contract revision + active-revision pointer.
WITH agent_body AS (
-- The contract document stored in body_json.
SELECT jsonb_build_object(
'schema_version', 'awooop_agent_contract_v1',
'agent_id', 'approval_executor',
'display_name', 'Approval Executor',
'project_id', 'awoooi',
'purpose', 'Approved SSH execution through AwoooP MCP Gateway',
'allowed_scopes', jsonb_build_array('read', 'write', 'admin'),
'requires_gate5_for_scopes', jsonb_build_array('write', 'admin'),
'stage', 't9_ssh_approval_gateway'
) AS body_json
),
inserted_revision AS (
-- Publish revision 1.0 as 'active'. On a re-run the unique key conflicts,
-- nothing is inserted and RETURNING yields no row.
INSERT INTO awooop_contract_revisions (
project_id,
contract_family,
contract_id,
version_major,
version_minor,
lifecycle_status,
body_json,
body_hash,
body_schema_version,
publisher_id,
published_at
)
SELECT
'awoooi',
'agent',
'approval_executor',
1,
0,
'active',
body_json,
-- Hex SHA-256 of the JSON text. digest() is not a core function;
-- assumes the pgcrypto extension is installed — TODO confirm.
encode(digest(body_json::text, 'sha256'), 'hex'),
'v1.0',
'migration:t9_ssh_approval_gateway',
NOW()
FROM agent_body
ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
DO NOTHING
RETURNING revision_id, project_id, contract_family, contract_id
),
chosen_revision AS (
-- First branch: the row inserted just now (first run).
-- Second branch: the already-stored active 1.0 revision (re-run). Changes made
-- by a data-modifying CTE are not visible to the rest of the same statement,
-- so a freshly inserted row only arrives through the first branch.
-- NOTE(review): if revision 1.0 exists but is no longer 'active' (e.g. after
-- the rollback script), both branches are empty and the pointer below is not
-- restored — confirm this is intended.
SELECT revision_id, project_id, contract_family, contract_id
FROM inserted_revision
UNION ALL
SELECT revision_id, project_id, contract_family, contract_id
FROM awooop_contract_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id = 'approval_executor'
AND version_major = 1
AND version_minor = 0
AND lifecycle_status = 'active'
),
upsert_pointer AS (
-- Point the contract's active revision at the chosen revision.
INSERT INTO awooop_active_revisions (
project_id,
contract_family,
contract_id,
active_revision_id,
updated_at
)
-- DISTINCT ON + ORDER BY keeps exactly one row per contract (the first by
-- revision_id), so the INSERT never proposes the same key twice.
SELECT DISTINCT ON (project_id, contract_family, contract_id)
project_id,
contract_family,
contract_id,
revision_id,
NOW()
FROM chosen_revision
ORDER BY project_id, contract_family, contract_id, revision_id
ON CONFLICT (project_id, contract_family, contract_id)
DO UPDATE SET
active_revision_id = EXCLUDED.active_revision_id,
updated_at = NOW()
RETURNING contract_id
)
-- Report how many active-revision pointers were written (label, count).
SELECT 'approval_executor_active_contracts', count(*) FROM upsert_pointer;
-- Statement 3: tool registry rows + grants for approval_executor.
WITH gateway_tools(tool_name, description, required_scope) AS (
-- One read tool, six write tools and one admin tool.
VALUES
('ssh_diagnose', 'SSH host diagnosis read', 'read'),
('ssh_docker_restart', 'Approved Docker container restart over SSH', 'write'),
('ssh_docker_compose_restart', 'Approved Docker Compose service restart over SSH', 'write'),
('ssh_systemctl_restart', 'Approved systemd service restart over SSH', 'write'),
('ssh_clear_docker_logs', 'Approved Docker log truncation over SSH', 'write'),
('ssh_renew_ssl', 'Approved certbot renewal over SSH', 'write'),
('ssh_reload_nginx', 'Approved nginx config test and reload over SSH', 'write'),
('ssh_docker_prune', 'Approved Docker prune over SSH with provider disk guard', 'admin')
),
upsert_tools AS (
-- Register each tool, or refresh and re-activate it if the name already exists.
INSERT INTO awooop_mcp_tool_registry (
project_id,
tool_name,
tool_type,
description,
allowed_scopes,
environment_tags,
is_active,
updated_at
)
SELECT
'awoooi',
tool_name,
'mcp_server',
description,
-- allowed_scopes is a one-element JSON array holding the tool's required scope.
jsonb_build_array(required_scope),
'{"env": "prod"}'::jsonb,
TRUE,
NOW()
FROM gateway_tools
ON CONFLICT (project_id, tool_name)
DO UPDATE SET
description = EXCLUDED.description,
allowed_scopes = EXCLUDED.allowed_scopes,
environment_tags = EXCLUDED.environment_tags,
is_active = TRUE,
updated_at = NOW()
RETURNING tool_id, tool_name, allowed_scopes
),
upsert_grants AS (
-- Grant every tool touched above to approval_executor with exactly the tool's
-- allowed scopes and no expiry.
INSERT INTO awooop_mcp_grants (
project_id,
agent_id,
tool_id,
granted_by,
granted_scopes,
expires_at,
is_revoked,
revoked_at,
revoked_by
)
SELECT
'awoooi',
'approval_executor',
tool_id,
'migration:t9_ssh_approval_gateway',
allowed_scopes,
NULL,
FALSE,
NULL,
NULL
FROM upsert_tools
ON CONFLICT (project_id, agent_id, tool_id)
-- An existing grant (including a revoked one) is taken over by this
-- migration: provenance is overwritten and the revocation is cleared.
DO UPDATE SET
granted_by = EXCLUDED.granted_by,
granted_scopes = EXCLUDED.granted_scopes,
expires_at = NULL,
is_revoked = FALSE,
revoked_at = NULL,
revoked_by = NULL
RETURNING grant_id
)
-- Summary row: label, number of tool rows written, number of grant rows written.
SELECT
'approval_executor_ssh_gateway',
(SELECT count(*) FROM upsert_tools) AS tool_rows,
(SELECT count(*) FROM upsert_grants) AS grant_rows;

View File

@@ -0,0 +1,43 @@
-- Rollback for T9 approved SSH execution MCP Gateway seed.
-- Contract revisions are append-only; rollback revokes approval_executor grants
-- and deactivates only the write/admin tools introduced here.
-- It also removes the approval_executor active-revision pointer and marks the
-- revision published by the T9 migration as 'revoked' (see the last two
-- statements).

-- Third argument FALSE = the setting lasts for the session.
SELECT set_config('app.project_id', 'awoooi', FALSE);
-- 1. Revoke the grants created by the T9 seed. Grants that are already revoked
--    are skipped so their revoked_at / revoked_by are preserved.
UPDATE awooop_mcp_grants
SET
is_revoked = TRUE,
revoked_at = NOW(),
revoked_by = 'rollback:t9_ssh_approval_gateway'
WHERE project_id = 'awoooi'
AND agent_id = 'approval_executor'
AND granted_by = 'migration:t9_ssh_approval_gateway'
AND is_revoked = FALSE;
-- 2. Deactivate the write/admin SSH tools. ssh_diagnose is deliberately not in
--    this list, so the read tool stays active.
UPDATE awooop_mcp_tool_registry
SET
is_active = FALSE,
updated_at = NOW()
WHERE project_id = 'awoooi'
AND tool_name IN (
'ssh_docker_restart',
'ssh_docker_compose_restart',
'ssh_systemctl_restart',
'ssh_clear_docker_logs',
'ssh_renew_ssl',
'ssh_reload_nginx',
'ssh_docker_prune'
);
-- 3. Drop the active-revision pointer for the approval_executor contract.
DELETE FROM awooop_active_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id = 'approval_executor';
-- 4. Mark the revision published by the seed as revoked; the row itself is kept.
-- NOTE(review): the header calls revisions append-only, yet lifecycle_status is
-- updated in place here — confirm such status transitions are permitted.
UPDATE awooop_contract_revisions
SET lifecycle_status = 'revoked'
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id = 'approval_executor'
AND publisher_id = 'migration:t9_ssh_approval_gateway'
AND lifecycle_status = 'active';

View File

@@ -0,0 +1,166 @@
-- T23: auto-repair executor read-only MCP Gateway seed
-- Purpose: route the read-only SSH diagnostic steps of YAML_RULE/PlayBook
-- through the AwoooP MCP Gateway.
-- Boundary: only the read scope is granted; write/admin SSH tools must still go
-- through approval_executor + Gate 5.
--
-- Layout: (1) set the project context, (2) publish the auto_repair_executor
-- agent contract and point the active revision at it, (3) register the
-- read-only SSH tools and grant them to auto_repair_executor. Every INSERT uses
-- ON CONFLICT so the file can be re-applied.

-- Third argument FALSE = the setting lasts for the session, not only for the
-- current transaction.
-- NOTE(review): presumably app.project_id drives tenant-scoped policies on the
-- awooop_* tables — confirm.
SELECT set_config('app.project_id', 'awoooi', FALSE);
-- Statement 2: agent contract revision + active-revision pointer.
WITH agent_body AS (
-- The contract document stored in body_json.
SELECT jsonb_build_object(
'schema_version', 'awooop_agent_contract_v1',
'agent_id', 'auto_repair_executor',
'display_name', 'Auto Repair Executor',
'project_id', 'awoooi',
'purpose', 'Read-only auto-repair diagnostics through AwoooP MCP Gateway',
'allowed_scopes', jsonb_build_array('read'),
'forbidden_scopes', jsonb_build_array('write', 'admin'),
'stage', 't23_auto_repair_diagnostic_gateway'
) AS body_json
),
inserted_revision AS (
-- Publish revision 1.0 as 'active'. On a re-run the unique key conflicts,
-- nothing is inserted and RETURNING yields no row.
INSERT INTO awooop_contract_revisions (
project_id,
contract_family,
contract_id,
version_major,
version_minor,
lifecycle_status,
body_json,
body_hash,
body_schema_version,
publisher_id,
published_at
)
SELECT
'awoooi',
'agent',
'auto_repair_executor',
1,
0,
'active',
body_json,
-- Hex SHA-256 of the JSON text. digest() is not a core function;
-- assumes the pgcrypto extension is installed — TODO confirm.
encode(digest(body_json::text, 'sha256'), 'hex'),
'v1.0',
'migration:t23_auto_repair_executor_read_gateway',
NOW()
FROM agent_body
ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
DO NOTHING
RETURNING revision_id, project_id, contract_family, contract_id
),
chosen_revision AS (
-- First branch: the row inserted just now (first run).
-- Second branch: the already-stored active 1.0 revision (re-run). Changes made
-- by a data-modifying CTE are not visible to the rest of the same statement,
-- so a freshly inserted row only arrives through the first branch.
-- NOTE(review): if revision 1.0 exists but is no longer 'active' (e.g. after
-- the rollback script), both branches are empty and the pointer below is not
-- restored — confirm this is intended.
SELECT revision_id, project_id, contract_family, contract_id
FROM inserted_revision
UNION ALL
SELECT revision_id, project_id, contract_family, contract_id
FROM awooop_contract_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id = 'auto_repair_executor'
AND version_major = 1
AND version_minor = 0
AND lifecycle_status = 'active'
),
upsert_pointer AS (
-- Point the contract's active revision at the chosen revision.
INSERT INTO awooop_active_revisions (
project_id,
contract_family,
contract_id,
active_revision_id,
updated_at
)
-- DISTINCT ON + ORDER BY keeps exactly one row per contract (the first by
-- revision_id), so the INSERT never proposes the same key twice.
SELECT DISTINCT ON (project_id, contract_family, contract_id)
project_id,
contract_family,
contract_id,
revision_id,
NOW()
FROM chosen_revision
ORDER BY project_id, contract_family, contract_id, revision_id
ON CONFLICT (project_id, contract_family, contract_id)
DO UPDATE SET
active_revision_id = EXCLUDED.active_revision_id,
updated_at = NOW()
RETURNING contract_id
)
-- Report how many active-revision pointers were written (label, count).
SELECT 'auto_repair_executor_active_contracts', count(*) FROM upsert_pointer;
-- Statement 3: read-only tool registry rows + grants for auto_repair_executor.
WITH read_tools(tool_name, description) AS (
VALUES
('ssh_diagnose', 'SSH host/container diagnosis read'),
('ssh_get_top_processes', 'SSH top processes read'),
('ssh_get_disk_usage', 'SSH disk usage read'),
('ssh_get_memory_info', 'SSH memory info read'),
('ssh_get_container_logs', 'SSH container logs read'),
('ssh_get_container_status', 'SSH container status read'),
('ssh_get_service_status', 'SSH service status read'),
('ssh_check_port', 'SSH port check read'),
('ssh_get_nginx_error_log', 'SSH nginx error log read'),
('ssh_get_swap_info', 'SSH swap info read')
),
upsert_tools AS (
-- Register each tool, or refresh and re-activate it if the name already exists.
-- On conflict the stored description and allowed_scopes are overwritten with
-- the values from this file (allowed_scopes becomes ["read"]).
INSERT INTO awooop_mcp_tool_registry (
project_id,
tool_name,
tool_type,
description,
allowed_scopes,
environment_tags,
is_active,
updated_at
)
SELECT
'awoooi',
tool_name,
'mcp_server',
description,
'["read"]'::jsonb,
'{"env": "prod"}'::jsonb,
TRUE,
NOW()
FROM read_tools
ON CONFLICT (project_id, tool_name)
DO UPDATE SET
description = EXCLUDED.description,
allowed_scopes = EXCLUDED.allowed_scopes,
environment_tags = EXCLUDED.environment_tags,
is_active = TRUE,
updated_at = NOW()
RETURNING tool_id, tool_name, allowed_scopes
),
upsert_grants AS (
-- Grant every tool touched above to auto_repair_executor with the tool's
-- allowed scopes and no expiry.
INSERT INTO awooop_mcp_grants (
project_id,
agent_id,
tool_id,
granted_by,
granted_scopes,
expires_at,
is_revoked,
revoked_at,
revoked_by
)
SELECT
'awoooi',
'auto_repair_executor',
tool_id,
'migration:t23_auto_repair_executor_read_gateway',
allowed_scopes,
NULL,
FALSE,
NULL,
NULL
FROM upsert_tools
ON CONFLICT (project_id, agent_id, tool_id)
-- An existing grant (including a revoked one) is taken over by this
-- migration: provenance is overwritten and the revocation is cleared.
DO UPDATE SET
granted_by = EXCLUDED.granted_by,
granted_scopes = EXCLUDED.granted_scopes,
expires_at = NULL,
is_revoked = FALSE,
revoked_at = NULL,
revoked_by = NULL
RETURNING grant_id
)
-- Summary row: label, number of tool rows written, number of grant rows written.
SELECT
'auto_repair_executor_read_gateway',
(SELECT count(*) FROM upsert_tools) AS tool_rows,
(SELECT count(*) FROM upsert_grants) AS grant_rows;

View File

@@ -0,0 +1,24 @@
-- Rollback T23 auto-repair executor read-only MCP Gateway grant.
--
-- Revokes the grants created by the T23 seed, removes the auto_repair_executor
-- active-revision pointer and retires the contract revision the seed
-- published. The tool registry rows are left untouched.

-- Third argument FALSE = the setting lasts for the session.
SELECT set_config('app.project_id', 'awoooi', FALSE);

-- 1. Revoke the seed's grants. Only grants that are still live are touched, so
--    revoked_at / revoked_by of a grant that was already revoked earlier are
--    not overwritten by this rollback.
UPDATE awooop_mcp_grants
SET is_revoked = TRUE,
    revoked_at = NOW(),
    revoked_by = 'rollback:t23_auto_repair_executor_read_gateway'
WHERE project_id = 'awoooi'
  AND agent_id = 'auto_repair_executor'
  AND granted_by = 'migration:t23_auto_repair_executor_read_gateway'
  AND is_revoked = FALSE;

-- 2. Drop the active-revision pointer for the auto_repair_executor contract.
DELETE FROM awooop_active_revisions
WHERE project_id = 'awoooi'
  AND contract_family = 'agent'
  AND contract_id = 'auto_repair_executor';

-- 3. Retire the revision published by the seed; the row itself is kept.
-- NOTE(review): this script uses 'retired' as the terminal lifecycle_status —
-- confirm it is the intended value and is accepted by the column's allowed
-- values.
UPDATE awooop_contract_revisions
SET lifecycle_status = 'retired'
WHERE project_id = 'awoooi'
  AND contract_family = 'agent'
  AND contract_id = 'auto_repair_executor'
  AND publisher_id = 'migration:t23_auto_repair_executor_read_gateway'
  AND lifecycle_status = 'active';

View File

@@ -0,0 +1,25 @@
-- =============================================================================
-- AwoooP / AWOOOI MCP Gateway: shadow onboarding
-- 2026-05-13 Codex + ogt
--
-- Background:
--   AWOOOI already has its read-only MCP tool registry / grants seed, but the
--   project itself is still in legacy_awoooi_default and is therefore
--   (correctly) blocked by MCP Gateway Gate 1.
--
-- Boundary:
--   Only promotes the AWOOOI tenant to shadow so that the existing Gate 1 takes
--   effect. write/admin tools remain unauthorized; this migration does not open
--   up auto-repair or destructive actions.
-- =============================================================================
BEGIN;

SELECT set_config('app.project_id', 'awoooi', FALSE);

-- Guarded on the current mode: a project that has already left
-- legacy_awoooi_default is not modified.
UPDATE awooop_projects
   SET migration_mode = 'shadow',
       updated_at     = NOW()
 WHERE migration_mode = 'legacy_awoooi_default'
   AND project_id = 'awoooi';

COMMIT;

View File

@@ -0,0 +1,20 @@
-- =============================================================================
-- Rollback: AwoooP / AWOOOI MCP Gateway shadow onboarding
-- 2026-05-13 Codex + ogt
--
-- Only reverts AWOOOI while it is still in shadow. If it has since been moved
-- on to canary/active (manually or by a later migration) it is not downgraded
-- automatically.
-- =============================================================================
BEGIN;

SELECT set_config('app.project_id', 'awoooi', FALSE);

-- Guarded on the current mode: only a project still in 'shadow' is reverted.
UPDATE awooop_projects
   SET migration_mode = 'legacy_awoooi_default',
       updated_at     = NOW()
 WHERE migration_mode = 'shadow'
   AND project_id = 'awoooi';

COMMIT;

View File

@@ -0,0 +1,211 @@
-- T7: awoooi read-only MCP Gateway seed
-- Purpose: let the pre-decision sensing MCP calls pass AwoooP Gateway Gate 2/3
-- and produce first-class audit records.
-- Boundary: only the read scope is granted; write/admin tools such as
-- restart/delete/scale/apply/rollback are not authorized.
--
-- Layout: (1) set the project context, (2) publish the agent contracts for
-- pre_decision_investigator and post_execution_verifier and point the active
-- revisions at them, (3) register the read-only tools and grant each of them to
-- both agents. Every INSERT uses ON CONFLICT so the file can be re-applied.

-- Third argument FALSE = the setting lasts for the session, not only for the
-- current transaction.
-- NOTE(review): presumably app.project_id drives tenant-scoped policies on the
-- awooop_* tables — confirm.
SELECT set_config('app.project_id', 'awoooi', FALSE);
-- Statement 2: agent contract revisions + active-revision pointers.
WITH agent_seed(agent_id, display_name) AS (
VALUES
('pre_decision_investigator', 'Pre-decision Investigator'),
('post_execution_verifier', 'Post-execution Verifier')
),
agent_body AS (
-- One contract document per seeded agent.
SELECT
agent_id,
jsonb_build_object(
'schema_version', 'awooop_agent_contract_v1',
'agent_id', agent_id,
'display_name', display_name,
'project_id', 'awoooi',
'purpose', 'Read-only MCP sensing through AwoooP Gateway',
'allowed_scopes', jsonb_build_array('read'),
'forbidden_scopes', jsonb_build_array('write', 'admin'),
'stage', 't7_mcp_gateway_read_sense'
) AS body_json
FROM agent_seed
),
inserted_revision AS (
-- Publish revision 1.0 as 'active' for each agent. On a re-run the unique key
-- conflicts, nothing is inserted and RETURNING yields no row for that agent.
INSERT INTO awooop_contract_revisions (
project_id,
contract_family,
contract_id,
version_major,
version_minor,
lifecycle_status,
body_json,
body_hash,
body_schema_version,
publisher_id,
published_at
)
SELECT
'awoooi',
'agent',
agent_id,
1,
0,
'active',
body_json,
-- Hex SHA-256 of the JSON text. digest() is not a core function;
-- assumes the pgcrypto extension is installed — TODO confirm.
encode(digest(body_json::text, 'sha256'), 'hex'),
'v1.0',
'migration:t7_mcp_gateway_read_seed',
NOW()
FROM agent_body
ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
DO NOTHING
RETURNING revision_id, project_id, contract_family, contract_id
),
chosen_revision AS (
-- First branch: rows inserted just now (first run).
-- Second branch: already-stored active 1.0 revisions (re-run). Changes made by
-- a data-modifying CTE are not visible to the rest of the same statement, so
-- freshly inserted rows only arrive through the first branch.
-- NOTE(review): if a 1.0 revision exists but is no longer 'active' (e.g. after
-- the rollback script), neither branch returns it and its pointer is not
-- restored — confirm this is intended.
SELECT revision_id, project_id, contract_family, contract_id
FROM inserted_revision
UNION ALL
SELECT revision_id, project_id, contract_family, contract_id
FROM awooop_contract_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id IN (SELECT agent_id FROM agent_seed)
AND version_major = 1
AND version_minor = 0
AND lifecycle_status = 'active'
),
upsert_pointer AS (
-- Point each contract's active revision at its chosen revision.
INSERT INTO awooop_active_revisions (
project_id,
contract_family,
contract_id,
active_revision_id,
updated_at
)
-- DISTINCT ON + ORDER BY keeps exactly one row per contract (the first by
-- revision_id), so the INSERT never proposes the same key twice.
SELECT DISTINCT ON (project_id, contract_family, contract_id)
project_id,
contract_family,
contract_id,
revision_id,
NOW()
FROM chosen_revision
ORDER BY project_id, contract_family, contract_id, revision_id
ON CONFLICT (project_id, contract_family, contract_id)
DO UPDATE SET
active_revision_id = EXCLUDED.active_revision_id,
updated_at = NOW()
RETURNING contract_id
)
-- Report how many active-revision pointers were written (label, count).
SELECT 'active_agent_contracts', count(*) FROM upsert_pointer;
-- Statement 3: read-only tool registry rows + grants for both agents.
WITH read_tools(tool_name, description) AS (
VALUES
('k8s_get_pod_logs', 'Kubernetes pod logs read'),
('k8s_get_events', 'Kubernetes events read'),
('k8s_describe_pod', 'Kubernetes pod describe read'),
('k8s_get_hpa_status', 'Kubernetes HPA status read'),
('k8s_get_node_conditions', 'Kubernetes node conditions read'),
('ssh_diagnose', 'SSH host diagnosis read'),
('ssh_get_top_processes', 'SSH top processes read'),
('ssh_get_disk_usage', 'SSH disk usage read'),
('ssh_get_memory_info', 'SSH memory info read'),
('ssh_get_container_logs', 'SSH container logs read'),
('ssh_get_container_status', 'SSH container status read'),
('ssh_get_service_status', 'SSH service status read'),
('ssh_check_port', 'SSH port check read'),
('ssh_get_nginx_error_log', 'SSH nginx error log read'),
('ssh_get_swap_info', 'SSH swap info read'),
('prometheus_query', 'Prometheus instant query read'),
('prometheus_query_range', 'Prometheus range query read'),
('prometheus_get_alert_history', 'Prometheus alert history read'),
('gold_metrics', 'SigNoz gold metrics read'),
('trace_url', 'SigNoz trace URL read'),
('system_metrics', 'SigNoz system metrics read'),
('query_logs', 'SigNoz logs read'),
('error_logs_summary', 'SigNoz error logs summary read'),
('list_approvals', 'Approval records read'),
('get_approval', 'Approval detail read'),
('list_incidents', 'Incident records read'),
('list_timeline', 'Timeline records read'),
('read_file', 'Filesystem allowlisted file read'),
('list_directory', 'Filesystem allowlisted directory read'),
('search_in_file', 'Filesystem allowlisted file search'),
('list_dashboards', 'Grafana dashboards read'),
('get_dashboard', 'Grafana dashboard read'),
('get_panel_data', 'Grafana panel data read'),
('generate_dashboard_url', 'Grafana dashboard URL read'),
('search_runbook', 'Runbook semantic search read'),
('get_index_stats', 'Runbook index stats read'),
('argocd_list_apps', 'ArgoCD apps read'),
('argocd_get_app_status', 'ArgoCD app status read'),
('argocd_get_sync_history', 'ArgoCD sync history read'),
('sentry_list_issues', 'Sentry issues read'),
('sentry_get_issue', 'Sentry issue detail read'),
('sentry_search_issues', 'Sentry issue search read')
),
upsert_tools AS (
-- Register each tool, or refresh and re-activate it if the name already exists.
-- On conflict the stored description and allowed_scopes are overwritten with
-- the values from this file (allowed_scopes becomes ["read"]).
INSERT INTO awooop_mcp_tool_registry (
project_id,
tool_name,
tool_type,
description,
allowed_scopes,
environment_tags,
is_active,
updated_at
)
SELECT
'awoooi',
tool_name,
'mcp_server',
description,
'["read"]'::jsonb,
'{"env": "prod"}'::jsonb,
TRUE,
NOW()
FROM read_tools
ON CONFLICT (project_id, tool_name)
DO UPDATE SET
description = EXCLUDED.description,
allowed_scopes = EXCLUDED.allowed_scopes,
environment_tags = EXCLUDED.environment_tags,
is_active = TRUE,
updated_at = NOW()
RETURNING tool_id
),
grant_agents(agent_id) AS (
VALUES
('pre_decision_investigator'),
('post_execution_verifier')
),
upsert_grants AS (
-- Grant every tool touched above to both agents (tools x agents) with the read
-- scope and no expiry.
INSERT INTO awooop_mcp_grants (
project_id,
agent_id,
tool_id,
granted_by,
granted_scopes,
expires_at,
is_revoked,
revoked_at,
revoked_by
)
SELECT
'awoooi',
grant_agents.agent_id,
upsert_tools.tool_id,
'migration:t7_mcp_gateway_read_seed',
'["read"]'::jsonb,
NULL,
FALSE,
NULL,
NULL
FROM upsert_tools
CROSS JOIN grant_agents
ON CONFLICT (project_id, agent_id, tool_id)
-- An existing grant (including a revoked one) is taken over by this
-- migration. granted_by is rewritten as well so that a rollback filtering on
-- granted_by = 'migration:t7_mcp_gateway_read_seed' also revokes grants that
-- this seed re-activated rather than created.
DO UPDATE SET
granted_by = EXCLUDED.granted_by,
granted_scopes = EXCLUDED.granted_scopes,
expires_at = NULL,
is_revoked = FALSE,
revoked_at = NULL,
revoked_by = NULL
RETURNING grant_id
)
-- Summary row: label, number of tool rows written, number of grant rows written.
SELECT
'awoooi_read_tools',
(SELECT count(*) FROM upsert_tools) AS tool_rows,
(SELECT count(*) FROM upsert_grants) AS grant_rows;

View File

@@ -0,0 +1,77 @@
-- Rollback for T7 awoooi read-only MCP Gateway seed.
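-- Contract revisions are append-only, so no revision rows are deleted: rollback revokes the seeded grants,
-- deactivates the seeded read tools, removes the agents' active-revision pointers, and marks the revisions
-- published by this migration as 'revoked'.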
SELECT set_config('app.project_id', 'awoooi', FALSE);
UPDATE awooop_mcp_grants
SET
is_revoked = TRUE,
revoked_at = NOW(),
revoked_by = 'rollback:t7_mcp_gateway_read_seed'
WHERE project_id = 'awoooi'
AND agent_id IN ('pre_decision_investigator', 'post_execution_verifier')
AND granted_by = 'migration:t7_mcp_gateway_read_seed'
AND is_revoked = FALSE;
UPDATE awooop_mcp_tool_registry
SET
is_active = FALSE,
updated_at = NOW()
WHERE project_id = 'awoooi'
AND tool_name IN (
'k8s_get_pod_logs',
'k8s_get_events',
'k8s_describe_pod',
'k8s_get_hpa_status',
'k8s_get_node_conditions',
'ssh_diagnose',
'ssh_get_top_processes',
'ssh_get_disk_usage',
'ssh_get_memory_info',
'ssh_get_container_logs',
'ssh_get_container_status',
'ssh_get_service_status',
'ssh_check_port',
'ssh_get_nginx_error_log',
'ssh_get_swap_info',
'prometheus_query',
'prometheus_query_range',
'prometheus_get_alert_history',
'gold_metrics',
'trace_url',
'system_metrics',
'query_logs',
'error_logs_summary',
'list_approvals',
'get_approval',
'list_incidents',
'list_timeline',
'read_file',
'list_directory',
'search_in_file',
'list_dashboards',
'get_dashboard',
'get_panel_data',
'generate_dashboard_url',
'search_runbook',
'get_index_stats',
'argocd_list_apps',
'argocd_get_app_status',
'argocd_get_sync_history',
'sentry_list_issues',
'sentry_get_issue',
'sentry_search_issues'
);
-- Remove the active-revision pointers of the two seeded agents, but only while the
-- pointer still references a revision published by this migration. The UPDATE that
-- follows is scoped to the same publisher_id; without this guard a pointer that was
-- later moved to another publisher's revision would be deleted while that revision
-- stays 'active', leaving the contract active but unreachable through the pointer table.
DELETE FROM awooop_active_revisions
WHERE project_id = 'awoooi'
  AND contract_family = 'agent'
  AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier')
  AND active_revision_id IN (
    SELECT revision_id
    FROM awooop_contract_revisions
    WHERE project_id = 'awoooi'
      AND contract_family = 'agent'
      AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier')
      AND publisher_id = 'migration:t7_mcp_gateway_read_seed'
  );
UPDATE awooop_contract_revisions
SET lifecycle_status = 'revoked'
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier')
AND publisher_id = 'migration:t7_mcp_gateway_read_seed'
AND lifecycle_status = 'active';

View File

@@ -0,0 +1,213 @@
-- T7: awoooi read-only MCP Gateway seed
-- 目的:讓決策前感官 MCP 能通過 AwoooP Gateway Gate 2/3,產生 first-class audit。
-- 邊界:只授權 read scope,不授權 restart/delete/scale/apply/rollback 等 write/admin 工具。
SELECT set_config('app.project_id', 'awoooi', FALSE);
-- Seed statement 1: publish a v1.0 'agent' contract revision for each read-only sensing
-- agent and point awooop_active_revisions at it. The final SELECT reports how many
-- active-revision pointers were inserted or updated.
WITH agent_seed(agent_id, display_name) AS (
VALUES
('pre_decision_investigator', 'Pre-decision Investigator'),
('post_execution_verifier', 'Post-execution Verifier')
),
-- Build the JSON contract body per agent: 'read' is the only allowed scope,
-- 'write' and 'admin' are listed as forbidden.
agent_body AS (
SELECT
agent_id,
jsonb_build_object(
'schema_version', 'awooop_agent_contract_v1',
'agent_id', agent_id,
'display_name', display_name,
'project_id', 'awoooi',
'purpose', 'Read-only MCP sensing through AwoooP Gateway',
'allowed_scopes', jsonb_build_array('read'),
'forbidden_scopes', jsonb_build_array('write', 'admin'),
'stage', 't7_mcp_gateway_read_sense'
) AS body_json
FROM agent_seed
),
-- Insert each body as an 'active' v1.0 revision. body_hash is the hex-encoded SHA-256
-- of body_json::text; digest() is presumably provided by the pgcrypto extension —
-- TODO confirm it is installed before this migration runs.
-- ON CONFLICT DO NOTHING leaves an already existing v1.0 row untouched, so RETURNING
-- yields only the rows that were newly inserted by this run.
inserted_revision AS (
INSERT INTO awooop_contract_revisions (
project_id,
contract_family,
contract_id,
version_major,
version_minor,
lifecycle_status,
body_json,
body_hash,
body_schema_version,
publisher_id,
published_at
)
SELECT
'awoooi',
'agent',
agent_id,
1,
0,
'active',
body_json,
encode(digest(body_json::text, 'sha256'), 'hex'),
'v1.0',
'migration:t7_mcp_gateway_read_seed',
NOW()
FROM agent_body
ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
DO NOTHING
RETURNING revision_id, project_id, contract_family, contract_id
),
-- Candidate revisions for the pointer: rows inserted just now plus v1.0 'active' rows
-- that already existed. Both branches are needed because sub-statements of one WITH
-- share a snapshot, so the table scan below cannot see the rows inserted above.
-- NOTE(review): an existing v1.0 row whose lifecycle_status is not 'active' (the
-- rollback script sets 'revoked') is returned by neither branch, so re-applying this
-- migration after a rollback writes no pointer for that agent — confirm this is intended.
chosen_revision AS (
SELECT revision_id, project_id, contract_family, contract_id
FROM inserted_revision
UNION ALL
SELECT revision_id, project_id, contract_family, contract_id
FROM awooop_contract_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id IN (SELECT agent_id FROM agent_seed)
AND version_major = 1
AND version_minor = 0
AND lifecycle_status = 'active'
),
-- Upsert one pointer per contract. DISTINCT ON with the ORDER BY below keeps the row
-- with the lowest revision_id when a contract has more than one candidate.
upsert_pointer AS (
INSERT INTO awooop_active_revisions (
project_id,
contract_family,
contract_id,
active_revision_id,
updated_at
)
SELECT DISTINCT ON (project_id, contract_family, contract_id)
project_id,
contract_family,
contract_id,
revision_id,
NOW()
FROM chosen_revision
ORDER BY project_id, contract_family, contract_id, revision_id
ON CONFLICT (project_id, contract_family, contract_id)
DO UPDATE SET
active_revision_id = EXCLUDED.active_revision_id,
updated_at = NOW()
RETURNING contract_id
)
-- Report row: a fixed label and the number of pointers written.
SELECT 'active_agent_contracts', count(*) FROM upsert_pointer;
WITH read_tools(tool_name, description) AS (
VALUES
('k8s_get_pod_logs', 'Kubernetes pod logs read'),
('k8s_get_events', 'Kubernetes events read'),
('k8s_describe_pod', 'Kubernetes pod describe read'),
('k8s_get_hpa_status', 'Kubernetes HPA status read'),
('k8s_get_node_conditions', 'Kubernetes node conditions read'),
('ssh_diagnose', 'SSH host diagnosis read'),
('ssh_get_top_processes', 'SSH top processes read'),
('ssh_get_disk_usage', 'SSH disk usage read'),
('ssh_get_memory_info', 'SSH memory info read'),
('ssh_get_container_logs', 'SSH container logs read'),
('ssh_get_container_status', 'SSH container status read'),
('ssh_get_service_status', 'SSH service status read'),
('ssh_check_port', 'SSH port check read'),
('ssh_get_nginx_error_log', 'SSH nginx error log read'),
('ssh_get_swap_info', 'SSH swap info read'),
('prometheus_query', 'Prometheus instant query read'),
('prometheus_query_range', 'Prometheus range query read'),
('prometheus_get_alert_history', 'Prometheus alert history read'),
('gold_metrics', 'SigNoz gold metrics read'),
('trace_url', 'SigNoz trace URL read'),
('system_metrics', 'SigNoz system metrics read'),
('query_logs', 'SigNoz logs read'),
('error_logs_summary', 'SigNoz error logs summary read'),
('list_approvals', 'Approval records read'),
('get_approval', 'Approval detail read'),
('list_incidents', 'Incident records read'),
('list_timeline', 'Timeline records read'),
('read_file', 'Filesystem allowlisted file read'),
('list_directory', 'Filesystem allowlisted directory read'),
('search_in_file', 'Filesystem allowlisted file search'),
('list_dashboards', 'Grafana dashboards read'),
('get_dashboard', 'Grafana dashboard read'),
('get_panel_data', 'Grafana panel data read'),
('generate_dashboard_url', 'Grafana dashboard URL read'),
('search_runbook', 'Runbook semantic search read'),
('get_index_stats', 'Runbook index stats read'),
('argocd_list_apps', 'ArgoCD apps read'),
('argocd_get_app_status', 'ArgoCD app status read'),
('argocd_get_sync_history', 'ArgoCD sync history read'),
('sentry_list_issues', 'Sentry issues read'),
('sentry_get_issue', 'Sentry issue detail read'),
('sentry_search_issues', 'Sentry issue search read')
),
upsert_tools AS (
INSERT INTO awooop_mcp_tool_registry (
project_id,
tool_name,
tool_type,
description,
allowed_scopes,
environment_tags,
is_active,
updated_at
)
SELECT
'awoooi',
tool_name,
'mcp_server',
description,
'["read"]'::jsonb,
'{"env": "prod"}'::jsonb,
TRUE,
NOW()
FROM read_tools
ON CONFLICT (project_id, tool_name)
DO UPDATE SET
description = EXCLUDED.description,
allowed_scopes = EXCLUDED.allowed_scopes,
environment_tags = EXCLUDED.environment_tags,
is_active = TRUE,
updated_at = NOW()
RETURNING tool_id
),
grant_agents(agent_id) AS (
VALUES
('pre_decision_investigator'),
('post_execution_verifier')
),
upsert_grants AS (
INSERT INTO awooop_mcp_grants (
project_id,
agent_id,
tool_id,
granted_by,
granted_scopes,
expires_at,
is_revoked,
revoked_at,
revoked_by
)
SELECT
'awoooi',
grant_agents.agent_id,
upsert_tools.tool_id,
'migration:t7_mcp_gateway_read_seed',
'["read"]'::jsonb,
NULL,
FALSE,
NULL,
NULL
FROM upsert_tools
CROSS JOIN grant_agents
ON CONFLICT (project_id, agent_id, tool_id)
DO UPDATE SET
granted_scopes = EXCLUDED.granted_scopes,
expires_at = NULL,
is_revoked = FALSE,
revoked_at = NULL,
revoked_by = NULL
RETURNING grant_id
)
SELECT
'awoooi_read_tools',
(SELECT count(*) FROM upsert_tools) AS tool_rows,
(SELECT count(*) FROM upsert_grants) AS grant_rows;
-- v4 exists only to retrigger run-migration after Gitea skipped the v2->v3 rename-only push.

View File

@@ -0,0 +1,79 @@
-- Rollback for T7 awoooi read-only MCP Gateway seed.
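-- Contract revisions are append-only, so no revision rows are deleted: rollback revokes the seeded grants,
-- deactivates the seeded read tools, removes the agents' active-revision pointers, and marks the revisions
-- published by this migration as 'revoked'.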
SELECT set_config('app.project_id', 'awoooi', FALSE);
UPDATE awooop_mcp_grants
SET
is_revoked = TRUE,
revoked_at = NOW(),
revoked_by = 'rollback:t7_mcp_gateway_read_seed'
WHERE project_id = 'awoooi'
AND agent_id IN ('pre_decision_investigator', 'post_execution_verifier')
AND granted_by = 'migration:t7_mcp_gateway_read_seed'
AND is_revoked = FALSE;
UPDATE awooop_mcp_tool_registry
SET
is_active = FALSE,
updated_at = NOW()
WHERE project_id = 'awoooi'
AND tool_name IN (
'k8s_get_pod_logs',
'k8s_get_events',
'k8s_describe_pod',
'k8s_get_hpa_status',
'k8s_get_node_conditions',
'ssh_diagnose',
'ssh_get_top_processes',
'ssh_get_disk_usage',
'ssh_get_memory_info',
'ssh_get_container_logs',
'ssh_get_container_status',
'ssh_get_service_status',
'ssh_check_port',
'ssh_get_nginx_error_log',
'ssh_get_swap_info',
'prometheus_query',
'prometheus_query_range',
'prometheus_get_alert_history',
'gold_metrics',
'trace_url',
'system_metrics',
'query_logs',
'error_logs_summary',
'list_approvals',
'get_approval',
'list_incidents',
'list_timeline',
'read_file',
'list_directory',
'search_in_file',
'list_dashboards',
'get_dashboard',
'get_panel_data',
'generate_dashboard_url',
'search_runbook',
'get_index_stats',
'argocd_list_apps',
'argocd_get_app_status',
'argocd_get_sync_history',
'sentry_list_issues',
'sentry_get_issue',
'sentry_search_issues'
);
-- Remove the active-revision pointers of the two seeded agents, but only while the
-- pointer still references a revision published by this migration. The UPDATE that
-- follows is scoped to the same publisher_id; without this guard a pointer that was
-- later moved to another publisher's revision would be deleted while that revision
-- stays 'active', leaving the contract active but unreachable through the pointer table.
DELETE FROM awooop_active_revisions
WHERE project_id = 'awoooi'
  AND contract_family = 'agent'
  AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier')
  AND active_revision_id IN (
    SELECT revision_id
    FROM awooop_contract_revisions
    WHERE project_id = 'awoooi'
      AND contract_family = 'agent'
      AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier')
      AND publisher_id = 'migration:t7_mcp_gateway_read_seed'
  );
UPDATE awooop_contract_revisions
SET lifecycle_status = 'revoked'
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier')
AND publisher_id = 'migration:t7_mcp_gateway_read_seed'
AND lifecycle_status = 'active';
-- v4 rollback companion for the retrigger migration.

View File

@@ -0,0 +1,77 @@
-- T16 verifier gap: allow rollout status evidence through AwoooP MCP Gateway.
-- Boundary: read-only scope only; no restart/delete/scale grant is added here.
SELECT set_config('app.project_id', 'awoooi', FALSE);
-- Register (or re-activate) the k8s_watch_rollout tool with read-only scope, grant it to
-- the two sensing agents, and report how many tool and grant rows were written.
WITH upsert_tool AS (
INSERT INTO awooop_mcp_tool_registry (
project_id,
tool_name,
tool_type,
description,
allowed_scopes,
environment_tags,
is_active,
updated_at
)
VALUES (
'awoooi',
'k8s_watch_rollout',
'mcp_server',
'Kubernetes deployment rollout status read',
'["read"]'::jsonb,
'{"env": "prod"}'::jsonb,
TRUE,
NOW()
)
-- An existing registry row is overwritten and switched back to active, so the
-- statement can be re-run (for example after the rollback deactivated the tool).
ON CONFLICT (project_id, tool_name)
DO UPDATE SET
description = EXCLUDED.description,
allowed_scopes = EXCLUDED.allowed_scopes,
environment_tags = EXCLUDED.environment_tags,
is_active = TRUE,
updated_at = NOW()
RETURNING tool_id
),
-- Agents that receive the read grant.
grant_agents(agent_id) AS (
VALUES
('pre_decision_investigator'),
('post_execution_verifier')
),
-- One grant per (agent, tool) pair: no expiry, not revoked.
upsert_grants AS (
INSERT INTO awooop_mcp_grants (
project_id,
agent_id,
tool_id,
granted_by,
granted_scopes,
expires_at,
is_revoked,
revoked_at,
revoked_by
)
SELECT
'awoooi',
grant_agents.agent_id,
upsert_tool.tool_id,
'migration:t16_rollout_verifier_seed',
'["read"]'::jsonb,
NULL,
FALSE,
NULL,
NULL
FROM upsert_tool
CROSS JOIN grant_agents
-- An existing grant gets its scopes reset and its revocation cleared. granted_by is
-- not part of the SET list, so a pre-existing grant keeps its original granted_by.
ON CONFLICT (project_id, agent_id, tool_id)
DO UPDATE SET
granted_scopes = EXCLUDED.granted_scopes,
expires_at = NULL,
is_revoked = FALSE,
revoked_at = NULL,
revoked_by = NULL
RETURNING grant_id
)
-- Report row: a fixed label plus the number of tool and grant rows written.
SELECT
'k8s_watch_rollout_read_grants' AS seed,
(SELECT count(*) FROM upsert_tool) AS tool_rows,
(SELECT count(*) FROM upsert_grants) AS grant_rows;

View File

@@ -0,0 +1,24 @@
-- Roll back T16 rollout verifier read grant seed.
SELECT set_config('app.project_id', 'awoooi', FALSE);
-- Revoke the k8s_watch_rollout read grants held by the two sensing agents.
-- Only grants that are still live are touched: rows that were already revoked (by an
-- operator or by an earlier run of this rollback) keep their original revoked_at /
-- revoked_by audit values. The T7 rollback applies the same is_revoked = FALSE guard.
UPDATE awooop_mcp_grants
SET
    is_revoked = TRUE,
    revoked_at = NOW(),
    revoked_by = 'migration:t16_rollout_verifier_seed_down'
WHERE project_id = 'awoooi'
  AND agent_id IN ('pre_decision_investigator', 'post_execution_verifier')
  AND is_revoked = FALSE
  AND tool_id IN (
    SELECT tool_id
    FROM awooop_mcp_tool_registry
    WHERE project_id = 'awoooi'
      AND tool_name = 'k8s_watch_rollout'
  );
UPDATE awooop_mcp_tool_registry
SET
is_active = FALSE,
updated_at = NOW()
WHERE project_id = 'awoooi'
AND tool_name = 'k8s_watch_rollout';

View File

@@ -0,0 +1,14 @@
-- AwoooP Phase 5b:MCP Gateway blocked call 稽核覆蓋
-- 日期:2026-05-06
-- 維護者:Codex
--
-- Gate 1 / Gate 2 / 未知工具的 blocked call 可能發生在 tool registry row
-- 取得之前。這些安全決策仍必須落稽核紀錄,因此 tool_id 允許為 NULL,
-- 但 tool_name 仍維持必填,作為未知工具與早期 gate block 的追蹤線索。
BEGIN;
ALTER TABLE awooop_mcp_gateway_audit
ALTER COLUMN tool_id DROP NOT NULL;
COMMIT;

View File

@@ -0,0 +1,21 @@
-- AwoooP Phase 7 T15b: inbound event truth-chain columns
--
-- Purpose:
-- Telegram cards are only the notification surface. Operators need a
-- redacted replay envelope for inbound alerts so Alertmanager, Sentry, and
-- SigNoz events can be correlated with incidents, approvals, logs, and
-- automation decisions without storing raw secrets or PII.
ALTER TABLE awooop_conversation_event
ADD COLUMN IF NOT EXISTS content_redacted TEXT,
ADD COLUMN IF NOT EXISTS redaction_version VARCHAR(32) NOT NULL DEFAULT 'audit_sink_v1',
ADD COLUMN IF NOT EXISTS source_envelope JSONB NOT NULL DEFAULT '{}'::jsonb;
COMMENT ON COLUMN awooop_conversation_event.content_redacted IS
'Full inbound event content after audit_sink redaction; raw unredacted payload text is not stored.';
COMMENT ON COLUMN awooop_conversation_event.redaction_version IS
'Redaction algorithm/version used for content_redacted and source_envelope.';
COMMENT ON COLUMN awooop_conversation_event.source_envelope IS
'Redacted source metadata for inbound replay/audit, including payload hash, provider, source refs, and log correlation hints.';

View File

@@ -0,0 +1,6 @@
-- Rollback for AwoooP Phase 7 T15b inbound truth-chain columns.
-- Safe only if no consumers depend on the redacted replay fields.
ALTER TABLE awooop_conversation_event DROP COLUMN IF EXISTS source_envelope;
ALTER TABLE awooop_conversation_event DROP COLUMN IF EXISTS redaction_version;
ALTER TABLE awooop_conversation_event DROP COLUMN IF EXISTS content_redacted;

View File

@@ -0,0 +1,21 @@
-- AwoooP Phase 7 T1: outbound message truth-chain columns
--
-- Purpose:
-- Telegram must remain a summary channel, but the operator console needs a
-- complete redacted replay of the rendered card and the source envelope that
-- produced it. Store redacted content only; raw unredacted Telegram text stays
-- out of PostgreSQL.
ALTER TABLE awooop_outbound_message
ADD COLUMN IF NOT EXISTS content_redacted TEXT,
ADD COLUMN IF NOT EXISTS redaction_version VARCHAR(32) NOT NULL DEFAULT 'audit_sink_v1',
ADD COLUMN IF NOT EXISTS source_envelope JSONB NOT NULL DEFAULT '{}'::jsonb;
COMMENT ON COLUMN awooop_outbound_message.content_redacted IS
'Full rendered outbound content after audit_sink redaction; raw unredacted text is not stored.';
COMMENT ON COLUMN awooop_outbound_message.redaction_version IS
'Redaction algorithm/version used for content_redacted and source_envelope.';
COMMENT ON COLUMN awooop_outbound_message.source_envelope IS
'Redacted source metadata for replay/audit, including payload hash and adapter context.';

View File

@@ -0,0 +1,6 @@
-- Rollback for AwoooP Phase 7 T1 outbound truth-chain columns.
-- Safe only if no consumers depend on the redacted replay fields.
ALTER TABLE awooop_outbound_message DROP COLUMN IF EXISTS source_envelope;
ALTER TABLE awooop_outbound_message DROP COLUMN IF EXISTS redaction_version;
ALTER TABLE awooop_outbound_message DROP COLUMN IF EXISTS content_redacted;

View File

@@ -6,10 +6,12 @@
-- bge-m3 產生 1024 維向量,現有 schema vector(768) 不相容INSERT 會直接失敗
--
-- 影響範圍:
-- 1. rag_chunks.embedding vector(768) → vector(1024)
-- 2. playbook_embeddings.embedding vector(768) → vector(1024)
-- 1. knowledge_entries.embedding vector(768) → vector(1024)
-- 2. rag_chunks.embedding vector(768) → vector(1024)
-- 3. playbook_embeddings.embedding vector(768) → vector(1024)
--
-- 遷移策略:清空現有向量資料,切換維度後由 re-embed script 重新嵌入
-- 遷移策略:僅在欄位不是 vector(1024) 時清空現有向量資料,切換維度後由 re-embed script 重新嵌入
-- 已經是 vector(1024) 的環境重跑本 migration 時,必須保留既有向量資料。
-- 現有向量資料若要保留,需先 dump 用 nomic 格式備份(舊維度無法轉換)
--
-- 執行前置條件:
@@ -21,13 +23,69 @@
BEGIN;
-- 1. rag_chunks清空向量資料,變更欄位維度
-- ivfflat index 必須先 DROP 才能 ALTER COLUMN
DROP INDEX IF EXISTS idx_rag_chunks_embedding;
-- 1. knowledge_entries備份舊向量並清空,變更欄位維度
DO $$
DECLARE
v_dim integer;
BEGIN
SELECT a.atttypmod INTO v_dim
FROM pg_attribute a
JOIN pg_class c ON a.attrelid = c.oid
WHERE c.relname = 'knowledge_entries'
AND a.attname = 'embedding';
ALTER TABLE rag_chunks
ALTER COLUMN embedding TYPE vector(1024)
USING NULL; -- 清空現有 768 維向量(維度不可轉換)
IF v_dim IS DISTINCT FROM 1024 THEN
EXECUTE $sql$
CREATE TABLE IF NOT EXISTS knowledge_entries_embedding_backup_20260505 AS
SELECT
id,
embedding::text AS embedding_768,
NOW() AS backed_up_at
FROM knowledge_entries
WHERE embedding IS NOT NULL
$sql$;
EXECUTE $sql$
ALTER TABLE knowledge_entries
ALTER COLUMN embedding TYPE vector(1024)
USING NULL
$sql$;
RAISE NOTICE 'knowledge_entries.embedding migrated from vector(%) to vector(1024); old embeddings were backed up and cleared', v_dim;
ELSE
RAISE NOTICE 'knowledge_entries.embedding already vector(1024); existing embeddings preserved';
END IF;
END $$;
COMMENT ON COLUMN knowledge_entries.embedding IS
'bge-m3:latest 1024 維向量 — 遷移自 nomic-embed-text 768 維 (2026-05-05 ADR-110 follow-up)';
-- 2. rag_chunks清空向量資料變更欄位維度
-- ivfflat index 必須先 DROP 才能 ALTER COLUMN
DO $$
DECLARE
v_dim integer;
BEGIN
SELECT a.atttypmod INTO v_dim
FROM pg_attribute a
JOIN pg_class c ON a.attrelid = c.oid
WHERE c.relname = 'rag_chunks'
AND a.attname = 'embedding';
IF v_dim IS DISTINCT FROM 1024 THEN
EXECUTE 'DROP INDEX IF EXISTS idx_rag_chunks_embedding';
EXECUTE $sql$
ALTER TABLE rag_chunks
ALTER COLUMN embedding TYPE vector(1024)
USING NULL
$sql$;
RAISE NOTICE 'rag_chunks.embedding migrated from vector(%) to vector(1024); old embeddings were cleared', v_dim;
ELSE
RAISE NOTICE 'rag_chunks.embedding already vector(1024); existing embeddings preserved';
END IF;
END $$;
-- 重建 ivfflat indexlists=100 適合 ~10k 筆以下資料)
CREATE INDEX IF NOT EXISTS idx_rag_chunks_embedding
@@ -39,12 +97,30 @@ COMMENT ON COLUMN rag_chunks.embedding IS
'bge-m3:latest 1024 維向量 — 遷移自 nomic-embed-text 768 維 (2026-05-04 ADR-110)';
-- 2. playbook_embeddings清空向量資料變更欄位維度
DROP INDEX IF EXISTS ix_playbook_embeddings_vec;
-- 3. playbook_embeddings清空向量資料變更欄位維度
DO $$
DECLARE
v_dim integer;
BEGIN
SELECT a.atttypmod INTO v_dim
FROM pg_attribute a
JOIN pg_class c ON a.attrelid = c.oid
WHERE c.relname = 'playbook_embeddings'
AND a.attname = 'embedding';
ALTER TABLE playbook_embeddings
ALTER COLUMN embedding TYPE vector(1024)
USING NULL; -- 清空現有 768 維向量
IF v_dim IS DISTINCT FROM 1024 THEN
EXECUTE 'DROP INDEX IF EXISTS ix_playbook_embeddings_vec';
EXECUTE $sql$
ALTER TABLE playbook_embeddings
ALTER COLUMN embedding TYPE vector(1024)
USING NULL
$sql$;
RAISE NOTICE 'playbook_embeddings.embedding migrated from vector(%) to vector(1024); old embeddings were cleared', v_dim;
ELSE
RAISE NOTICE 'playbook_embeddings.embedding already vector(1024); existing embeddings preserved';
END IF;
END $$;
CREATE INDEX IF NOT EXISTS ix_playbook_embeddings_vec
ON playbook_embeddings
@@ -61,9 +137,15 @@ COMMENT ON TABLE playbook_embeddings IS
-- 3. 驗證遷移結果
DO $$
DECLARE
v_km_dim integer;
v_rag_dim integer;
v_pb_dim integer;
BEGIN
SELECT atttypmod INTO v_km_dim
FROM pg_attribute
JOIN pg_class ON attrelid = pg_class.oid
WHERE relname = 'knowledge_entries' AND attname = 'embedding';
SELECT atttypmod INTO v_rag_dim
FROM pg_attribute
JOIN pg_class ON attrelid = pg_class.oid
@@ -74,15 +156,18 @@ BEGIN
JOIN pg_class ON attrelid = pg_class.oid
WHERE relname = 'playbook_embeddings' AND attname = 'embedding';
-- atttypmod for vector(1024) = 1024 + 1 = 1025
IF v_rag_dim != 1025 THEN
RAISE EXCEPTION 'rag_chunks.embedding 維度驗證失敗expected 1025, got %', v_rag_dim;
-- pgvector atttypmod stores the configured dimension.
IF v_km_dim != 1024 THEN
RAISE EXCEPTION 'knowledge_entries.embedding 維度驗證失敗expected 1024, got %', v_km_dim;
END IF;
IF v_pb_dim != 1025 THEN
RAISE EXCEPTION 'playbook_embeddings.embedding 維度驗證失敗expected 1025, got %', v_pb_dim;
IF v_rag_dim != 1024 THEN
RAISE EXCEPTION 'rag_chunks.embedding 維度驗證失敗expected 1024, got %', v_rag_dim;
END IF;
IF v_pb_dim != 1024 THEN
RAISE EXCEPTION 'playbook_embeddings.embedding 維度驗證失敗expected 1024, got %', v_pb_dim;
END IF;
RAISE NOTICE '✅ embedding 遷移驗證通過rag_chunksplaybook_embeddings 均為 vector(1024)';
RAISE NOTICE '✅ embedding 遷移驗證通過:knowledge_entries、rag_chunksplaybook_embeddings 均為 vector(1024)';
END $$;
COMMIT;

View File

@@ -25,7 +25,7 @@
"log_anomaly": "deepseek-r1:14b",
"nemoclaw": "deepseek-r1:14b",
"playbook_draft": "qwen3:14b",
"code_review": "qwen2.5-coder:32b",
"code_review": "qwen2.5-coder:7b",
"embedding": "bge-m3:latest",
"rag_generate": "qwen3:14b",
"image_analysis": "minicpm-v:latest",
@@ -175,7 +175,7 @@
},
"pr_code_review": {
"phase": 32,
"model": "qwen2.5-coder:32b",
"model": "qwen2.5-coder:7b",
"timeout_seconds": 120,
"purpose": "Gitea PR 自動審查"
},

View File

@@ -9,7 +9,7 @@ AwoooP Phase 1 Batch 1 回填腳本
awooop_phase1_batch1_rls_2026-05-04.sql Step AADD COLUMN nullable已執行
執行方式:
export DATABASE_URL="postgresql+asyncpg://awoooi:<password>@192.168.0.188:5432/awoooi_prod"
從 secret manager / operator vault 設定 DATABASE_URL禁止在指令或檔案中寫入 URL。
cd apps/api && python scripts/awooop_phase1_batch1_backfill.py
2026-05-04 ogt + Claude Sonnet 4.6ADR-118 Batch 1 C-3 修正)

View File

@@ -37,6 +37,7 @@ logging = structlog.get_logger(__name__)
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://34.143.170.20:11434")
EMBEDDING_MODEL = "bge-m3:latest"
EXPECTED_DIM = 1024
PROJECT_ID = os.getenv("AWOOOP_PROJECT_ID", "awoooi")
async def embed_text(client: httpx.AsyncClient, text: str) -> list[float]:
@@ -162,6 +163,7 @@ async def main(dry_run: bool, batch_size: int) -> None:
conn = await asyncpg.connect(database_url)
try:
await conn.execute("SELECT set_config('app.project_id', $1, FALSE)", PROJECT_ID)
# 統計待嵌入筆數
rag_null = await conn.fetchval("SELECT COUNT(*) FROM rag_chunks WHERE embedding IS NULL")
pb_null = await conn.fetchval("SELECT COUNT(*) FROM playbook_embeddings WHERE embedding IS NULL")

View File

@@ -15,7 +15,7 @@ from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine
# 2026-04-22 ogt: 移除硬碼 changeme改為讀取環境變數強制要求設定
# 執行前: export DATABASE_URL="postgresql+asyncpg://awoooi:<password>@192.168.0.188:5432/awoooi_prod"
# 執行前: 從 secret manager / operator vault 設定 DATABASE_URL禁止在指令或檔案中寫入 URL。
DATABASE_URL = os.environ["DATABASE_URL"]
MIGRATION_SQLS = [

View File

@@ -28,7 +28,7 @@ except ImportError:
# ============================================================================
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://192.168.0.188:11434")
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://192.168.0.110:11435")
if not NVIDIA_API_KEY:
print("❌ 請設定 NVIDIA_API_KEY 環境變數")

View File

@@ -18,8 +18,15 @@ Endpoints:
from __future__ import annotations
import structlog
from fastapi import APIRouter, Query
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field
from src.services.adr100_remediation_service import (
RemediationMode,
RemediationNotFoundError,
get_adr100_remediation_service,
)
from src.services.adr100_slo_status_service import get_adr100_slo_status_service
from src.services.ai_slo_calculator import AiSloCalculator
logger = structlog.get_logger(__name__)
@@ -27,6 +34,20 @@ logger = structlog.get_logger(__name__)
router = APIRouter()
class RemediationPreviewRequest(BaseModel):
"""ADR-100 remediation preview request."""
work_item_id: str = Field(min_length=1)
mode: RemediationMode = "auto"
class RemediationDryRunRequest(BaseModel):
"""ADR-100 remediation dry-run request."""
work_item_id: str = Field(min_length=1)
mode: RemediationMode = "auto"
@router.get("/ai/slo")
async def get_ai_slo(
force_refresh: bool = Query(False, description="忽略快取,強制重算"),
@@ -50,9 +71,65 @@ async def get_ai_slo(
if cached:
data = cached.to_dict()
data["cache_hit"] = True
data["adr100"] = await get_adr100_slo_status_service().fetch_report()
return data
report = await calc.run()
data = report.to_dict()
data["cache_hit"] = False
data["adr100"] = await get_adr100_slo_status_service().fetch_report()
return data
@router.get("/ai/slo/remediation/preview")
async def preview_ai_slo_remediation(
work_item_id: str = Query(..., min_length=1),
mode: RemediationMode = Query("auto"),
) -> dict:
"""Preview the safe remediation plan for one ADR-100 queue item."""
try:
return await get_adr100_remediation_service().preview(work_item_id, mode)
except RemediationNotFoundError as exc:
raise HTTPException(status_code=404, detail="remediation_work_item_not_found") from exc
@router.post("/ai/slo/remediation/preview")
async def preview_ai_slo_remediation_post(request: RemediationPreviewRequest) -> dict:
"""POST variant for clients that prefer JSON bodies."""
try:
return await get_adr100_remediation_service().preview(
request.work_item_id,
request.mode,
)
except RemediationNotFoundError as exc:
raise HTTPException(status_code=404, detail="remediation_work_item_not_found") from exc
@router.post("/ai/slo/remediation/dry-run")
async def dry_run_ai_slo_remediation(request: RemediationDryRunRequest) -> dict:
"""Run a read-only ADR-100 remediation dry-run."""
try:
return await get_adr100_remediation_service().dry_run(
request.work_item_id,
request.mode,
)
except RemediationNotFoundError as exc:
raise HTTPException(status_code=404, detail="remediation_work_item_not_found") from exc
@router.get("/ai/slo/remediation/history")
async def list_ai_slo_remediation_history(
limit: int = Query(50, ge=1, le=200),
incident_id: str | None = Query(default=None, min_length=1),
work_item_id: str | None = Query(default=None, min_length=1),
) -> dict:
"""List durable ADR-100 remediation dry-run history from alert_operation_log."""
return await get_adr100_remediation_service().history(
limit=limit,
incident_id=incident_id,
work_item_id=work_item_id,
)

View File

@@ -11,7 +11,7 @@ Endpoints:
Components Checked:
- PostgreSQL (192.168.0.188:5432)
- Redis (192.168.0.188:6380)
- Ollama (192.168.0.188:11434)
- Ollama (settings.OLLAMA_URL / ADR-110 provider pool)
- OpenClaw (192.168.0.188:8089)
- SigNoz (192.168.0.188:3301)
"""

View File

@@ -17,9 +17,10 @@ Phase 6.4 核心功能:
- Proposal 必須關聯到 Incident
"""
from datetime import UTC, datetime, timedelta
from typing import Any
from fastapi import APIRouter, HTTPException, status
from fastapi import APIRouter, HTTPException, Query, status
from pydantic import BaseModel, Field
from src.core.logging import get_logger
@@ -133,6 +134,7 @@ class IncidentTimelineResponse(BaseModel):
timeline: list[IncidentTimelineStage] = Field(default_factory=list)
events: list[IncidentTimelineEvent] = Field(default_factory=list)
ascii_timeline: str
reconciliation: dict[str, Any] = Field(default_factory=dict)
# =============================================================================
@@ -148,18 +150,26 @@ class IncidentTimelineResponse(BaseModel):
Phase 6.5 升級:
- 每個事件自動附帶 decision_token
- 確保 UI 永遠有決策可操作
- 雙軌引擎: LLM (主) + Expert System (備)
- 預設只讀取已存在的 decision_token
- 需要新決策時改由明確的 proposal / operator run 入口觸發
""",
)
async def list_incidents() -> IncidentListResponse:
async def list_incidents(
generate_missing_decisions: bool = Query(
False,
description=(
"預設 false列表查詢只讀既有 decision token"
"true 僅供明確維運操作使用,會背景產生缺少的決策。"
),
),
) -> IncidentListResponse:
"""
取得活躍事件清單
Phase 6.5: 自動為每個事件生成決策令牌
- P0/P1 事件優先處理
- 30 秒內保證有決策
- LLM 失敗時 Expert System 保底
Phase 6.5: 附帶既有決策令牌
- 列表查詢必須是低成本純讀路徑
- 不可因為前端輪詢就背景觸發 LLM / Ollama / OpenClaw
- 需要新決策時,呼叫 POST /api/v1/incidents/{incident_id}/proposal
Returns:
IncidentListResponse: 事件清單與計數 (含決策令牌)
@@ -174,8 +184,6 @@ async def list_incidents() -> IncidentListResponse:
# 按時間排序 (最新優先)
# 2026-03-26 修復: 處理 timezone-aware 與 naive datetime 混合問題
from datetime import UTC
def safe_created_at(i: Incident) -> float:
"""安全取得 timestamp處理 timezone 混合問題"""
dt = i.created_at
@@ -189,15 +197,24 @@ async def list_incidents() -> IncidentListResponse:
# 2026-04-09 Claude Sonnet 4.6: 效能修復 — list endpoint 不同步等待 AI
# 原設計: 每個 incident await AI 決策 (120-180s timeout),多 incident 時乘積爆炸
# 修復: 只取已存在的決策 token若無則背景觸發生成前端 poll 單筆 GET 取得結果
import asyncio
#
# 2026-05-06 Codex: 成本與推理槽修復 — 預設不再背景觸發 AI。
# 根因: 多個前端頁面會輪詢 GET /incidents若列表查詢偷偷 create_task
# 每次頁面載入都可能消耗 GCP Ollama / OpenClaw 推理槽,甚至 fallback 到 Gemini。
# 新規則: GET list 是純讀;生成新修復建議必須走明確 proposal/operator-run 入口。
if generate_missing_decisions:
import asyncio
responses = []
background_tasks = []
existing_tokens = await decision_manager._find_existing_tokens_for_incidents(
[incident.incident_id for incident in incidents]
)
for incident in incidents:
try:
# 只查已快取的決策 (不等待 AI立即返回)
existing = await decision_manager._find_existing_token(incident.incident_id)
existing = existing_tokens.get(incident.incident_id)
if existing:
decision_info = DecisionInfo(
token=existing.token,
@@ -207,17 +224,20 @@ async def list_incidents() -> IncidentListResponse:
)
responses.append(IncidentResponse.from_incident(incident, decision_info))
else:
# 無快取 → 背景觸發,本次返回 None(前端看到 decision=null 會 poll
# 無快取 → 本次返回 None。列表查詢預設不觸發 AI
# 前端若需要修復建議,必須呼叫明確的 proposal 入口。
responses.append(IncidentResponse.from_incident(incident, None))
if not generate_missing_decisions:
continue
# 2026-04-16 Claude Sonnet 4.6: 只對 48h 內的 incident 觸發 AI 分析
# 舊 incident token 每小時過期,若不限制會反覆重新分析歷史事件 → Telegram 洪水
from datetime import datetime, timezone, timedelta
_created = getattr(incident, "created_at", None)
_too_old = False
if _created:
if _created.tzinfo is None:
_created = _created.replace(tzinfo=timezone.utc)
_too_old = (_created < datetime.now(timezone.utc) - timedelta(hours=48))
_created = _created.replace(tzinfo=UTC)
_too_old = (_created < datetime.now(UTC) - timedelta(hours=48))
if not _too_old:
timeout = 120.0 if incident.severity in (Severity.P0, Severity.P1) else 180.0
background_tasks.append(
@@ -240,6 +260,7 @@ async def list_incidents() -> IncidentListResponse:
"incidents_listed",
count=len(incidents),
with_decisions=sum(1 for r in responses if r.decision is not None),
generate_missing_decisions=generate_missing_decisions,
)
return IncidentListResponse(

View File

@@ -9,14 +9,21 @@ ADR-106/ADR-107/ADR-114/ADR-115/ADR-116
from fastapi import APIRouter
from src.api.v1.platform.contracts import router as contracts_router
from src.api.v1.platform.events import router as events_router
from src.api.v1.platform.operator_runs import router as operator_runs_router
from src.api.v1.platform.runs import router as runs_router
from src.api.v1.platform.tenants import router as tenants_router
from src.api.v1.platform.truth_chain import router as truth_chain_router
router = APIRouter()
router.include_router(events_router)
router.include_router(truth_chain_router)
# 2026-05-06 Codex: FastAPI 依註冊順序比對路由。Operator Console 的
# `/runs/list` 必須排在 `/runs/{run_id}` 前面,否則 `list` 會被當成
# run_id造成前端 Run 監控頁 HTTP 422。
router.include_router(operator_runs_router)
router.include_router(runs_router)
router.include_router(tenants_router)
router.include_router(contracts_router)
router.include_router(operator_runs_router)
__all__ = ["router"]

View File

@@ -0,0 +1,338 @@
"""
AwoooP Operator Console — Channel Events API
============================================
提供 Operator Console 讀取 Communication Hub / legacy mirror 的事件摘要。
"""
from __future__ import annotations
from datetime import datetime
from typing import Any
from uuid import UUID
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field
from src.services.channel_event_dossier_service import (
RecurrenceWorkItemMode,
RecurrenceWorkItemNotFoundError,
fetch_channel_event_dossier,
fetch_channel_event_dossier_coverage,
fetch_channel_event_dossier_recurrence,
fetch_recurrence_work_item_dry_run,
fetch_recurrence_work_item_preview,
)
from src.services.platform_operator_service import list_recent_channel_events
router = APIRouter()
class ChannelEventItem(BaseModel):
event_id: UUID
project_id: str
channel_type: str
provider_event_id: str
channel_chat_id: str | None
content_preview: str | None
is_duplicate: bool
received_at: datetime
class RecentEventsResponse(BaseModel):
events: list[ChannelEventItem]
total: int
limit: int
class ChannelEventDossierItem(BaseModel):
event_id: UUID
project_id: str
channel_type: str
provider: str | None
stage: str
provider_event_id: str
content_preview: str | None
content_redacted: str | None
has_redacted_content: bool
redaction_version: str | None
source_url: str | None
content_sha256: str | None
content_length: int | None
source_refs: dict[str, Any]
source_ref_count: int
log_correlation: dict[str, Any]
alertname: str | None
severity: str | None
namespace: str | None
target_resource: str | None
fingerprint: str | None
is_duplicate: bool
provider_ts: datetime | None
received_at: datetime
class ChannelEventDossierSummary(BaseModel):
source_count: int
duplicate_total: int
redacted_total: int
source_ref_total: int
class ChannelEventDossierResponse(BaseModel):
events: list[ChannelEventDossierItem]
total: int
limit: int
summary: ChannelEventDossierSummary
class ChannelEventProviderCoverage(BaseModel):
provider: str
total: int
duplicate_total: int
redacted_total: int
source_ref_total: int
missing_source_refs_total: int
sentry_ref_total: int
signoz_ref_total: int
alert_ref_total: int
latest_received_at: datetime | None
class ChannelEventDossierCoverageSummary(BaseModel):
source_count: int
source_envelope_total: int
missing_source_envelope_total: int
with_source_refs_total: int
missing_source_refs_total: int
duplicate_total: int
redacted_total: int
source_ref_total: int
sentry_ref_total: int
signoz_ref_total: int
alert_ref_total: int
latest_received_at: datetime | None
class ChannelEventDossierCoverageResponse(BaseModel):
project_id: str
limit: int
summary: ChannelEventDossierCoverageSummary
providers: list[ChannelEventProviderCoverage]
class ChannelEventRecurrenceSummary(BaseModel):
source_event_total: int
recurrence_group_total: int
recurrent_group_total: int
duplicate_event_total: int
linked_run_total: int
unlinked_event_total: int
auto_repair_linked_total: int = 0
verified_repair_group_total: int = 0
open_work_item_group_total: int = 0
manual_gate_group_total: int = 0
automation_gap_group_total: int = 0
failed_repair_group_total: int = 0
latest_received_at: datetime | None
class ChannelEventRecurrenceItem(BaseModel):
recurrence_key: str
provider: str | None
alertname: str | None
severity: str | None
namespace: str | None
target_resource: str | None
fingerprint: str | None
latest_event_id: UUID | None
latest_provider_event_id: str | None
latest_content_preview: str | None
latest_run_id: UUID | None
latest_run_state: str | None
latest_agent_id: str | None
latest_incident_id: str | None = None
incident_ids: list[str] = Field(default_factory=list)
repair_summary: dict[str, Any] | None = None
work_item: dict[str, Any] | None = None
occurrence_total: int
duplicate_total: int
linked_run_total: int
source_ref_total: int
missing_source_refs_total: int
sentry_ref_total: int
signoz_ref_total: int
alert_ref_total: int
run_state_counts: dict[str, int]
first_received_at: datetime | None
latest_received_at: datetime | None
class ChannelEventRecurrenceResponse(BaseModel):
project_id: str
limit: int
summary: ChannelEventRecurrenceSummary
items: list[ChannelEventRecurrenceItem]
class RecurrenceWorkItemDryRunRequest(BaseModel):
"""AwoooP recurrence work item dry-run request."""
project_id: str | None = Field(default=None, min_length=1)
work_item_id: str = Field(min_length=1)
mode: RecurrenceWorkItemMode = "auto"
provider: str | None = Field(default=None, min_length=1)
limit: int = Field(default=300, ge=1, le=300)
@router.get(
"/events/dossier",
response_model=ChannelEventDossierResponse,
summary="查詢 Channel Event 來源卷宗",
description=(
"返回 redacted inbound source envelope供 AwoooP Run Detail 顯示"
"告警來源、source refs、Sentry / SignOz / Alertmanager 關聯與去重狀態。"
),
)
async def get_event_dossier(
project_id: str | None = Query(None, description="租戶 ID可選"),
run_id: UUID | None = Query(None, description="Run ID可選"),
provider_event_id: str | None = Query(
None, description="provider_event_id可選"
),
limit: int = Query(20, ge=1, le=50, description="最多返回筆數"),
) -> dict[str, Any]:
return await fetch_channel_event_dossier(
project_id=project_id,
run_id=run_id,
provider_event_id=provider_event_id,
limit=limit,
)
@router.get(
"/events/dossier/coverage",
response_model=ChannelEventDossierCoverageResponse,
summary="查詢 Channel Event 來源卷宗覆蓋率",
description=(
"返回近期 inbound event 的 source_envelope / source_refs / 去重 / "
"Sentry / SignOz 關聯覆蓋率,供 AwoooP Run List 顯示告警是否已入庫。"
),
)
async def get_event_dossier_coverage(
project_id: str | None = Query(None, description="租戶 ID可選"),
provider: str | None = Query(
None, description="provider可選如 sentry / signoz"
),
limit: int = Query(100, ge=1, le=200, description="最多納入統計筆數"),
) -> dict[str, Any]:
return await fetch_channel_event_dossier_coverage(
project_id=project_id,
provider=provider,
limit=limit,
)
@router.get(
"/events/dossier/recurrence",
response_model=ChannelEventRecurrenceResponse,
summary="查詢 Channel Event 重複發生與關聯 Run 狀態",
description=(
"將近期 inbound source events 依 fingerprint / alertname / namespace / target 分組,"
"顯示重複發生次數、去重數、source refs 與最新 linked run 狀態。"
),
)
async def get_event_dossier_recurrence(
project_id: str | None = Query(None, description="租戶 ID可選"),
provider: str | None = Query(
None, description="provider可選如 alertmanager / sentry / signoz"
),
limit: int = Query(100, ge=1, le=300, description="最多納入統計筆數"),
) -> dict[str, Any]:
return await fetch_channel_event_dossier_recurrence(
project_id=project_id,
provider=provider,
limit=limit,
)
@router.get(
"/events/dossier/recurrence/work-item/preview",
summary="預覽重複告警工作項的安全處理計畫",
description=(
"依 recurrence read model 找出指定 work_item返回下一步、pre-flight checks "
"與 read-only / no-write 保證;不修改 incident、auto-repair 或 ticket 狀態。"
),
)
async def preview_event_recurrence_work_item(
work_item_id: str = Query(..., min_length=1, description="recurrence work_item_id"),
project_id: str | None = Query(None, description="租戶 ID可選"),
provider: str | None = Query(
None, description="provider可選如 alertmanager / sentry / signoz"
),
mode: RecurrenceWorkItemMode = Query("auto", description="預覽模式"),
limit: int = Query(300, ge=1, le=300, description="最多納入統計筆數"),
) -> dict[str, Any]:
try:
return await fetch_recurrence_work_item_preview(
project_id=project_id,
work_item_id=work_item_id,
mode=mode,
provider=provider,
limit=limit,
)
except RecurrenceWorkItemNotFoundError as exc:
raise HTTPException(
status_code=404,
detail="recurrence_work_item_not_found",
) from exc
@router.post(
"/events/dossier/recurrence/work-item/dry-run",
summary="乾跑重複告警工作項的安全處理流程",
description=(
"依 recurrence read model 產生 dry-run 結果並寫入 pre-flight history"
"但不修改 incident、auto-repair 或 ticket 狀態。"
),
)
async def dry_run_event_recurrence_work_item(
request: RecurrenceWorkItemDryRunRequest,
) -> dict[str, Any]:
try:
return await fetch_recurrence_work_item_dry_run(
project_id=request.project_id,
work_item_id=request.work_item_id,
mode=request.mode,
provider=request.provider,
limit=request.limit,
)
except RecurrenceWorkItemNotFoundError as exc:
raise HTTPException(
status_code=404,
detail="recurrence_work_item_not_found",
) from exc
@router.get(
"/events/recent",
response_model=RecentEventsResponse,
summary="列出最近 Channel Events",
description=(
"返回 awooop_conversation_event 最近事件。"
"可用 channel_type / provider_prefix 過濾,例如 alert-group 收斂事件。"
),
)
async def list_recent_events(
project_id: str | None = Query(None, description="租戶 ID可選"),
channel_type: str | None = Query(None, description="通道類型(可選)"),
provider_prefix: str | None = Query(
None, description="provider_event_id 前綴(可選)"
),
limit: int = Query(20, ge=1, le=100, description="最多返回筆數"),
) -> dict[str, Any]:
return await list_recent_channel_events(
project_id=project_id,
channel_type=channel_type,
provider_prefix=provider_prefix,
limit=limit,
)

View File

@@ -15,12 +15,26 @@ from decimal import Decimal
from typing import Any, Literal
from uuid import UUID
from fastapi import APIRouter, Query
from fastapi import APIRouter, Depends, Query
from pydantic import BaseModel, Field
from src.core.awooop_operator_auth import (
AwoooPOperatorPrincipal,
verify_awooop_operator,
)
from src.services.platform_operator_service import (
decide_approval as decide_approval_svc,
)
from src.services.platform_operator_service import (
get_run_detail as get_run_detail_svc,
)
from src.services.platform_operator_service import (
list_approvals as list_approvals_svc,
)
from src.services.platform_operator_service import (
list_callback_replies as list_callback_replies_svc,
)
from src.services.platform_operator_service import (
list_runs as list_runs_svc,
)
@@ -40,6 +54,8 @@ class RunItem(BaseModel):
step_count: int
created_at: datetime
timeout_at: datetime | None
remediation_summary: dict[str, Any] | None = None
callback_reply_summary: dict[str, Any] | None = None
class ListRunsResponse(BaseModel):
@@ -49,12 +65,43 @@ class ListRunsResponse(BaseModel):
per_page: int
class CallbackReplyItem(BaseModel):
message_id: UUID
run_id: UUID
project_id: str
status: str
needs_human: bool
action: str | None = None
incident_id: str | None = None
event_at: datetime | None = None
channel_type: str
message_type: str
send_status: str
send_error: str | None = None
provider_message_id: str | None = None
triggered_by_state: str | None = None
content_preview: str | None = None
run_state: str | None = None
agent_id: str | None = None
run_created_at: datetime | None = None
callback_reply: dict[str, Any]
run_detail_href: str | None = None
class ListCallbackRepliesResponse(BaseModel):
items: list[CallbackReplyItem]
total: int
page: int
per_page: int
class ApprovalItem(BaseModel):
run_id: UUID
project_id: str
agent_id: str
created_at: datetime
timeout_at: datetime | None
remediation_summary: dict[str, Any] | None = None
class ListApprovalsResponse(BaseModel):
@@ -65,7 +112,10 @@ class ListApprovalsResponse(BaseModel):
class DecideApprovalRequest(BaseModel):
project_id: str = Field(..., description="租戶 ID")
decision: Literal["approve", "reject"] = Field(..., description="核准或拒絕")
approver_id: str = Field(..., description="審核人 IDplatform_subject_id 或 operator email")
approver_id: str | None = Field(
default=None,
description="Deprecated. Ignored; approver comes from trusted operator headers.",
)
reason: str | None = Field(None, description="決策原因(可選)")
@@ -81,7 +131,8 @@ class DecideApprovalResponse(BaseModel):
response_model=ListRunsResponse,
summary="列出 Runs",
description=(
"返回 awooop_run_state 記錄,支援 project_id / state filter 與分頁。\n\n"
"返回 awooop_run_state 記錄,支援 project_id / state / remediation_status / "
"callback_reply_status / incident_id filter 與分頁。\n\n"
"- 按 created_at DESC 排序\n"
"- 注意:此路徑為 /runs/list 以避免與 runs.py 的 /runs/{run_id} 衝突"
),
@@ -89,14 +140,74 @@ class DecideApprovalResponse(BaseModel):
async def list_runs(
project_id: str | None = Query(None, description="租戶 ID可選"),
state: str | None = Query(None, description="Run 狀態 filter可選"),
remediation_status: str | None = Query(
None,
description="AI 證據狀態 filterno_evidence/mcp_observed/read_only_dry_run/write_observed/blocked/observed",
),
callback_reply_status: str | None = Query(
None,
description="Telegram callback reply 狀態 filterno_callback/sent/fallback_sent/rescue_sent/failed/observed",
),
incident_id: str | None = Query(None, description="關聯 Incident ID filter可選"),
page: int = Query(1, ge=1, description="頁碼,從 1 開始"),
per_page: int = Query(_DEFAULT_PER_PAGE, ge=1, le=_MAX_PER_PAGE, description="每頁筆數"),
) -> dict[str, Any]:
return await list_runs_svc(
project_id=project_id, state=state, page=page, per_page=per_page
project_id=project_id,
state=state,
remediation_status=remediation_status,
callback_reply_status=callback_reply_status,
incident_id=incident_id,
page=page,
per_page=per_page,
)
@router.get(
"/runs/callback-replies",
response_model=ListCallbackRepliesResponse,
summary="列出 Telegram Callback Reply Evidence",
description=(
"從 AwoooP outbound mirror 查詢 Telegram 詳情 / 歷史 callback reply 的"
"送達、fallback、救援與失敗證據只讀不修改 incident、run 或 Telegram 狀態。"
),
)
async def list_callback_replies(
project_id: str | None = Query(None, description="租戶 ID可選"),
callback_reply_status: str | None = Query(
None,
description="Telegram callback reply 狀態 filtersent/fallback_sent/rescue_sent/failed/observed/no_callback",
),
action: str | None = Query(None, description="Callback action filter例如 detail/history"),
incident_id: str | None = Query(None, description="關聯 Incident ID filter可選"),
page: int = Query(1, ge=1, description="頁碼,從 1 開始"),
per_page: int = Query(20, ge=1, le=_MAX_PER_PAGE, description="每頁筆數"),
) -> dict[str, Any]:
return await list_callback_replies_svc(
project_id=project_id,
callback_reply_status=callback_reply_status,
action=action,
incident_id=incident_id,
page=page,
per_page=per_page,
)
@router.get(
"/runs/{run_id}/detail",
summary="查詢 Run 詳細時間線",
description=(
"返回單一 Run 的主狀態、Step Journal、MCP Gateway audit、"
"入站 Channel Event 與出站訊息,供 Operator Console 顯示完整處置脈絡。"
),
)
async def get_run_detail(
run_id: str,
project_id: str | None = Query(None, description="租戶 ID可選"),
) -> dict[str, Any]:
return await get_run_detail_svc(run_id=run_id, project_id=project_id)
@router.get(
"/approvals",
response_model=ListApprovalsResponse,
@@ -108,8 +219,17 @@ async def list_runs(
)
async def list_approvals(
project_id: str | None = Query(None, description="租戶 ID可選"),
run_id: str | None = Query(None, description="Run ID可選M8 詳情頁查單筆)"),
remediation_status: str | None = Query(
None,
description="AI 證據狀態 filterno_evidence/mcp_observed/read_only_dry_run/write_observed/blocked/observed",
),
) -> dict[str, Any]:
return await list_approvals_svc(project_id=project_id)
return await list_approvals_svc(
project_id=project_id,
run_id=run_id,
remediation_status=remediation_status,
)
@router.post(
@@ -126,11 +246,12 @@ async def list_approvals(
async def decide_approval(
run_id: str,
body: DecideApprovalRequest,
operator: AwoooPOperatorPrincipal = Depends(verify_awooop_operator),
) -> dict[str, Any]:
return await decide_approval_svc(
run_id=run_id,
project_id=body.project_id,
decision=body.decision,
approver_id=body.approver_id,
approver_id=operator.operator_id,
reason=body.reason,
)

View File

@@ -0,0 +1,64 @@
"""AwoooP Operator Console — truth-chain read API."""
from __future__ import annotations
from typing import Any
from fastapi import APIRouter, Depends, Query
from src.core.awooop_operator_auth import (
AwoooPOperatorPrincipal,
verify_awooop_operator,
)
from src.services.awooop_truth_chain_service import (
fetch_automation_quality_summary,
fetch_truth_chain,
)
router = APIRouter()
@router.get(
"/truth-chain/quality/summary",
summary="查詢 AI 自動化品質總覽",
description=(
"T12c read-only aggregate endpoint. 聚合最近 incident 的 automation quality gate"
"讓 Operator 不必逐張 Telegram 卡片判斷是否真正完成 AI 自動修復。"
"此總覽不回傳逐筆 examplessource-level truth-chain 詳情仍需 operator auth。"
),
)
async def get_automation_quality_summary(
project_id: str = Query("awoooi", description="租戶 ID"),
hours: int = Query(24, ge=1, le=168, description="回看小時數"),
limit: int = Query(200, ge=1, le=500, description="最多評估 incident 數"),
) -> dict[str, Any]:
summary = await fetch_automation_quality_summary(
project_id=project_id,
hours=hours,
limit=limit,
)
summary["examples"] = []
summary["visibility_note"] = (
"Aggregate only. Use /truth-chain/{source_id} with operator auth for source-level details."
)
return summary
@router.get(
"/truth-chain/{source_id}",
summary="查詢 Telegram / Incident / Drift 真相鏈",
description=(
"T0 read-only endpoint. 聚合 incident、approval、evidence、MCP、"
"automation_operation_log、drift repeat state 與 outbound mirror"
"讓 Operator Console 能判斷 Telegram 卡片目前卡在哪個流程節點。"
),
)
async def get_truth_chain(
source_id: str,
project_id: str = Query("awoooi", description="租戶 ID"),
operator: AwoooPOperatorPrincipal = Depends(verify_awooop_operator),
) -> dict[str, Any]:
# operator dependency intentionally gates this read API even though the
# principal is not otherwise needed by the aggregation query.
_ = operator
return await fetch_truth_chain(source_id=source_id, project_id=project_id)

View File

@@ -8,9 +8,10 @@ leWOOOgo 原則: Router 只做 HTTP 轉發,業務邏輯在 KnowledgeRAGService
建立者: Claude Code (Phase 33 ADR-067)
"""
from fastapi import APIRouter, BackgroundTasks, HTTPException
from fastapi import APIRouter, BackgroundTasks
from pydantic import BaseModel
from src.core.config import get_settings
from src.services.knowledge_rag_service import get_knowledge_rag_service
router = APIRouter(prefix="/rag", tags=["RAG Knowledge Base"])
@@ -43,9 +44,10 @@ async def trigger_index(background_tasks: BackgroundTasks) -> RagIndexResponse:
- .agents/skills/*.md
"""
background_tasks.add_task(_run_index)
model = get_settings().OLLAMA_EMBEDDING_MODEL
return RagIndexResponse(
status="accepted",
message="索引已排程,背景執行中(nomic-embed-text @ Ollama 111",
message=f"索引已排程,背景執行中({model} @ Ollama GCP-A/GCP-B/111",
)
@@ -76,15 +78,16 @@ async def rag_debug() -> dict:
try:
async with httpx.AsyncClient(timeout=10.0) as c:
from src.core.config import get_settings as _gs
settings = _gs()
r = await c.post(
f"{_gs().OLLAMA_URL}/api/embeddings",
json={"model": "nomic-embed-text", "prompt": "test"},
f"{settings.OLLAMA_URL}/api/embeddings",
json={"model": settings.OLLAMA_EMBEDDING_MODEL, "prompt": "test"},
)
ollama_ok = r.status_code == 200 if r.status_code == 200 else f"http_{r.status_code}"
except Exception as e:
ollama_ok = f"error: {type(e).__name__}: {e}"
return {"cwd": os.getcwd(), "paths": paths_check, "ollama_111_embed": ollama_ok}
return {"cwd": os.getcwd(), "paths": paths_check, "ollama_embedding": ollama_ok}
@router.get("/stats", summary="索引統計")

View File

@@ -35,6 +35,7 @@ from src.models.approval import (
)
from src.services.anomaly_counter import get_anomaly_counter
from src.services.approval_db import get_approval_service
from src.services.channel_hub import record_external_alert_event
from src.services.openclaw_http_service import get_openclaw_http_service
from src.services.sentry_service import get_sentry_service
# 2026-04-27 P3.1-T2 by Claude — Tier-2 三服務感知強化:補 SentryWebhookService 簽章驗證
@@ -124,16 +125,60 @@ async def handle_sentry_error(
# 提取錯誤資訊
issue_data = payload.get("data", {}).get("issue", {})
event_data = payload.get("data", {}).get("event", {})
issue_id = issue_data.get("id")
source_url = (
issue_data.get("permalink")
or issue_data.get("web_url")
or issue_data.get("url")
)
background_tasks.add_task(
record_external_alert_event,
project_id="awoooi",
provider="sentry",
event_id=str(issue_id or issue_data.get("shortId") or "unknown"),
stage="received",
title=str(issue_data.get("title") or "Sentry issue"),
severity=str(issue_data.get("level") or "error"),
namespace="sentry",
target_resource=str(issue_data.get("culprit") or issue_data.get("project", {}).get("slug") or "unknown"),
fingerprint=f"sentry-{issue_id or issue_data.get('shortId') or 'unknown'}",
source_url=source_url,
labels={
"project": issue_data.get("project", {}),
"level": issue_data.get("level"),
"culprit": issue_data.get("culprit"),
},
annotations={"message": event_data.get("message")},
payload=payload,
)
# Phase 10.2.1: 去重檢查 (10 分鐘內不重複發送)
issue_id = issue_data.get("id")
sentry_service = get_sentry_service()
if not await sentry_service.check_dedup(issue_id, ttl=SENTRY_DEDUP_TTL):
background_tasks.add_task(
record_external_alert_event,
project_id="awoooi",
provider="sentry",
event_id=str(issue_id or issue_data.get("shortId") or "unknown"),
stage="deduplicated",
title=str(issue_data.get("title") or "Sentry issue"),
severity=str(issue_data.get("level") or "error"),
namespace="sentry",
target_resource=str(issue_data.get("culprit") or issue_data.get("project", {}).get("slug") or "unknown"),
fingerprint=f"sentry-{issue_id or issue_data.get('shortId') or 'unknown'}",
source_url=source_url,
labels={"project": issue_data.get("project", {}), "level": issue_data.get("level")},
annotations={"message": event_data.get("message")},
payload={"dedup_ttl": SENTRY_DEDUP_TTL},
is_duplicate=True,
)
return {"status": "deduplicated", "issue_id": issue_id, "ttl": SENTRY_DEDUP_TTL}
event_data = payload.get("data", {}).get("event", {})
error_context = {
"issue_id": issue_data.get("id"),
"source_url": source_url,
"title": issue_data.get("title"),
"culprit": issue_data.get("culprit"),
"level": issue_data.get("level"),
@@ -256,6 +301,29 @@ async def analyze_and_comment(
analysis=analysis,
anomaly_frequency=frequency_dict,
)
await record_external_alert_event(
project_id="awoooi",
provider="sentry",
event_id=str(issue_id or error_context.get("issue_id") or "unknown"),
stage="approval_linked",
title=str(error_context.get("title") or "Sentry issue"),
severity=str(error_context.get("level") or "error"),
namespace="sentry",
target_resource=str(error_context.get("culprit") or error_context.get("project") or "unknown"),
fingerprint=f"sentry-{issue_id or error_context.get('issue_id') or 'unknown'}",
approval_id=approval_id,
source_url=error_context.get("source_url"),
labels={
"project": error_context.get("project"),
"level": error_context.get("level"),
},
annotations={"message": error_context.get("message")},
payload={
"anomaly_frequency": frequency_dict,
"ai_analyzed": analysis is not None,
"ai_provider": analysis.analyzed_by if analysis else None,
},
)
# 4. 發送 Telegram 告警 (含頻率資訊)
await send_sentry_telegram_alert(

View File

@@ -18,6 +18,7 @@ AWOOOI API - SignOz Webhook Handler
"""
import uuid
from typing import TYPE_CHECKING
import structlog
from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
@@ -37,10 +38,14 @@ from src.models.approval import (
)
from src.services.anomaly_counter import get_anomaly_counter
from src.services.approval_db import get_approval_service
from src.services.channel_hub import record_external_alert_event
from src.services.incident_service import get_incident_service
from src.services.telegram_gateway import get_telegram_gateway
from src.utils.timezone import now_taipei_iso
if TYPE_CHECKING:
from src.services.openclaw import LLMAnalysisResult
logger = structlog.get_logger(__name__)
router = APIRouter(prefix="/webhooks/signoz", tags=["SignOz Webhook"])
@@ -104,6 +109,26 @@ async def handle_signoz_alert(
labels = alert.get("labels", {})
annotations = alert.get("annotations", {})
severity = labels.get("severity", "warning")
source_url = alert.get("generatorURL")
service_name = labels.get("service_name", labels.get("service", "unknown"))
fingerprint = labels.get("fingerprint") or f"signoz-{alert_name}-{service_name}"
background_tasks.add_task(
record_external_alert_event,
project_id="awoooi",
provider="signoz",
event_id=str(fingerprint),
stage="received",
title=str(alert_name),
severity=str(severity),
namespace=str(labels.get("namespace", "signoz")),
target_resource=str(service_name),
fingerprint=str(fingerprint),
source_url=source_url,
labels=labels,
annotations=annotations,
payload=alert,
)
# 背景處理
background_tasks.add_task(
@@ -113,6 +138,8 @@ async def handle_signoz_alert(
annotations=annotations,
severity=severity,
starts_at=alert.get("startsAt"),
source_url=source_url,
raw_payload=alert,
)
results.append({
@@ -133,6 +160,8 @@ async def process_signoz_alert(
annotations: dict,
severity: str,
starts_at: str | None,
source_url: str | None = None,
raw_payload: dict | None = None,
):
"""
背景處理 SignOz 告警
@@ -190,6 +219,7 @@ async def process_signoz_alert(
"annotations": annotations,
"fingerprint": f"signoz-{alert_name}-{labels.get('service_name', 'unknown')}",
}
fingerprint = signal_data["fingerprint"]
# ADR-037: 傳遞頻率統計到 Incident
incident = await incident_service.create_incident_from_signal(
signal_data, frequency_stats=anomaly_frequency
@@ -229,6 +259,30 @@ async def process_signoz_alert(
anomaly_frequency=anomaly_frequency,
analysis_result=analysis_result, # 帶入 AI 結果
)
await record_external_alert_event(
project_id="awoooi",
provider="signoz",
event_id=str(fingerprint),
stage="incident_linked",
title=str(alert_name),
severity=str(severity),
namespace=str(labels.get("namespace", "signoz")),
target_resource=str(labels.get("service_name", labels.get("service", "unknown"))),
fingerprint=str(fingerprint),
incident_id=str(incident.incident_id),
approval_id=str(approval_id),
source_url=source_url or trace_url,
labels=labels,
annotations=annotations,
payload={
"raw_alert": raw_payload or {},
"trace_url": trace_url,
"has_signoz_metrics": bool(signoz_metrics),
"ai_provider": ai_provider,
"tokens": tokens,
"cost": cost,
},
)
# =================================================================
# Step 5: 發送 Telegram 告警

View File

@@ -19,6 +19,7 @@ Endpoints:
- 每個 Nonce 只能使用一次
"""
import asyncio
from uuid import UUID
from fastapi import APIRouter, HTTPException, status
@@ -27,6 +28,8 @@ from pydantic import BaseModel
from src.core.config import settings
from src.core.logging import get_logger
from src.services.approval_db import get_approval_service
from src.services.approval_execution import get_execution_service
from src.services.incident_approval_service import get_incident_approval_service
from src.services.security_interceptor import (
NonceReplayError,
UserNotWhitelistedError,
@@ -64,6 +67,80 @@ class TestPushRequest(BaseModel):
incident_id: str = ""
async def _run_telegram_approved_execution(approval) -> None:
"""Run the approved action that originated from a Telegram callback."""
approval_id = str(getattr(approval, "id", ""))
incident_id = getattr(approval, "incident_id", None)
try:
result = await get_execution_service().execute_approved_action(approval)
logger.info(
"telegram_approval_execution_completed",
approval_id=approval_id,
incident_id=incident_id,
success=bool(result),
)
except Exception as exc:
logger.error(
"telegram_approval_execution_failed",
approval_id=approval_id,
incident_id=incident_id,
error=str(exc),
)
def _schedule_telegram_approved_execution(approval) -> bool:
"""Schedule execution after Telegram approval reaches required signatures."""
try:
asyncio.create_task(_run_telegram_approved_execution(approval))
logger.info(
"telegram_approval_execution_scheduled",
approval_id=str(getattr(approval, "id", "")),
incident_id=getattr(approval, "incident_id", None),
)
return True
except Exception as exc:
logger.error(
"telegram_approval_execution_schedule_failed",
approval_id=str(getattr(approval, "id", "")),
incident_id=getattr(approval, "incident_id", None),
error=str(exc),
)
return False
async def _finalize_telegram_approval(approval, execution_triggered: bool) -> bool:
"""Complete the execution handoff for Telegram approvals.
ApprovalDBService only records the signature/status transition. The actual
executor scheduling lives in API callers, so Telegram must mirror the REST
approval endpoint instead of stopping at a visual approval stamp.
"""
if not execution_triggered:
return False
return _schedule_telegram_approved_execution(approval)
async def _sync_telegram_rejection(approval_id: str) -> bool:
"""Keep Incident state aligned when an approval is rejected from Telegram."""
try:
await get_incident_approval_service().on_approval_status_change(
approval_id=approval_id,
new_status="rejected",
)
logger.info(
"telegram_rejection_incident_synced",
approval_id=approval_id,
)
return True
except Exception as exc:
logger.error(
"telegram_rejection_incident_sync_failed",
approval_id=approval_id,
error=str(exc),
)
return False
# =============================================================================
# Endpoints
# =============================================================================
@@ -198,12 +275,17 @@ async def telegram_webhook(
)
if approval:
execution_scheduled = await _finalize_telegram_approval(
approval=approval,
execution_triggered=execution_triggered,
)
logger.info(
"telegram_approval_signed",
approval_id=approval_id,
user_id=user_id,
status=approval.status.value,
execution_triggered=execution_triggered,
execution_scheduled=execution_scheduled,
)
await _log_user_action("approve", True, getattr(approval, "incident_id", None))
@@ -213,6 +295,7 @@ async def telegram_webhook(
"approval_id": approval_id,
"status": approval.status.value,
"execution_triggered": execution_triggered,
"execution_scheduled": execution_scheduled,
}
elif action == "reject":
@@ -224,10 +307,12 @@ async def telegram_webhook(
)
if approval:
incident_synced = await _sync_telegram_rejection(approval_id)
logger.info(
"telegram_approval_rejected",
approval_id=approval_id,
user_id=user_id,
incident_synced=incident_synced,
)
await _log_user_action("reject", False, getattr(approval, "incident_id", None))
@@ -236,6 +321,7 @@ async def telegram_webhook(
"message": "Rejected",
"approval_id": approval_id,
"status": approval.status.value,
"incident_synced": incident_synced,
}
return {"ok": False, "message": "Unknown action"}

View File

@@ -33,14 +33,8 @@ from pydantic import BaseModel, Field
from src.core.config import settings
from src.core.constants import is_cicd_alertname, is_heartbeat_alertname
from src.services.alert_rule_engine import get_incident_type, match_rule
from src.services.action_parser import is_safe_kubectl_action
from src.services.security_interceptor import check_webhook_nonce # P0-06: nonce dedup via Service 層
from src.core.logging import get_logger
from src.core.metrics import record_alert_chain_success
# Phase 15.2: Trace Context (moved to SignalProducerService)
# get_trace_context 已移至 Service 層
from src.models.approval import (
ApprovalRequestCreate,
BlastRadius,
@@ -48,31 +42,43 @@ from src.models.approval import (
DryRunCheck,
RiskLevel,
)
# R4 #129 (2026-04-01 ogt): AlertPayload/AlertResponse 移至 models 層AlertAnalyzer 移至 services 層
# ogt 更新 v1.1 2026-04-01 台北時間: generate_alert_fingerprint 移至 alert_analyzer_service (ADR-024)
# [首席架構師] 移除 generate_alert_fingerprint 直接 import改用 AlertAnalyzer.generate_fingerprint v1.2 2026-04-01 Asia/Taipei
from src.models.webhook import AlertPayload, AlertResponse
from src.services.action_parser import is_safe_kubectl_action
from src.services.alert_analyzer_service import AlertAnalyzer
from src.services.alert_approval_guard import guard_alert_approval_action
from src.services.alert_grouping_service import get_alert_grouping_service
from src.services.alert_rule_engine import get_incident_type, match_rule
from src.services.alertmanager_llm_guard import (
ALERTMANAGER_LLM_INFLIGHT_LOCK_TTL_SECONDS,
try_acquire_alertmanager_llm_lock,
)
from src.services.approval_db import get_approval_service
from src.services.auto_approve import get_auto_approve_policy
from src.services.auto_repair_service import AutoRepairService
from src.services.channel_hub import (
record_alertmanager_event,
record_grouped_alert_event,
)
# Phase 15.2: Trace Context (moved to SignalProducerService)
# get_trace_context 已移至 Service 層
# R4 #129 (2026-04-01 ogt): AlertPayload/AlertResponse 移至 models 層AlertAnalyzer 移至 services 層
# ogt 更新 v1.1 2026-04-01 台北時間: generate_alert_fingerprint 移至 alert_analyzer_service (ADR-024)
# [首席架構師] 移除 generate_alert_fingerprint 直接 import改用 AlertAnalyzer.generate_fingerprint v1.2 2026-04-01 Asia/Taipei
# Phase 17 P0: Service 層 (消除 Router 直接存取 Redis)
# C2 修正 (首席架構師審查 2026-04-10): create_incident_for_approval + extract_affected_services 已移入 Service 層
from src.services.incident_service import (
classify_alert_early,
create_incident_for_approval,
extract_affected_services,
get_incident_service,
)
from src.services.auto_approve import get_auto_approve_policy
from src.services.auto_repair_service import AutoRepairService
# Phase 5: OpenClaw AI Engine
from src.services.openclaw import get_openclaw
from src.services.playbook_match_resolver import resolve_playbook_id_for_alert
from src.services.security_interceptor import check_webhook_nonce # P0-06: nonce dedup via Service 層
from src.services.signal_producer import SignalData, get_signal_producer
# Phase 5: Telegram Gateway (行動戰情室)
@@ -81,9 +87,6 @@ from src.services.telegram_gateway import TelegramGatewayError, get_telegram_gat
# Phase 18.1.7: K8s 資源名稱正規化 已移至 alert_analyzer_service (R4 #129)
from src.utils.timezone import now_taipei
# ADR-076: 告警聚合引擎 (2026-04-14 Claude Haiku 4.5 Asia/Taipei)
from src.services.alert_grouping_service import get_alert_grouping_service
router = APIRouter(prefix="/webhooks", tags=["Webhooks"])
logger = get_logger("awoooi.webhooks")
@@ -136,6 +139,38 @@ def _should_use_alertmanager_rule_first(
)
async def _analyze_alertmanager_with_timeout(
    openclaw,
    alert_context: dict,
    *,
    alert_id: str,
    alertname: str,
) -> tuple:
    """Run Alertmanager AI analysis without letting it block the workflow forever.

    Args:
        openclaw: Object exposing an awaitable ``analyze_alert(alert_context)``.
        alert_context: Alert context passed through to ``analyze_alert`` unchanged.
        alert_id: Alert identifier, used only for log correlation.
        alertname: Alert name, used only for log correlation.

    Returns:
        The result of ``openclaw.analyze_alert`` on success. On timeout or any
        other failure a 7-item fallback tuple
        ``(None, provider, "", None, "", 0, 0.0)`` is returned, where
        ``provider`` is ``"fallback_timeout"`` or ``"fallback_error"``. Callers
        unpack it as (analysis_result, ai_provider, raw_response,
        signoz_metrics, signoz_trace_url, ai_tokens, ai_cost).
    """
    try:
        return await asyncio.wait_for(
            openclaw.analyze_alert(alert_context),
            timeout=ALERTMANAGER_BACKGROUND_AI_TIMEOUT_SECONDS,
        )
    # asyncio.wait_for raises asyncio.TimeoutError, which is only an alias of
    # the builtin TimeoutError from Python 3.11 onwards. Catch both so that a
    # timeout is never misreported as "fallback_error" on older interpreters.
    except (asyncio.TimeoutError, TimeoutError):
        logger.warning(
            "alertmanager_openclaw_timeout_fallback",
            alert_id=alert_id,
            alertname=alertname,
            timeout_sec=ALERTMANAGER_BACKGROUND_AI_TIMEOUT_SECONDS,
        )
        return None, "fallback_timeout", "", None, "", 0, 0.0
    except Exception as exc:
        # Best-effort boundary: any analysis failure degrades to the fallback
        # tuple instead of aborting alert processing.
        logger.warning(
            "alertmanager_openclaw_failed_fallback",
            alert_id=alert_id,
            alertname=alertname,
            error=str(exc),
        )
        return None, "fallback_error", "", None, "", 0, 0.0
async def _escalate_auto_repair_unavailable(
*,
incident_id: str,
@@ -163,6 +198,19 @@ async def _escalate_auto_repair_unavailable(
)
def _auto_repair_action_label(result, fallback_target: str) -> str:
"""Build a verifier label that includes the actual playbook steps."""
playbook_id = getattr(result, "playbook_id", None) or "unknown"
steps = getattr(result, "executed_steps", None) or []
step_text = " | ".join(str(step) for step in steps).strip()
if not step_text:
step_text = fallback_target
step_text = " ".join(step_text.split())
if len(step_text) > 240:
step_text = f"{step_text[:237]}..."
return f"auto_repair_playbook:{playbook_id} {step_text}".strip()
async def _try_auto_repair_background(
incident_id: str,
approval_id: str,
@@ -252,6 +300,46 @@ async def _try_auto_repair_background(
},
)
_pre_execution_snapshot = None
try:
from src.core.feature_flags import aiops_flags
if aiops_flags.is_sub_flag_enabled("AIOPS_P1_PRE_DECISION_INVESTIGATOR"):
from src.services.evidence_snapshot import get_latest_snapshot
from src.services.post_execution_verifier import get_post_execution_verifier
_pre_execution_snapshot = await get_latest_snapshot(incident_id)
if _pre_execution_snapshot is None:
from src.services.pre_decision_investigator import (
get_pre_decision_investigator,
)
_pre_execution_snapshot = await asyncio.wait_for(
get_pre_decision_investigator().investigate(incident),
timeout=60.0,
)
if _pre_execution_snapshot is not None:
await asyncio.wait_for(
get_post_execution_verifier().capture_pre_execution_state(
incident,
_pre_execution_snapshot,
),
timeout=30.0,
)
except asyncio.TimeoutError:
logger.warning(
"auto_repair_pre_state_capture_timeout",
incident_id=incident_id,
approval_id=approval_id,
)
except Exception as _pre_state_err:
logger.warning(
"auto_repair_pre_state_capture_failed",
incident_id=incident_id,
approval_id=approval_id,
error=str(_pre_state_err),
)
# 執行自動修復
logger.info(
"auto_repair_executing",
@@ -263,6 +351,7 @@ async def _try_auto_repair_background(
playbook=decision.playbook,
is_cold_start=decision.is_cold_start,
similarity_score=decision.similarity_score,
run_post_verification=False,
)
logger.info(
@@ -273,6 +362,20 @@ async def _try_auto_repair_background(
# 記錄執行結果
if result:
try:
await get_approval_service().update_execution_status(
approval_id=approval_id,
success=result.success,
error_message=result.error,
)
except Exception as _approval_status_err:
logger.warning(
"auto_repair_approval_status_update_failed",
approval_id=approval_id,
incident_id=incident_id,
error=str(_approval_status_err),
)
await op_log.append(
"EXECUTION_COMPLETED",
incident_id=incident_id,
@@ -336,11 +439,10 @@ async def _try_auto_repair_background(
from src.services.evidence_snapshot import get_latest_snapshot
from src.services.learning_service import get_learning_service
_snapshot = await get_latest_snapshot(incident_id)
_action_label = (
f"{target_resource}:{namespace}"
if not result.success
else f"auto_repair_playbook:{result.playbook_id}"
_snapshot = _pre_execution_snapshot or await get_latest_snapshot(incident_id)
_action_label = _auto_repair_action_label(
result,
fallback_target=f"{target_resource}:{namespace}",
)
_verifier = get_post_execution_verifier()
_verify_result = await asyncio.wait_for(
@@ -792,6 +894,7 @@ async def verify_webhook_signature(
# 戰略 B: 滑動時間窗 (ADR-073: 5 分鐘改 30 分鐘,防同一問題反覆重建 Incident2026-04-12 ogt)
DEBOUNCE_WINDOW_MINUTES = 30
ALERTMANAGER_BACKGROUND_AI_TIMEOUT_SECONDS = 90.0
# =============================================================================
@@ -1105,7 +1208,12 @@ async def receive_alert(
# 呼叫 OpenClaw LLM 分析 (v7.0 含 SignOz 整合)
# 2026-03-29 ogt: 加入 Token/Cost 追蹤
openclaw = get_openclaw()
analysis_result, ai_provider, raw_response, signoz_metrics, signoz_trace_url, ai_tokens, ai_cost = await openclaw.analyze_alert(alert_context)
analysis_result, ai_provider, raw_response, signoz_metrics, signoz_trace_url, ai_tokens, ai_cost = await _analyze_alertmanager_with_timeout(
openclaw,
alert_context,
alert_id=alert_id,
alertname=alert.alert_type,
)
if analysis_result:
# LLM 分析成功
@@ -1147,15 +1255,33 @@ async def receive_alert(
data_impact = impact_mapping.get(blast.data_impact.value, DataImpact.NONE)
# 2026-04-27 Claude Sonnet 4.6: shadow-run Step1 — 補 metadata kwarg讓 extra_metadata 可觀測
_cmd_cs1 = (analysis_result.kubectl_command or "").strip()
_alertname_cs1 = str((alert.labels or {}).get("alertname") or alert.alert_type or "")
_guarded_action_cs1 = await guard_alert_approval_action(
action=(_cmd_cs1 or f"{analysis_result.action_title} | NO_ACTION"),
alert_namespace=alert.namespace,
alertname=_alertname_cs1,
alert_category=get_incident_type(_alertname_cs1),
)
_matched_playbook_id_cs1 = await resolve_playbook_id_for_alert(
alertname=_alertname_cs1,
affected_services=analysis_result.affected_services
or ([alert.target_resource] if alert.target_resource else []),
severity=risk_level.value,
)
if _guarded_action_cs1.blocked:
risk_level = RiskLevel.LOW
_cmd_cs1 = ""
_approval_metadata_cs1 = {
"source": ai_provider,
"confidence_score": analysis_result.confidence,
"is_rule_based": False,
"playbook_id": None,
"playbook_id": _matched_playbook_id_cs1,
**_guarded_action_cs1.metadata,
}
_cmd_cs1 = (analysis_result.kubectl_command or "").strip()
approval_create = ApprovalRequestCreate(
action=(_cmd_cs1 or f"{analysis_result.action_title} | NO_ACTION"),
action=_guarded_action_cs1.action,
description=f"[AI: {ai_provider}] {analysis_result.action_title} | {analysis_result.description}",
risk_level=risk_level,
blast_radius=BlastRadius(
@@ -1172,6 +1298,7 @@ async def receive_alert(
],
requested_by=f"OpenClaw ({ai_provider})",
metadata=_approval_metadata_cs1,
matched_playbook_id=_matched_playbook_id_cs1,
)
suggested_action = analysis_result.kubectl_command
else:
@@ -1218,7 +1345,7 @@ async def receive_alert(
# 設計confidence ≥ 0.85 + 非 CRITICAL + 非破壞性 + 有 kubectl 指令 → 直接執行
# 安全防線CRITICAL / destructive patterns / NO_ACTION/INVESTIGATE/OBSERVE / 空 kubectl → 降級 PENDING
if analysis_result:
_cs1_kubectl = analysis_result.kubectl_command.strip() if analysis_result.kubectl_command else ""
_cs1_kubectl = _cmd_cs1
_cs1_can_auto = (
bool(_cs1_kubectl)
and analysis_result.confidence >= 0.85
@@ -1239,7 +1366,7 @@ async def receive_alert(
required_signatures=0,
status=ApprovalStatus.APPROVED,
risk_level=risk_level.value,
matched_playbook_id=None,
matched_playbook_id=_matched_playbook_id_cs1,
metadata={
**_approval_metadata_cs1,
"is_high_confidence": True,
@@ -1420,6 +1547,39 @@ class AlertmanagerPayload(BaseModel):
alerts: list[AlertmanagerAlert]
_CICD_JOB_STATUSES = frozenset({"running", "success", "failed", "pending"})
def _cicd_job_status_from_alert(alert: AlertmanagerAlert) -> str:
"""將 CI/CD Alertmanager label 轉成 TelegramGateway 支援的狀態。
2026-05-12 Codex: Gitea workflow 先送進 AWOOI API不能只靠
severity=info 推 success否則 failed/pending 事件進 AwoooP 後語義會失真。
"""
labels = alert.labels or {}
for key in ("status", "job_status", "ci_status"):
value = str(labels.get(key) or "").strip().lower()
if value in _CICD_JOB_STATUSES:
return value
severity = str(labels.get("severity") or "").strip().lower()
if severity == "info":
return "success"
if severity in {"critical", "error"}:
return "failed"
return "running"
def _cicd_duration_seconds_from_alert(alert: AlertmanagerAlert) -> int:
labels = alert.labels or {}
raw = labels.get("duration_seconds") or labels.get("duration") or 0
try:
value = int(str(raw).strip())
except (TypeError, ValueError):
return 0
return max(value, 0)
def is_internal_ip(client_ip: str) -> bool:
"""檢查是否為內網 IP"""
import ipaddress
@@ -1456,6 +1616,11 @@ async def _process_new_alert_background(
try:
service = get_approval_service()
openclaw = get_openclaw()
traced_alert_labels = {
**(alert_labels or {}),
"fingerprint": fingerprint,
"alert_id": alert_id,
}
rule_response = match_rule(alert_context)
should_bypass_llm = _should_use_alertmanager_rule_first(rule_response, alert_category)
@@ -1489,7 +1654,6 @@ async def _process_new_alert_background(
str(blast.get("data_impact", "NONE")).upper(),
DataImpact.NONE,
)
rule_action_title = str(rule_response.get("action_title", "人工排查主機告警"))
rule_kubectl = str(rule_response.get("kubectl_command", "")).strip()
rule_description = str(rule_response.get("description", message))
rule_action = (
@@ -1497,13 +1661,31 @@ async def _process_new_alert_background(
if rule_kubectl else
f"NO_ACTION - {rule_description[:120]}"
)
_matched_playbook_id_cs2 = await resolve_playbook_id_for_alert(
rule_id=str(rule_response.get("rule_id", "")),
alertname=alertname,
affected_services=[target_resource] if target_resource else [],
severity=rule_risk.value,
)
_guarded_action_cs2 = await guard_alert_approval_action(
action=rule_action,
alert_namespace=namespace,
alertname=alertname,
alert_category=alert_category,
)
if _guarded_action_cs2.blocked:
rule_action = _guarded_action_cs2.action
rule_kubectl = ""
rule_risk = RiskLevel.LOW
# 2026-04-27 Claude Sonnet 4.6: shadow-run Step1 — 補 metadata kwarg讓 extra_metadata 可觀測
_approval_metadata_cs2 = {
"source": "rule_engine",
"confidence_score": float(rule_response.get("confidence", 0.0) or 0.0),
"is_rule_based": True,
"playbook_id": str(rule_response.get("rule_id", "")) or None,
"rule_id": str(rule_response.get("rule_id", "")) or None,
"playbook_id": _matched_playbook_id_cs2,
**_guarded_action_cs2.metadata,
}
approval_create = ApprovalRequestCreate(
action=rule_action,
@@ -1534,6 +1716,7 @@ async def _process_new_alert_background(
],
requested_by="OpenClaw (rule-engine)",
metadata=_approval_metadata_cs2,
matched_playbook_id=_matched_playbook_id_cs2,
)
approval = await service.create_approval_with_fingerprint(
@@ -1565,6 +1748,10 @@ async def _process_new_alert_background(
# 2026-04-27 ogt + Claude Sonnet 4.6: CS2 規則引擎自動執行
# 設計is_rule_based=True 確定性高,滿足條件直接執行,不等人工審核
# 安全防線CRITICAL / destructive patterns / NO_ACTION / 空 kubectl → 全部降級 PENDING
_cs2_auto_approval = None
_cs2_executor = None
_cs2_exec_success: bool | None = None
_cs2_exec_error: str | None = None
try:
from src.models.approval import ApprovalRequest, ApprovalStatus
from src.services.approval_execution import ApprovalExecutionService
@@ -1584,10 +1771,11 @@ async def _process_new_alert_background(
required_signatures=0,
status=ApprovalStatus.APPROVED,
risk_level=rule_risk.value,
matched_playbook_id=_approval_metadata_cs2.get("playbook_id"),
matched_playbook_id=_matched_playbook_id_cs2,
)
# 使用 DB 中剛建立的 approval.id 讓 executor 可回寫
_auto_approval.id = approval.id
_cs2_auto_approval = _auto_approval
_cs2_executor = ApprovalExecutionService()
_cs2_exec_success = await _cs2_executor.execute_approved_action(_auto_approval)
@@ -1610,6 +1798,8 @@ async def _process_new_alert_background(
exec_success=_cs2_exec_success,
)
except Exception as _auto_err:
_cs2_exec_success = False if _cs2_auto_approval is not None else None
_cs2_exec_error = str(_auto_err)
logger.warning(
"cs2_auto_execute_failed_degraded_to_pending",
approval_id=str(approval.id),
@@ -1625,7 +1815,7 @@ async def _process_new_alert_background(
message=message,
source="alertmanager",
alertname=alertname,
alert_labels=alert_labels,
alert_labels=traced_alert_labels,
notification_type=notification_type,
alert_category=alert_category,
)
@@ -1641,6 +1831,41 @@ async def _process_new_alert_background(
error=str(_meta_err),
)
await record_alertmanager_event(
project_id="awoooi",
alert_id=alert_id,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
fingerprint=fingerprint,
stage="incident_linked",
notification_type=notification_type,
alert_category=alert_category,
incident_id=incident_id,
approval_id=str(approval.id),
repeat_count=1,
labels=traced_alert_labels,
annotations=alert_context.get("annotations", {}),
)
if _cs2_auto_approval is not None and _cs2_exec_success is not None:
try:
_cs2_auto_approval.incident_id = incident_id
_cs2_executor = _cs2_executor or ApprovalExecutionService()
await _cs2_executor.finalize_auto_approved_execution(
_cs2_auto_approval,
success=_cs2_exec_success,
error_message=_cs2_exec_error,
)
except Exception as _cs2_finalize_err:
logger.warning(
"cs2_auto_execute_finalize_failed",
approval_id=str(approval.id),
incident_id=incident_id,
error=str(_cs2_finalize_err),
)
_is_heartbeat = is_heartbeat_alertname(alertname)
if can_auto_repair and not _is_heartbeat:
await _try_auto_repair_background(
@@ -1694,7 +1919,12 @@ async def _process_new_alert_background(
record_alert_chain_success("alertmanager")
return
analysis_result, ai_provider, raw_response, signoz_metrics, signoz_trace_url, ai_tokens, ai_cost = await openclaw.analyze_alert(alert_context)
analysis_result, ai_provider, raw_response, signoz_metrics, signoz_trace_url, ai_tokens, ai_cost = await _analyze_alertmanager_with_timeout(
openclaw,
alert_context,
alert_id=alert_id,
alertname=alertname,
)
if analysis_result:
risk_mapping = {
@@ -1724,15 +1954,34 @@ async def _process_new_alert_background(
data_impact = impact_mapping.get(blast.data_impact.value, DataImpact.NONE) if blast else DataImpact.NONE
# 2026-04-27 Claude Sonnet 4.6: shadow-run Step1 — 補 metadata kwarg讓 extra_metadata 可觀測
_cmd_cs3 = (analysis_result.kubectl_command or "").strip()
_guarded_action_cs3 = await guard_alert_approval_action(
action=(_cmd_cs3 or f"{analysis_result.action_title} | NO_ACTION"),
alert_namespace=namespace,
alertname=alertname,
alert_category=alert_category,
)
_matched_playbook_id_cs3 = await resolve_playbook_id_for_alert(
rule_id=str(rule_response.get("rule_id", "")),
alertname=alertname,
affected_services=analysis_result.affected_services
or ([target_resource] if target_resource else []),
severity=risk_level.value,
)
if _guarded_action_cs3.blocked:
risk_level = RiskLevel.LOW
_cmd_cs3 = ""
_approval_metadata_cs3 = {
"source": ai_provider,
"confidence_score": analysis_result.confidence,
"is_rule_based": False,
"playbook_id": None,
"rule_id": str(rule_response.get("rule_id", "")) or None,
"playbook_id": _matched_playbook_id_cs3,
**_guarded_action_cs3.metadata,
}
_cmd_cs3 = (analysis_result.kubectl_command or "").strip()
approval_create = ApprovalRequestCreate(
action=(_cmd_cs3 or f"{analysis_result.action_title} | NO_ACTION"),
action=_guarded_action_cs3.action,
description=f"[AI: {ai_provider}] {analysis_result.action_title} | {analysis_result.description}",
risk_level=risk_level,
blast_radius=BlastRadius(
@@ -1747,6 +1996,7 @@ async def _process_new_alert_background(
],
requested_by=f"OpenClaw ({ai_provider})",
metadata=_approval_metadata_cs3,
matched_playbook_id=_matched_playbook_id_cs3,
)
approval = await service.create_approval_with_fingerprint(
@@ -1760,7 +2010,7 @@ async def _process_new_alert_background(
"risk_level": risk_level.value,
"confidence": analysis_result.confidence,
"action": approval_create.action,
"kubectl_command": analysis_result.kubectl_command,
"kubectl_command": _cmd_cs3,
"is_rule_based": False,
"source": ai_provider,
}
@@ -1776,7 +2026,7 @@ async def _process_new_alert_background(
logger.warning("shadow_auto_approve_failed", error=str(_shadow_err_cs3))
# 2026-04-27 Claude Sonnet 4.6: CS3 LLM 高信心自動執行修法3擴展
_cs3_kubectl = (analysis_result.kubectl_command or "").strip()
_cs3_kubectl = _cmd_cs3
_cs3_can_auto = (
bool(_cs3_kubectl)
and analysis_result.confidence >= 0.85
@@ -1784,8 +2034,15 @@ async def _process_new_alert_background(
and "NO_ACTION" not in (analysis_result.action_title or "")
and is_safe_kubectl_action(_cs3_kubectl)
)
_cs3_auto_approval = None
_cs3_executor = None
_cs3_exec_success: bool | None = None
_cs3_exec_error: str | None = None
if _cs3_can_auto:
try:
from src.models.approval import ApprovalRequest, ApprovalStatus
from src.services.approval_execution import ApprovalExecutionService
_cs3_auto_approval = ApprovalRequest(
action=approval_create.action,
description=approval_create.description,
@@ -1793,7 +2050,7 @@ async def _process_new_alert_background(
required_signatures=0,
status=ApprovalStatus.APPROVED,
risk_level=risk_level.value,
matched_playbook_id=None,
matched_playbook_id=_matched_playbook_id_cs3,
metadata={
**_approval_metadata_cs3,
"is_high_confidence": True,
@@ -1802,8 +2059,17 @@ async def _process_new_alert_background(
else "cs3_auto_confident_execution",
},
)
_cs3_auto_approval.id = approval.id
_cs3_executor = ApprovalExecutionService()
_cs3_exec_success = await _cs3_executor.execute_approved_action(_cs3_auto_approval)
try:
await service.update_execution_status(approval.id, _cs3_exec_success)
except Exception as _cs3_upd_err:
logger.warning(
"cs3_auto_execute_status_update_failed",
approval_id=str(approval.id),
error=str(_cs3_upd_err),
)
logger.info(
"cs3_llm_auto_executed",
approval_id=str(approval.id),
@@ -1819,6 +2085,8 @@ async def _process_new_alert_background(
),
)
except Exception as _cs3_exec_err:
_cs3_exec_success = False if _cs3_auto_approval is not None else None
_cs3_exec_error = str(_cs3_exec_err)
logger.warning("cs3_llm_auto_execute_failed", error=str(_cs3_exec_err))
incident_id = await create_incident_for_approval(
@@ -1830,7 +2098,7 @@ async def _process_new_alert_background(
message=message,
source="alertmanager",
alertname=alertname,
alert_labels=alert_labels,
alert_labels=traced_alert_labels,
notification_type=notification_type,
alert_category=alert_category,
)
@@ -1846,6 +2114,41 @@ async def _process_new_alert_background(
error=str(_meta_err),
)
await record_alertmanager_event(
project_id="awoooi",
alert_id=alert_id,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
fingerprint=fingerprint,
stage="incident_linked",
notification_type=notification_type,
alert_category=alert_category,
incident_id=incident_id,
approval_id=str(approval.id),
repeat_count=1,
labels=traced_alert_labels,
annotations=alert_context.get("annotations", {}),
)
if _cs3_auto_approval is not None and _cs3_exec_success is not None:
try:
_cs3_auto_approval.incident_id = incident_id
_cs3_executor = _cs3_executor or ApprovalExecutionService()
await _cs3_executor.finalize_auto_approved_execution(
_cs3_auto_approval,
success=_cs3_exec_success,
error_message=_cs3_exec_error,
)
except Exception as _cs3_finalize_err:
logger.warning(
"cs3_auto_execute_finalize_failed",
approval_id=str(approval.id),
incident_id=incident_id,
error=str(_cs3_finalize_err),
)
root_cause = analysis_result.description or message
estimated_downtime = blast.estimated_downtime if blast else "~30s"
primary_responsibility = analysis_result.primary_responsibility or "COLLAB"
@@ -1895,7 +2198,7 @@ async def _process_new_alert_background(
risk_level=risk_level.value,
resource_name=target_resource,
root_cause=root_cause,
suggested_action=(analysis_result.kubectl_command or "").strip() or analysis_result.suggested_action.value,
suggested_action=approval_create.action,
estimated_downtime=estimated_downtime,
hit_count=1,
primary_responsibility=primary_responsibility,
@@ -1921,11 +2224,17 @@ async def _process_new_alert_background(
else:
# LLM 失敗 - 使用預設值
# 2026-04-27 Claude Sonnet 4.6: shadow-run Step1 — 補 metadata kwarg讓 extra_metadata 可觀測
_matched_playbook_id_cs4 = await resolve_playbook_id_for_alert(
rule_id=str(rule_response.get("rule_id", "")),
alertname=alertname,
affected_services=[target_resource] if target_resource else [],
severity="medium",
)
_approval_metadata_cs4 = {
"source": "fallback",
"confidence_score": None,
"is_rule_based": False,
"playbook_id": None,
"playbook_id": _matched_playbook_id_cs4,
}
fallback_create = ApprovalRequestCreate(
action="OBSERVE",
@@ -1940,6 +2249,7 @@ async def _process_new_alert_background(
dry_run_checks=[],
requested_by="OpenClaw (fallback)",
metadata=_approval_metadata_cs4,
matched_playbook_id=_matched_playbook_id_cs4,
)
approval = await service.create_approval_with_fingerprint(
@@ -1977,7 +2287,7 @@ async def _process_new_alert_background(
message=message,
source="alertmanager",
alertname=alertname,
alert_labels=alert_labels,
alert_labels=traced_alert_labels,
notification_type=notification_type,
alert_category=alert_category,
)
@@ -1993,6 +2303,55 @@ async def _process_new_alert_background(
error=str(_meta_err),
)
await record_alertmanager_event(
project_id="awoooi",
alert_id=alert_id,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
fingerprint=fingerprint,
stage="incident_linked",
notification_type=notification_type,
alert_category=alert_category,
incident_id=fallback_incident_id,
approval_id=str(approval.id),
repeat_count=1,
labels=traced_alert_labels,
annotations=alert_context.get("annotations", {}),
)
_is_heartbeat = is_heartbeat_alertname(alertname)
if can_auto_repair and not _is_heartbeat:
await _try_auto_repair_background(
incident_id=fallback_incident_id,
approval_id=str(approval.id),
alert_type=alert_type,
target_resource=target_resource,
namespace=namespace,
)
elif not can_auto_repair and not _is_heartbeat:
from src.repositories.alert_operation_log_repository import get_alert_operation_log_repository
_op_log_fallback = get_alert_operation_log_repository()
await _op_log_fallback.append(
"GUARDRAIL_BLOCKED",
incident_id=fallback_incident_id,
approval_id=str(approval.id),
actor="prometheus-rule",
action_detail=f"Prometheus rule 設定 auto_repair=falsefallback 轉人工: {alertname}",
success=False,
context={"alertname": alertname, "auto_repair_flag": False},
)
await _escalate_auto_repair_unavailable(
incident_id=fallback_incident_id,
approval_id=str(approval.id),
alert_type=alert_type,
target_resource=target_resource,
namespace=namespace,
failure_reason="Prometheus rule auto_repair=falsefallback 未進入自動修復評估",
attempted_actions="llm_fallback -> guardrail:auto_repair_false -> emergency_intervention",
)
await _push_to_telegram_background(
approval_id=str(approval.id),
risk_level="medium",
@@ -2125,11 +2484,12 @@ async def alertmanager_webhook(
telegram = get_telegram_gateway()
# 解析 CI/CD 狀態
stage = alert.labels.get("stage", "")
job_status = "success" if alert.labels.get("severity") == "info" else "running"
job_status = _cicd_job_status_from_alert(alert)
commit_sha = alert.labels.get("commit", "")
triggered_by = alert.labels.get("triggered_by", "CI")
workflow_url = alert.annotations.get("workflow_url", "")
summary = alert.annotations.get("summary", alertname)
detail_message = alert.annotations.get("description", "")
await telegram.send_cicd_progress(
job_name=summary,
@@ -2137,6 +2497,8 @@ async def alertmanager_webhook(
stage=stage,
commit_sha=commit_sha,
triggered_by=triggered_by,
duration_seconds=_cicd_duration_seconds_from_alert(alert),
message=detail_message,
workflow_url=workflow_url,
)
@@ -2236,6 +2598,22 @@ async def alertmanager_webhook(
target=target_resource,
fingerprint=fingerprint,
)
background_tasks.add_task(
record_alertmanager_event,
project_id="awoooi",
alert_id=alert_id,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
fingerprint=fingerprint,
stage="received",
notification_type=notification_type,
alert_category=alert_category,
source_url=alert.generatorURL,
labels=dict(alert.labels) if alert.labels else {},
annotations=dict(alert.annotations) if alert.annotations else {},
)
# ==========================================================================
# ADR-076: 告警聚合引擎 — 5 分鐘滑動視窗,防止告警風暴
@@ -2266,6 +2644,19 @@ async def alertmanager_webhook(
parent_fingerprint=grouping_result.parent_fingerprint,
reason="Alert storm suppressed — child alert within 5-min window",
)
background_tasks.add_task(
record_grouped_alert_event,
project_id="awoooi",
alert_id=alert_id,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
group_key=grouping_result.group_key,
count=grouping_result.count,
parent_fingerprint=grouping_result.parent_fingerprint,
fingerprint=fingerprint,
)
return AlertResponse(
success=True,
message=(
@@ -2305,6 +2696,26 @@ async def alertmanager_webhook(
hit_count=updated_approval.hit_count,
reason="Converged alert - Telegram already sent for this fingerprint",
)
background_tasks.add_task(
record_alertmanager_event,
project_id="awoooi",
alert_id=alert_id,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
fingerprint=fingerprint,
stage="converged",
notification_type=notification_type,
alert_category=alert_category,
incident_id=getattr(updated_approval, "incident_id", None),
approval_id=str(updated_approval.id),
repeat_count=updated_approval.hit_count,
is_duplicate=True,
source_url=alert.generatorURL,
labels=dict(alert.labels) if alert.labels else {},
annotations=dict(alert.annotations) if alert.annotations else {},
)
return AlertResponse(
success=True,
@@ -2332,10 +2743,27 @@ async def alertmanager_webhook(
message=message,
source="alertmanager",
alertname=alertname,
alert_labels=alert.labels,
alert_labels={**alert.labels, "fingerprint": fingerprint, "alert_id": alert_id},
notification_type="TYPE-1",
alert_category=alert_category,
)
background_tasks.add_task(
record_alertmanager_event,
project_id="awoooi",
alert_id=alert_id,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
fingerprint=fingerprint,
stage="incident_linked",
notification_type="TYPE-1",
alert_category=alert_category,
incident_id=_info_incident_id,
source_url=alert.generatorURL,
labels={**alert.labels, "fingerprint": fingerprint, "alert_id": alert_id},
annotations=dict(alert.annotations) if alert.annotations else {},
)
# 2026-04-15 ogt: TYPE-1 純資訊告警建立後立即關閉
# 設計原則: backup/heartbeat/info 告警無需追蹤狀態,通知即完成
# 防止 incidents 表無限累積 INVESTIGATING 記錄ADR-073 漏洞修補)
@@ -2355,7 +2783,7 @@ async def alertmanager_webhook(
record_alert_chain_success("alertmanager")
return AlertResponse(
success=True,
message=f"✅ TYPE-1 純資訊告警已通知 (no LLM)",
message="✅ TYPE-1 純資訊告警已通知 (no LLM)",
alert_id=alert_id,
approval_created=False,
)
@@ -2367,6 +2795,23 @@ async def alertmanager_webhook(
fingerprint=fingerprint,
ttl_seconds=ALERTMANAGER_LLM_INFLIGHT_LOCK_TTL_SECONDS,
)
background_tasks.add_task(
record_alertmanager_event,
project_id="awoooi",
alert_id=alert_id,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
fingerprint=fingerprint,
stage="llm_inflight_suppressed",
notification_type=notification_type,
alert_category=alert_category,
is_duplicate=True,
source_url=alert.generatorURL,
labels=dict(alert.labels) if alert.labels else {},
annotations=dict(alert.annotations) if alert.annotations else {},
)
return AlertResponse(
success=True,
message="🛡️ 告警已由同指紋背景 AI 分析處理中,跳過重複 LLM 呼叫",

View File

@@ -0,0 +1,126 @@
"""
AwoooP Operator authentication boundary.
ADR-116 Gate 5 approval decisions must not trust browser-supplied identities.
This module accepts a short-lived operator identity only when it is paired with
the server-side AwoooP operator key.
"""
from __future__ import annotations
import re
import secrets
from dataclasses import dataclass
from typing import Annotated
import structlog
from fastapi import Header, HTTPException, status
from src.core.config import settings
logger = structlog.get_logger(__name__)
# Accepted operator identity: one leading ASCII alphanumeric followed by
# 1-127 characters from [A-Za-z0-9_.:@-], i.e. 2-128 characters in total.
_OPERATOR_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_.:@-]{1,127}$")
# Lower-cased environment names in which a missing server-side operator key
# is a hard authentication failure rather than a dev-only bypass.
_PROD_ENVS = {"prod", "production"}
@dataclass(frozen=True, slots=True)
class AwoooPOperatorPrincipal:
"""Authenticated AwoooP operator principal."""
operator_id: str
auth_method: str
def _auth_error(detail: str = "Operator authentication required") -> HTTPException:
    """Build (without raising) the 401 error used for authentication failures.

    Args:
        detail: Message placed in the HTTP error body.

    Returns:
        An ``HTTPException`` with status 401 for the caller to raise.
    """
    unauthorized = status.HTTP_401_UNAUTHORIZED
    return HTTPException(detail=detail, status_code=unauthorized)
def _clean_operator_id(operator_id: str | None) -> str:
    """Trim and validate the operator identity header value.

    Args:
        operator_id: Raw header value, or ``None`` when the header is absent.

    Returns:
        The identity with surrounding whitespace removed.

    Raises:
        HTTPException: 401 when the header is absent; 422 when the trimmed
            value does not match the allowed identity pattern.
    """
    if operator_id is None:
        raise _auth_error()

    candidate = operator_id.strip()
    if _OPERATOR_ID_RE.fullmatch(candidate):
        return candidate

    # Present but malformed: report a validation error, not an auth error.
    raise HTTPException(
        status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
        detail="Invalid operator identity",
    )
def authenticate_awooop_operator_headers(
    operator_id: str | None,
    operator_key: str | None,
    *,
    configured_key: str | None = None,
    environment: str | None = None,
) -> AwoooPOperatorPrincipal:
    """Validate trusted AwoooP operator headers.

    The operator identity is validated first; the key is then checked against
    the server-side shared key. When no server-side key is configured,
    production environments reject the request, while any other environment
    accepts the identity as a ``dev_header`` principal (logged as a warning).

    Args:
        operator_id: Value from ``X-AwoooP-Operator-Id``.
        operator_key: Value from ``X-AwoooP-Operator-Key``.
        configured_key: Server-side shared key. Defaults to settings.
        environment: Runtime environment. Defaults to settings.

    Returns:
        Authenticated operator principal.

    Raises:
        HTTPException: 401 when authentication is missing/invalid, or 422 for
            malformed operator identity.
    """
    cleaned_operator_id = _clean_operator_id(operator_id)
    expected_key = (
        settings.AWOOOP_OPERATOR_API_KEY
        if configured_key is None
        else configured_key
    )
    runtime_env = (environment or settings.ENVIRONMENT or "").lower()

    if not expected_key:
        if runtime_env in _PROD_ENVS:
            # Fail closed: production must never run without the shared key.
            logger.critical(
                "awooop_operator_key_missing_in_production",
                environment=runtime_env,
            )
            raise _auth_error()
        logger.warning(
            "awooop_operator_key_skipped_dev_only",
            environment=runtime_env,
            operator_id=cleaned_operator_id,
        )
        return AwoooPOperatorPrincipal(
            operator_id=cleaned_operator_id,
            auth_method="dev_header",
        )

    if not operator_key:
        logger.warning("awooop_operator_key_missing", operator_id=cleaned_operator_id)
        raise _auth_error()

    # compare_digest raises TypeError for str arguments containing non-ASCII
    # characters. The key is client-supplied, so compare encoded bytes to keep
    # a malformed key a 401 instead of an unhandled server error, while
    # retaining the constant-time comparison.
    keys_match = secrets.compare_digest(
        operator_key.encode("utf-8"),
        expected_key.encode("utf-8"),
    )
    if not keys_match:
        logger.warning("awooop_operator_key_invalid", operator_id=cleaned_operator_id)
        raise _auth_error()

    return AwoooPOperatorPrincipal(
        operator_id=cleaned_operator_id,
        auth_method="operator_api_key",
    )
async def verify_awooop_operator(
    x_awooop_operator_id: Annotated[
        str | None,
        Header(alias="X-AwoooP-Operator-Id"),
    ] = None,
    x_awooop_operator_key: Annotated[
        str | None,
        Header(alias="X-AwoooP-Operator-Key"),
    ] = None,
) -> AwoooPOperatorPrincipal:
    """FastAPI dependency for operator mutation endpoints.

    Reads the ``X-AwoooP-Operator-Id`` and ``X-AwoooP-Operator-Key`` request
    headers (both optional at the transport level) and delegates validation to
    ``authenticate_awooop_operator_headers`` using the configured defaults.

    Returns:
        The authenticated operator principal.
    """
    principal = authenticate_awooop_operator_headers(
        x_awooop_operator_id,
        x_awooop_operator_key,
    )
    return principal

View File

@@ -145,7 +145,7 @@ class Settings(BaseSettings):
# ==========================================================================
# ADR-104: LLM Playbook Generator
# 成功修復且未命中既有 Playbook 時,用本地 LLM 生成 DRAFT/REVIEW Playbook。
# 成本護欄:實作層只走 local providerOllama 111 → Ollama 188),不新增雲端 fallback。
# 成本護欄:實作層只走 local providerGCP-A → GCP-B → 111),不新增雲端 fallback。
# 回滾指令: kubectl set env deployment/awoooi-api ENABLE_LLM_PLAYBOOK_GENERATION=false
# ==========================================================================
ENABLE_LLM_PLAYBOOK_GENERATION: bool = Field(
@@ -215,8 +215,8 @@ class Settings(BaseSettings):
description="Phase 25 P0: DIAGNOSE NIM timeout (秒),實測 2.2-27.3s avg 10.6s60s 含 buffer",
)
OLLAMA_DIAGNOSE_TIMEOUT_SECONDS: int = Field(
default=200,
description="Phase 25 P0: Ollama timeout (秒),實測 CPU-only 238s保留欄位但 DIAGNOSE 不再走 Ollama",
default=300,
description="Ollama diagnose timeout (秒)。GCP qwen3:14b CPU-only can exceed the old 120s proxy limit.",
)
# ==========================================================================
@@ -370,11 +370,16 @@ class Settings(BaseSettings):
)
return v
# 2026-04-25 Claude Engineer-C (P1.1): Ollama 健康檢測推理測試模型
# 2026-05-05 Codex: health inference must stay on alert-fast model; qwen2.5
# keeps reloading a 7B model on CPU-only GCP and slows incident fallback.
OLLAMA_HEALTH_CHECK_MODEL: str = Field(
default="qwen2.5:7b-instruct",
default="gemma3:4b",
description="OllamaHealthMonitor 推理測試使用模型P1.1",
)
OLLAMA_EMBEDDING_MODEL: str = Field(
default="bge-m3:latest",
description="Ollama embedding model. ADR-110 migrated embeddings from nomic-embed-text to bge-m3.",
)
# 2026-04-12 ogt: 心跳必須確認載入的 Ollama 模型清單
# 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP 升級更新必要模型清單nomic→bge-m3 + 新增 qwen3:14b + hermes3
OLLAMA_REQUIRED_MODELS: list[str] = Field(
@@ -500,10 +505,42 @@ class Settings(BaseSettings):
default=False,
description=(
"Allow LocalCodeReviewService to fall back to Gemini when the "
"GCP-B/Ollama code-review lane fails. Default false to avoid "
"local Ollama code-review lane fails. Default false to avoid "
"unexpected cloud spend from Gitea push/PR alerts."
),
)
ALERT_AI_ALLOW_CLOUD_FALLBACK: bool = Field(
default=True,
description=(
"Allow incident/alert OpenClaw analysis to use cloud fallback "
"providers after the GCP-A/GCP-B/111 Ollama lane is exhausted. "
"Default true so Gemini can act as the final backup, after the "
"ordered Ollama lane is exhausted."
),
)
ALERT_AI_ENFORCE_OLLAMA_FIRST: bool = Field(
default=True,
description=(
"Force incident/alert OpenClaw analysis to try GCP-A, then GCP-B, "
"then local 111 before cloud backup providers such as Gemini."
),
)
ALERT_OLLAMA_MODEL: str = Field(
default="qwen3:14b",
description=(
"Ollama model used for incident/alert deep diagnosis. Alert cards "
"may wait for this model; Gemini remains a backup after GCP-A, "
"GCP-B, and 111 fail."
),
)
INCIDENT_LLM_TIMEOUT_SECONDS: int = Field(
default=360,
description=(
"Outer timeout for incident OpenClaw proposal generation. This must "
"be long enough for the GCP-A/GCP-B/111 Ollama lane to complete "
"before Gemini backup is considered useful."
),
)
# 2026-03-29 ogt: ADR-036 Nemotron Tool Calling 整合
NVIDIA_API_KEY: str = Field(
default="",
@@ -565,6 +602,13 @@ class Settings(BaseSettings):
default="",
description="API Key for K8s admin endpoints (X-K8s-Api-Key header)",
)
AWOOOP_OPERATOR_API_KEY: str = Field(
default="",
description=(
"API key for AwoooP operator mutation endpoints "
"(X-AwoooP-Operator-Key header)"
),
)
# ==========================================================================
# 統帥鐵律:禁止 SQLite (AWOOOI 憲法)
@@ -855,7 +899,7 @@ class Settings(BaseSettings):
# ==========================================================================
# MCP Phase 2b: Prometheus MCP Server (ADR-071, 2026-04-11 Claude Sonnet 4.6)
# ==========================================================================
# 2026-04-29 ogt + Claude Opus 4.7: drift fix — 188 是 Ollama HubPrometheus 實際在 110
# 2026-04-29 ogt + Claude Opus 4.7: drift fix — Prometheus 實際在 110
# ConfigMap 04-configmap.yaml 也是 110;governance_agent / SLO check 連 188 會 timeout
# 此 drift 是 SPF-4 (governance_agent silently fail) 根因之一
PROMETHEUS_URL: str = Field(
@@ -929,7 +973,7 @@ class Settings(BaseSettings):
"devops": "192.168.0.110", # Harbor, GH Runner
"security": "192.168.0.112", # Kali Scanner
"k3s_master": "192.168.0.120", # K3s Master
"ai_web": "192.168.0.188", # Nginx, Postgres, Redis, Ollama
"ai_web": "192.168.0.188", # Nginx, Postgres, Redis, SignOz
}

View File

@@ -11,6 +11,7 @@ Features:
"""
import logging
import re
import sys
from typing import Any
@@ -19,6 +20,28 @@ from structlog.types import Processor
from src.core.config import settings
# Matches the token segment of a Telegram Bot API URL; group 1 is the part
# that is kept, everything up to the next "/" or whitespace is the token.
_TELEGRAM_BOT_URL_RE = re.compile(r"(api\.telegram\.org/bot)[^/\s]+")


def _redact_sensitive_log_text(text: str) -> str:
    """Return *text* with any Telegram bot token in a Bot API URL masked."""
    return _TELEGRAM_BOT_URL_RE.sub(r"\1<redacted>", text)


class SensitiveURLRedactionFilter(logging.Filter):
    """Stdlib logging filter that keeps token-bearing URLs out of log output.

    Third-party loggers such as httpx log full request URLs, which for the
    Telegram Bot API include the bot token.

    The record is only rewritten when redaction actually changes the rendered
    message. Records that contain nothing sensitive keep their original
    ``msg`` object and their original, correctly typed ``args``. Coercing
    every argument to ``str`` would make ``%d``/``%f`` style records raise
    ``TypeError`` when the handler formats them, and would turn non-string
    ``msg`` objects into plain strings.
    """

    def filter(self, record: logging.LogRecord) -> bool:
        """Redact *record* in place; always returns True (never drops records)."""
        try:
            # Render with the original args so numeric format specs still work
            # and non-string args (e.g. URL objects) are stringified correctly.
            rendered = record.getMessage()
        except Exception:
            # Malformed msg/args pair or a failing __str__: a filter must not
            # raise, so scrub what is already text and leave the rest for the
            # handler to report in its usual way.
            if isinstance(record.msg, str):
                record.msg = _redact_sensitive_log_text(record.msg)
            args = record.args
            if isinstance(args, tuple):
                record.args = tuple(
                    _redact_sensitive_log_text(arg) if isinstance(arg, str) else arg
                    for arg in args
                )
            elif isinstance(args, dict):
                record.args = {
                    key: _redact_sensitive_log_text(value)
                    if isinstance(value, str)
                    else value
                    for key, value in args.items()
                }
            return True

        redacted = _redact_sensitive_log_text(rendered)
        if redacted != rendered:
            # Freeze the already-formatted, redacted text; clearing args stops
            # the handler from applying %-formatting a second time.
            record.msg = redacted
            record.args = ()
        return True
def setup_logging() -> None:
"""Configure structlog for the application"""
@@ -68,6 +91,15 @@ def setup_logging() -> None:
stream=sys.stdout,
level=logging.getLevelName(settings.LOG_LEVEL),
)
redaction_filter = SensitiveURLRedactionFilter()
root_logger = logging.getLogger()
root_logger.addFilter(redaction_filter)
for handler in root_logger.handlers:
handler.addFilter(redaction_filter)
# httpx INFO 會輸出完整 request URL,Telegram Bot API URL 內含 token。
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("httpcore").setLevel(logging.WARNING)
def get_logger(name: str | None = None, **initial_context: Any) -> structlog.BoundLogger:

View File

@@ -17,6 +17,7 @@ PostgreSQL 事務管理器,確保多表操作原子性。
from typing import Any
import structlog
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
logger = structlog.get_logger(__name__)
@@ -49,14 +50,20 @@ class UnitOfWork:
- Redis 操作失敗時必須手動呼叫 rollback()
"""
def __init__(self, session_factory: async_sessionmaker[AsyncSession]):
def __init__(
self,
session_factory: async_sessionmaker[AsyncSession],
project_id: str | None = None,
):
"""
初始化 UnitOfWork
Args:
session_factory: SQLAlchemy async session factory
project_id: RLS project context. None means contextvar/default awoooi.
"""
self._session_factory = session_factory
self._project_id = project_id
self._session: AsyncSession | None = None
self._committed = False
@@ -74,9 +81,18 @@ class UnitOfWork:
async def __aenter__(self) -> "UnitOfWork":
"""進入事務"""
from src.core.context import get_current_project_id
self._session = self._session_factory()
effective_pid = (
self._project_id if self._project_id is not None else get_current_project_id()
)
await self._session.execute(
text("SELECT set_config('app.project_id', :pid, TRUE)"),
{"pid": effective_pid},
)
self._committed = False
logger.debug("uow_started")
logger.debug("uow_started", project_id=effective_pid)
return self
async def __aexit__(

View File

@@ -10,7 +10,7 @@ from __future__ import annotations
from datetime import datetime
from decimal import Decimal
from typing import Any
from uuid import UUID, uuid4
from uuid import UUID
from sqlalchemy import (
Boolean,
@@ -577,8 +577,8 @@ class AwoooPMcpGatewayAudit(Base):
run_id: Mapped[UUID | None] = mapped_column(nullable=True)
trace_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
agent_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
tool_id: Mapped[UUID] = mapped_column(
ForeignKey("awooop_mcp_tool_registry.tool_id"), nullable=False
tool_id: Mapped[UUID | None] = mapped_column(
ForeignKey("awooop_mcp_tool_registry.tool_id"), nullable=True
)
tool_name: Mapped[str] = mapped_column(String(128), nullable=False)
credential_ref: Mapped[str | None] = mapped_column(String(256), nullable=True)
@@ -635,6 +635,13 @@ class AwoooPConversationEvent(Base):
content_type: Mapped[str] = mapped_column(String(32), nullable=False, default="text")
content_hash: Mapped[str | None] = mapped_column(String(64), nullable=True)
content_preview: Mapped[str | None] = mapped_column(String(256), nullable=True)
content_redacted: Mapped[str | None] = mapped_column(Text, nullable=True)
redaction_version: Mapped[str] = mapped_column(
String(32), nullable=False, server_default=text("'audit_sink_v1'")
)
source_envelope: Mapped[dict[str, Any]] = mapped_column(
JSONB, nullable=False, server_default=text("'{}'::jsonb")
)
attachment_sha256: Mapped[str | None] = mapped_column(String(64), nullable=True)
is_duplicate: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
provider_ts: Mapped[datetime | None] = mapped_column(nullable=True)
@@ -680,6 +687,13 @@ class AwoooPOutboundMessage(Base):
message_type: Mapped[str] = mapped_column(String(32), nullable=False)
content_hash: Mapped[str | None] = mapped_column(String(64), nullable=True)
content_preview: Mapped[str | None] = mapped_column(String(256), nullable=True)
content_redacted: Mapped[str | None] = mapped_column(Text, nullable=True)
redaction_version: Mapped[str] = mapped_column(
String(32), nullable=False, server_default=text("'audit_sink_v1'")
)
source_envelope: Mapped[dict[str, Any]] = mapped_column(
JSONB, nullable=False, server_default=text("'{}'::jsonb")
)
provider_message_id: Mapped[str | None] = mapped_column(String(64), nullable=True)
send_status: Mapped[str] = mapped_column(String(16), nullable=False, default="pending")
send_error: Mapped[str | None] = mapped_column(Text, nullable=True)

View File

@@ -106,10 +106,13 @@ async def get_db() -> AsyncGenerator[AsyncSession, None]:
factory = get_session_factory()
async with factory() as session:
try:
from src.core.context import get_current_project_id
# AwoooP Phase 2.3 (2026-05-04 ogt): SET LOCAL app.project_id 讓 RLS Policy 生效
# 預設 'awoooi',多租戶路由將在 middleware 注入實際 project_id
# 預設 'awoooi',多租戶路由將透過 contextvar 注入實際 project_id
await session.execute(
text("SELECT set_config('app.project_id', 'awoooi', TRUE)")
text("SELECT set_config('app.project_id', :pid, TRUE)"),
{"pid": get_current_project_id()},
)
yield session
await session.commit()

View File

@@ -9,6 +9,7 @@ Layer 1 意圖路由(關鍵字正則)→ Ollama 本地模型111→ Tel
debugger/vuln → deepseek-r1:14b推理; code agents → qwen2.5-coder:7b; 其他 → qwen2.5:7b-instruct
"""
from __future__ import annotations
import asyncio
import re
import time
@@ -17,7 +18,6 @@ import httpx
import structlog
from sqlalchemy import text
from src.core.config import settings
from src.core.redis_client import get_redis
from src.db.base import get_db_context
from src.hermes.agent_loader import get_agent_system_prompt
@@ -266,7 +266,9 @@ async def process_nl_message(
success = False
error_type: str | None = None
try:
ollama_base = getattr(settings, "OLLAMA_URL", "http://34.143.170.20:11434") # 2026-05-03 ogt: ADR-110 GCP-A Primary
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
ollama_base = resolve_ollama_endpoint("hermes")
async with httpx.AsyncClient(timeout=_OLLAMA_TIMEOUT) as _hc:
resp = await _hc.post(
f"{ollama_base}/api/chat",

View File

@@ -46,6 +46,7 @@ _DEDUP_TTL_SEC = 3600 # 同一告警 1 小時內不重複發送
_TG_SILENCE_THRESHOLD = 2 # PENDING telegram_message_id IS NULL 告警門檻
_FLYWHEEL_SUCCESS_MIN = 0.30 # 執行成功率下限
_STUCK_ANALYSIS_THRESHOLD = 3 # Agent Debate 失敗導致卡住的告警門檻
_TRUST_DRIFT_META_MIN_RATIO = 0.20 # 低於此比例只記治理事件,不升 Meta System
# 2026-05-03 ogt + Claude Opus 4.7 — feedback_silencing_alerts_recurring_violation
# 啟動寬限期30 分鐘內可 skip「資料還沒到」噪音超過寬限期仍空 = 真資料管線斷,必須告警
@@ -210,7 +211,8 @@ async def _check_once() -> None:
from src.services.governance_agent import get_governance_agent
trust_result = await get_governance_agent().check_trust_drift(emit_alert=False)
drifted = trust_result.get("drifted", 0)
if drifted > 0:
drift_ratio = float(trust_result.get("drift_ratio") or 0.0)
if drifted > 0 and drift_ratio >= _TRUST_DRIFT_META_MIN_RATIO:
auto_deprecated = trust_result.get("auto_deprecated", 0)
kept = trust_result.get("kept", 0)
violations.append(
@@ -219,6 +221,13 @@ async def _check_once() -> None:
)
# 2026-05-05 ogt W6 修復:移除動態 low_count避免 count 微變繞過 dedup
violation_codes.append("W6:trust_drift")
elif drifted > 0:
logger.info(
"watchdog_w6_trust_drift_below_meta_threshold",
drifted=drifted,
drift_ratio=round(drift_ratio, 3),
threshold=_TRUST_DRIFT_META_MIN_RATIO,
)
except Exception as e:
logger.warning("watchdog_w6_trust_drift_check_failed", error=str(e))

View File

@@ -479,7 +479,7 @@ async def _collect_all_k8s_assets() -> tuple[list[dict[str, Any]], list[dict[str
# 6. Prometheus targets — 補齊 host-install services (110/112/188/125 等非 K8s)
# Gap 1 修補 (2026-04-19 audit): 原本 asset_inventory 只涵蓋 K8s,
# 110 Harbor/Gitea/監控 + 188 PostgreSQL/Redis/Ollama host-install 全漏
# 110 Harbor/Gitea/監控 + 188 PostgreSQL/Redis host-install 全漏
# 用 Prometheus /api/v1/targets 自動發現全節點服務
try:
prom_assets, host_relationships = await _collect_prometheus_targets()

View File

@@ -172,7 +172,7 @@ _LLM_FORECAST_PROMPT = """你是 AWOOOI 容量規劃專家。以下 host 過去
{findings_json}
## 當前主機環境資訊
- 主機架構: 110 (Harbor/Gitea/監控), 112 (Security), 120/121 (K3s), 125 (K3s backup), 188 (PG/Redis/Ollama/MinIO)
- 主機架構: 110 (Harbor/Gitea/監控), 112 (Security), 120/121 (K3s), 125 (K3s backup), 188 (PG/Redis/MinIO)
- 判斷請考慮: 該主機上跑什麼服務、常見瓶頸模式
## 輸出規格 (必須是合法 JSON,純 JSON 無前後文字)

View File

@@ -28,7 +28,7 @@ from datetime import timedelta
import structlog
from sqlalchemy import select, update
from src.db.base import get_session_factory
from src.db.base import get_db_context
from src.db.models import AiGovernanceEvent, KnowledgeEntryRecord
from src.utils.timezone import now_taipei
@@ -129,7 +129,7 @@ class KbRotCleaner:
rot_reasons: dict[str, list[str]] = {}
total = 0
async with get_session_factory()() as session:
async with get_db_context() as session:
# 只掃 active 狀態(非 archived
q = await session.execute(
select(KnowledgeEntryRecord).where(
@@ -193,7 +193,7 @@ class KbRotCleaner:
if not result.stale_ids:
return
async with get_session_factory()() as session:
async with get_db_context() as session:
# 逐條更新(避免 bulk update 覆蓋 tags JSONB
q = await session.execute(
select(KnowledgeEntryRecord).where(
@@ -220,7 +220,7 @@ class KbRotCleaner:
async def _save_event(self, result: RotScanResult) -> None:
"""寫 kb_stale 事件到 ai_governance_events。"""
try:
async with get_session_factory()() as session:
async with get_db_context() as session:
event = AiGovernanceEvent(
event_type="kb_stale",
details=result.to_dict(),

View File

@@ -25,7 +25,9 @@ Feature Flag
from __future__ import annotations
import asyncio
import json
import structlog
from src.core.config import settings

View File

@@ -33,7 +33,7 @@ from datetime import timedelta
import structlog
from sqlalchemy import and_, select, update
from src.db.base import get_session_factory
from src.db.base import get_db_context
from src.db.models import KnowledgeEntryRecord
from src.models.knowledge import EntryStatus
from src.utils.timezone import now_taipei
@@ -112,8 +112,7 @@ class KnowledgeDecayJob:
cutoff = now_taipei() - timedelta(days=DECAY_AGE_DAYS)
decayable_statuses = [EntryStatus.DRAFT.value, EntryStatus.REVIEW.value]
session_factory = get_session_factory()
async with session_factory() as db:
async with get_db_context() as db:
# 查30 天未引用view_count=0且 updated_at < cutoff 的 draft/review 條目
stmt = select(KnowledgeEntryRecord).where(
and_(

View File

@@ -29,7 +29,7 @@ from datetime import timedelta
import structlog
from sqlalchemy import and_, select
from src.db.base import get_session_factory
from src.db.base import get_db_context
from src.db.models import AgentSession, AiGovernanceEvent, AutoRepairExecution, IncidentEvidence
from src.utils.timezone import now_taipei
@@ -109,9 +109,7 @@ class OfflineReplayService:
async def _run_replay(self) -> OfflineReplayReport:
cutoff = now_taipei() - timedelta(days=REPLAY_LOOKBACK_DAYS)
session_factory = get_session_factory()
async with session_factory() as db:
async with get_db_context() as db:
# 1. 取最近 N 個有 AgentSession(coordinator) 的 Incident
stmt = (
select(AgentSession.incident_id)
@@ -137,7 +135,7 @@ class OfflineReplayService:
)
results: list[IncidentReplayResult] = []
async with session_factory() as db:
async with get_db_context() as db:
for incident_id in incident_ids:
r = await self._replay_one(db, incident_id)
results.append(r)

View File

@@ -76,12 +76,13 @@ from src.api.v1 import terminal as terminal_v1 # Phase 19.1: Omni-Terminal SSE
from src.api.v1 import timeline as timeline_v1
from src.api.v1 import webhooks as webhooks_v1
from src.core.config import settings
from src.core.feature_flags import aiops_flags # ADR-080: AI 自主化飛輪 feature flags 啟動驗證
from src.core.http_client import close_all_http_clients, init_all_http_clients
from src.core.logging import get_logger, setup_logging
from src.core.redis_client import close_redis_pool, init_redis_pool
from src.core.sse import get_publisher
from src.core.telemetry import setup_telemetry, shutdown_telemetry
from src.services.adr100_slo_metrics_service import get_adr100_slo_metrics_service
from src.services.flywheel_stats_service import get_flywheel_stats_service
# CTO-201: Database & Executor
from src.db.base import close_db, init_db
@@ -553,7 +554,6 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
# 2026-04-27 P3.1-T3 by Claude
try:
from src.utils.timezone import now_taipei
from datetime import datetime as _dt
async def _run_kb_rot_cleaner_loop() -> None:
from src.jobs.kb_rot_cleaner import get_kb_rot_cleaner
@@ -683,7 +683,7 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
logger.warning("ollama_failover_system_start_failed", error=str(e))
# 2026-04-27 P3.2.2 by Claude — AI Provider 版本追蹤(每 1 小時)
# 探測 5 Providerollama/ollama_188/gemini/claude/openclaw_nemo版本
# 探測 5 Providerollama/ollama_local/gemini/claude/openclaw_nemo版本
# 寫入 ai_provider_version_history版本變更時 log warningP3.2.3 alerter 後續整合
try:
async def _run_model_version_tracker_loop() -> None:
@@ -1005,10 +1005,24 @@ app.include_router(platform_v1.router, prefix="/api/v1/platform", tags=["AwoooP
@app.get("/metrics", include_in_schema=False)
async def prometheus_metrics() -> Response:
"""Prometheus metrics endpoint for alerting"""
return Response(
content=generate_latest(),
media_type=CONTENT_TYPE_LATEST,
)
content = generate_latest().decode("utf-8")
# 2026-05-07 ogt + Claude Sonnet 4.6 — INC-20260507-99ADF2 修復
# 飛輪指標awoooi_flywheel_*)原本只在 /api/v1/stats/flywheel/metrics 暴露,
# 110 Prom awoooi-api job scrape /metrics 時抓不到 → FlywheelExecutionRateMissing 永久 firing
# 修法:在此串入飛輪指標,讓既有 scrape job 無需新增 job 即可抓到
try:
flywheel_metrics = await get_flywheel_stats_service().compute()
content += flywheel_metrics.to_prometheus_lines()
except Exception:
logger.warning("prometheus_metrics_flywheel_error")
# 2026-05-14 Codex — T18 ADR-100 SLO emitter
# GovernanceAgent 讀 Prometheus recording rules若 /metrics 不吐底層 DB totals
# sli:* rules 會全空並每小時重複發 governance_slo_data_gap。
try:
content += await get_adr100_slo_metrics_service().to_prometheus_lines()
except Exception as exc:
logger.warning("prometheus_metrics_adr100_slo_error", error=str(exc))
return Response(content=content, media_type=CONTENT_TYPE_LATEST)
# =============================================================================

View File

@@ -29,7 +29,7 @@ from __future__ import annotations
from prometheus_client import Histogram
# Buckets 對齊 NIM 實測分佈2-27s並覆蓋三段 timeout 30/20/15s 邊界
# 低端0.5-5s快速路徑Ollama 188 本地
# 低端0.5-5s快速路徑Ollama provider pool
# 中端5-20sNIM + Gemini fallback
# 高端20-60s超時 / 慢速 Provider
_AGENT_STEP_BUCKETS = [0.5, 1.0, 2.0, 5.0, 10.0, 15.0, 20.0, 30.0, 45.0, 60.0]

View File

@@ -39,14 +39,15 @@ import hashlib
import json
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from datetime import UTC, datetime
from typing import Any
from uuid import UUID
import structlog
from sqlalchemy import select, text
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from src.core.redis_client import get_redis
from src.db.awooop_models import (
AwoooPActiveRevision,
AwoooPMcpGatewayAudit,
@@ -277,7 +278,7 @@ class McpGateway:
self, ctx: GatewayContext, gate_result: GateCheckResult
) -> tuple[AwoooPMcpToolRegistry, AwoooPMcpGrant]:
"""Gate 3tool 在白名單 + grant 有效(未到期、未撤銷)"""
now = datetime.now(timezone.utc)
now = datetime.now(UTC)
# 查 tool registry
tool_result = await self._db.execute(
@@ -359,14 +360,9 @@ class McpGateway:
raise GateApprovalError("write/admin 操作需要 run_idapproval 追蹤用)")
try:
import aioredis
from src.core.config import settings
redis = aioredis.from_url(settings.REDIS_URL)
redis = get_redis()
approval_key = f"mcp_approval:{ctx.project_id}:{ctx.agent_id}:{ctx.tool_name}:{ctx.run_id}"
approved = await redis.get(approval_key)
await redis.aclose()
except Exception as exc:
logger.warning(
"mcp_gate5_redis_error",
@@ -392,10 +388,7 @@ class McpGateway:
parameters: dict[str, Any],
) -> MCPToolResult:
"""呼叫底層 MCP provider 執行工具"""
registry = get_provider_registry()
provider = registry.get(ctx.tool_name) or registry.get(
tool_row.tool_name if tool_row else ctx.tool_name
)
provider = await self._resolve_provider(ctx, tool_row)
# 找不到 provider → 回傳 shadow no-op
if provider is None:
@@ -411,14 +404,57 @@ class McpGateway:
)
audit_params = dict(parameters)
existing_audit = (
parameters.get("_mcp_audit")
if isinstance(parameters, dict) and isinstance(parameters.get("_mcp_audit"), dict)
else {}
)
audit_params["_mcp_audit"] = {
"project_id": ctx.project_id,
"agent_id": ctx.agent_id,
"run_id": str(ctx.run_id) if ctx.run_id else None,
"trace_id": ctx.trace_id,
"incident_id": existing_audit.get("incident_id") or ctx.trace_id,
"session_id": existing_audit.get("session_id"),
"flywheel_node": existing_audit.get("flywheel_node"),
"agent_role": existing_audit.get("agent_role") or ctx.agent_id,
"gateway_path": "awooop_mcp_gateway",
}
return await provider.execute(ctx.tool_name, audit_params)
async def _resolve_provider(
    self,
    ctx: GatewayContext,
    tool_row: AwoooPMcpToolRegistry | None,
):
    """Locate the provider able to execute the tool named in *ctx*.

    The provider registry is keyed by provider name (``kubernetes``,
    ``ssh_host``, ...), whereas the gateway context carries the governed
    tool name (``kubectl_get``, ``ssh_diagnose``, ...). A direct registry
    hit on ``ctx.tool_name`` is tried first; otherwise each registered
    provider's tool manifest is scanned for a matching tool name. This scan
    is a compatibility bridge until the registry offers a tool index.

    Args:
        ctx: Gateway call context; supplies ``tool_name``.
        tool_row: Registry row for the tool, if one was resolved. When
            truthy its ``tool_name`` is the name searched for in manifests.

    Returns:
        The matching provider, or ``None`` when no provider owns the tool.
    """
    providers = get_provider_registry()

    owner = providers.get(ctx.tool_name)
    if owner is not None:
        return owner

    if tool_row:
        wanted = tool_row.tool_name
    else:
        wanted = ctx.tool_name

    for candidate in providers.all():
        try:
            manifest = await candidate.list_tools()
        except Exception as exc:
            # A provider that cannot list its tools is skipped, not fatal.
            logger.debug(
                "mcp_gateway_provider_manifest_skipped",
                provider=getattr(candidate, "name", None),
                tool_name=wanted,
                error=str(exc),
            )
        else:
            for entry in manifest:
                if entry.name == wanted:
                    return candidate
    return None
# ── Audit log ─────────────────────────────────────────────────────────────
async def _write_audit(
@@ -446,6 +482,15 @@ class McpGateway:
json.dumps(result.output, sort_keys=True, default=str).encode()
).hexdigest()
gate_payload = {
**gate_result.as_dict(),
"schema_version": "awooop_mcp_gateway_audit_v1",
"gateway_path": "awooop_mcp_gateway",
"policy_enforced": True,
"is_shadow": ctx.is_shadow,
"required_scope": ctx.required_scope,
}
audit = AwoooPMcpGatewayAudit(
project_id=ctx.project_id,
run_id=ctx.run_id,
@@ -455,16 +500,15 @@ class McpGateway:
tool_name=ctx.tool_name,
input_hash=input_hash,
output_hash=output_hash,
gate_result=gate_result.as_dict(),
gate_result=gate_payload,
result_status=result_status,
block_gate=block_gate,
block_reason=block_reason,
latency_ms=latency_ms,
)
if tool_row is not None:
self._db.add(audit)
await self._db.flush()
self._db.add(audit)
await self._db.flush()
except Exception as exc:
logger.warning(
"mcp_gateway_audit_write_failed",

View File

@@ -14,6 +14,7 @@ from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any
from uuid import uuid4
from src.utils.timezone import now_taipei
@@ -29,7 +30,9 @@ class MCPTool:
name: str
description: str
input_schema: dict[str, Any]
server_name: str
# 2026-05-06 Codex: 部分舊 provider 的 list_tools() 尚未傳 server_name
# 先給 DTO 預設值registry 會以 provider.name 補正,避免啟動登記直接 crash。
server_name: str = ""
@dataclass
@@ -41,12 +44,21 @@ class MCPToolResult:
"""
success: bool
execution_id: str
execution_id: str = ""
output: Any | None = None
# 2026-05-06 Codex: 舊 provider 曾使用 data=... 作為成功輸出欄位。
# 保留 alias避免 provider 成功路徑因 dataclass 參數不相容而 crash。
data: Any | None = None
error: str | None = None
duration: float = 0.0
timestamp: datetime = field(default_factory=now_taipei)
def __post_init__(self) -> None:
    """Backfill fields that callers are allowed to omit.

    * An empty ``execution_id`` is replaced with a generated ``mcp-<uuid4>``.
    * When only the ``data`` alias was supplied, its value is copied to
      ``output`` so readers of ``output`` see the result.
    """
    self.execution_id = self.execution_id or f"mcp-{uuid4()}"
    if self.data is not None and self.output is None:
        self.output = self.data
def to_dict(self) -> dict:
return {
"success": self.success,

View File

@@ -24,6 +24,7 @@ from typing import Any
import httpx
from src.core.config import settings # P0-13: K8s namespace 由 settings.AWOOOI_K8S_NAMESPACE 提供
from src.services.mcp_audit_context import with_mcp_audit_context
from src.utils.timezone import now_taipei
logger = logging.getLogger(__name__)
@@ -518,6 +519,13 @@ class MCPBridge:
raise ValueError(f"Unknown MCP Server: {server_name}")
server = self._servers[server_name]
parameters = with_mcp_audit_context(
parameters,
session_id=f"mcp_bridge:{execution_id}",
flywheel_node="govern",
agent_role="mcp_bridge",
gateway_path="legacy_mcp_bridge",
)
result = await self._execute_tool(server, tool_name, parameters)
# ========================================

View File

@@ -41,6 +41,7 @@ SSH 連線:
@see docs/superpowers/specs/2026-04-10-infra-rebuild-sprint-abc-design.md §MCP-2a
"""
import logging
import re
import uuid
from datetime import UTC, datetime
@@ -51,6 +52,7 @@ import structlog
from src.plugins.mcp.interfaces import MCPTool, MCPToolProvider, MCPToolResult
logger = structlog.get_logger(__name__)
_asyncssh_logger_configured = False
# =============================================================================
# 安全常數
@@ -58,10 +60,17 @@ logger = structlog.get_logger(__name__)
SSH_KEY_PATH = "/run/secrets/ssh_mcp_key"
SSH_USER = "wooo"
SSH_PORT = 22
DEFAULT_HOST_USERS = {
# AI/Web host is operated by the ollama account in the current topology.
"192.168.0.188": "ollama",
}
SHORT_HOST_MAP = {
"110": "192.168.0.110",
"120": "192.168.0.120",
"121": "192.168.0.121",
"188": "192.168.0.188",
}
DIAG_TIMEOUT = 10 # 診斷類超時(秒)
OP_TIMEOUT = 60 # 操作類超時(秒)
@@ -104,6 +113,47 @@ def _validate_param(key: str, value: str) -> str:
# tail / port / lines 由呼叫方 int() 轉換,不需字串白名單
return value
def _normalize_ssh_host(value: str) -> str:
    """Reduce a host label to the bare host name that SSH should dial.

    Accepted shapes, applied in this order:

    * surrounding whitespace and a leading ``ssh://`` are removed;
    * anything up to the last ``@`` (a user part) is removed;
    * ``[addr]...`` returns ``addr`` immediately (bracketed IPv6);
    * ``host:port`` with exactly one colon and an all-digit port drops the
      port;
    * a short alias present in ``SHORT_HOST_MAP`` is expanded.

    The port is discarded rather than used: labels such as
    ``192.168.0.110:9100`` carry an exporter port, not the SSH port, and the
    caller connects on the platform SSH port instead.

    Args:
        value: Raw host label; a falsy value is treated as an empty string.

    Returns:
        The normalized host, or ``""`` for empty input.
    """
    candidate = (value or "").strip().removeprefix("ssh://")
    # rpartition leaves the string untouched when there is no "@".
    candidate = candidate.rpartition("@")[2]

    if candidate.startswith("[") and "]" in candidate:
        return candidate[1:].partition("]")[0]

    name, _, port = candidate.partition(":")
    if candidate.count(":") == 1 and port.isdigit():
        candidate = name

    return SHORT_HOST_MAP.get(candidate, candidate)
def _quiet_asyncssh_info_logs() -> None:
    """Raise the ``asyncssh`` logger to WARNING, once per process.

    Rationale carried over from the original note: asyncssh can emit an INFO
    record that uses ``%d`` with a string argument (reported for servers that
    send the exit status as a string), which surfaces as a noisy
    ``TypeError: %d format`` traceback from stdlib logging even though the
    tool result is still available. Keeping asyncssh at WARNING avoids those
    records; MCP audit logs remain the source of truth.

    NOTE(review): a logging filter that stringifies record args would cause
    the same symptom; confirm the root cause before relying on this alone.

    The module-level flag makes repeat calls a no-op, so a level changed
    later by other code is not overwritten.
    """
    global _asyncssh_logger_configured
    if not _asyncssh_logger_configured:
        logging.getLogger("asyncssh").setLevel(logging.WARNING)
        _asyncssh_logger_configured = True
# 群組 A只讀
GROUP_A_TOOLS = {
"ssh_diagnose",
@@ -198,6 +248,10 @@ class SSHProvider(MCPToolProvider):
),
input_schema={"type": "object", "properties": {
"host": {"type": "string", "description": "Target host IP"},
"container_name": {
"type": "string",
"description": "Optional Docker container name for container-focused diagnostics",
},
}, "required": ["host"]},
server_name=self.name,
),
@@ -375,7 +429,7 @@ class SSHProvider(MCPToolProvider):
error=f"Unknown tool: {tool_name}",
)
host = parameters.get("host", "")
host = _normalize_ssh_host(str(parameters.get("host", "")))
# 守衛 2: 允許的 host
if host not in self._allowed_hosts():
@@ -500,12 +554,23 @@ class SSHProvider(MCPToolProvider):
# 所有接受用戶字串的工具,必須先通過 _validate_param() 白名單驗證
if tool_name == "ssh_diagnose":
# 2026-04-27 Claude Sonnet 4.6: 主機告警自動診斷 — 只讀,不修改任何狀態
return (
command = (
"echo '=== CPU TOP ===' && ps aux --sort=-%cpu | head -15 && "
"echo '=== MEMORY ===' && free -h && "
"echo '=== DISK ===' && df -h && "
"echo '=== LOAD ===' && uptime"
)
container_name = params.get("container_name")
if container_name:
name = _validate_param("container_name", str(container_name))
command = (
f"{command} && "
f"echo '=== DOCKER STATS {name} ===' && "
f"docker stats --no-stream {name} 2>&1 && "
f"echo '=== DOCKER INSPECT {name} ===' && "
f"docker inspect {name} 2>&1 | head -80"
)
return command
if tool_name == "ssh_get_top_processes":
return "ps aux --sort=-%cpu | head -15"
@@ -604,7 +669,9 @@ class SSHProvider(MCPToolProvider):
raise RuntimeError(
"asyncssh is not installed. "
"Add 'asyncssh' to pyproject.toml dependencies."
)
) from None
_quiet_asyncssh_info_logs()
import os
if not os.path.exists(SSH_KEY_PATH):
@@ -625,11 +692,13 @@ class SSHProvider(MCPToolProvider):
async with asyncssh.connect(
host,
port=SSH_PORT,
username=username or SSH_USER,
client_keys=[SSH_KEY_PATH],
known_hosts=known_hosts_path, # None = 跳過驗證(內網),或指定文件路徑
connect_timeout=timeout,
config=None, # 禁止讀取使用者 ssh config避免 Port 字串污染 asyncssh
connect_timeout=float(timeout),
) as conn:
# Bug 根因asyncssh 模組沒有頂層 run();應呼叫 conn.run()2026-04-24 Claude Sonnet 4.6
result = await conn.run(cmd, timeout=timeout, check=False)
result = await conn.run(cmd, timeout=float(timeout), check=False)
return (result.stdout or ""), (result.stderr or "")

View File

@@ -167,6 +167,31 @@ class DriftReportRepository:
{"report_id": report_id, "narrative": narrative},
)
async def get_repeat_state(self, report: DriftReport) -> dict:
"""Return stable fingerprint repeat state for a drift report.

Loads recent rows from ``drift_reports`` for ``report.namespace`` and
hands them, together with *report*, to ``build_drift_repeat_state``,
whose return value is returned unchanged.

Args:
report: The drift report being evaluated; only ``namespace`` is read
here, the full object is forwarded to the builder.

Returns:
Whatever ``build_drift_repeat_state(report, rows)`` returns.
"""
# Imported inside the method rather than at module import time.
from src.services.drift_repeat_state import build_drift_repeat_state
async with get_db_context() as db:
# History window: same namespace, created in the last 24 hours, newest
# scan first, capped at 200 rows.
# NOTE(review): the window filters on created_at while ordering uses
# scanned_at — confirm this is intended.
result = await db.execute(
text("""
SELECT
report_id,
namespace,
status,
scanned_at,
created_at,
items
FROM drift_reports
WHERE namespace = :namespace
AND created_at > now() - interval '24 hours'
ORDER BY scanned_at DESC
LIMIT 200
"""),
{"namespace": report.namespace},
)
# Convert each row mapping to a plain dict keyed by column name.
rows = [dict(row) for row in result.mappings().all()]
return build_drift_repeat_state(report, rows)
_drift_repo: DriftReportRepository | None = None

View File

@@ -60,13 +60,17 @@ class MetricsDBRepository(IMetricsRepository):
cutoff = datetime.now(UTC) - timedelta(hours=hours)
# Query: 統計 executed vs total (approved + executed + execution_failed)
# 2026-05-06 ogt + Codex:
# approval_records.status 目前實際寫入的是大寫 enum
# (APPROVED / EXECUTION_SUCCESS / EXECUTION_FAILED)。舊查詢只看
# lowercase executed導致 AI Success 在報表層永遠趨近 0。
query = text("""
SELECT
COUNT(CASE WHEN status = 'executed' THEN 1 END) as executed_count,
COUNT(CASE WHEN UPPER(status::text) = 'EXECUTION_SUCCESS' THEN 1 END) as executed_count,
COUNT(*) as total_count
FROM approval_records
WHERE created_at >= :cutoff
AND status IN ('approved', 'executed', 'execution_failed')
AND UPPER(status::text) IN ('APPROVED', 'EXECUTION_SUCCESS', 'EXECUTION_FAILED')
""")
result = await session.execute(query, {"cutoff": cutoff})
@@ -127,11 +131,11 @@ class MetricsDBRepository(IMetricsRepository):
trend_query = text("""
SELECT
date_trunc('hour', created_at) as hour_bucket,
COUNT(CASE WHEN status = 'executed' THEN 1 END) * 100.0 /
COUNT(CASE WHEN UPPER(status::text) = 'EXECUTION_SUCCESS' THEN 1 END) * 100.0 /
NULLIF(COUNT(*), 0) as hourly_rate
FROM approval_records
WHERE created_at >= :cutoff
AND status IN ('approved', 'executed', 'execution_failed')
AND UPPER(status::text) IN ('APPROVED', 'EXECUTION_SUCCESS', 'EXECUTION_FAILED')
GROUP BY hour_bucket
ORDER BY hour_bucket DESC
LIMIT :limit

View File

@@ -104,7 +104,7 @@ async def get_agent_thinking(
) -> StreamingResponse:
"""
OpenClaw 思考軌跡 (SSE 串流)
Phase 1.2: 真實串接 Ollama at 192.168.0.188:11434
Phase 1.2: 真實串接設定中的 Ollama provider pool
"""
async def generate_thinking_stream():

View File

@@ -0,0 +1,606 @@
"""
ADR-100 Remediation Service
===========================
Safe operator entrypoints for verification remediation work items.
T25: remediation queue items are now actionable without mutating incident state:
- preview: show the selected guardrail path
- dry-run: collect read-only current state and validate supported executor routing
"""
from __future__ import annotations
import asyncio
from typing import Any, Literal, Protocol
import structlog
from src.models.incident import Incident
from src.repositories.incident_repository import IncidentDBRepository
from src.services.adr100_slo_status_service import (
Adr100SloStatusService,
get_adr100_slo_status_service,
)
from src.services.auto_repair_service import AutoRepairService
from src.services.post_execution_verifier import (
PostExecutionVerifier,
_assess_recovery,
_build_prometheus_query,
get_post_execution_verifier,
)
# Module-level structlog logger, named after this module.
logger = structlog.get_logger(__name__)
# Modes a caller may request; "auto" leaves the choice to _select_mode().
RemediationMode = Literal["auto", "reverify", "replay"]
# Queue statuses for which the "queue_item_ready" check in _base_checks() passes.
_READY_STATUSES = {"ready_for_replay", "ready_for_reverify"}
class RemediationNotFoundError(LookupError):
    """Requested ADR-100 remediation work item is not in the current read model.

    Raised by ``Adr100RemediationService._find_work_item`` with the missing
    ``work_item_id`` as its only argument.
    """
class _IncidentRepository(Protocol):
    """Structural type for the incident lookup dependency of the remediation service."""

    async def get_by_id(self, incident_id: str) -> Incident | None:
        """Return the incident stored under ``incident_id``, or ``None``."""
        ...
class Adr100RemediationService:
    """Read-only remediation preview and dry-run service.

    Work items are looked up in the ``verification_coverage.remediation_queue``
    section of the ADR-100 SLO report. ``preview`` only describes the plan.
    ``dry_run`` additionally loads the incident, collects the current state via
    the post-execution verifier and, unless history recording is disabled,
    appends the outcome to the alert operation log and the incident timeline.
    """

    def __init__(
        self,
        *,
        slo_service: Adr100SloStatusService | None = None,
        incident_repository: _IncidentRepository | None = None,
        auto_repair_service: AutoRepairService | None = None,
        verifier: PostExecutionVerifier | None = None,
        timeline_service: Any | None = None,
        alert_operation_log_repository: Any | None = None,
        record_history: bool = True,
    ) -> None:
        """Wire collaborators, falling back to the default implementations.

        Args:
            slo_service: Source of the SLO report carrying the remediation queue.
            incident_repository: Incident lookup used by ``dry_run``.
            auto_repair_service: Provides the read-only SSH MCP route preview.
            verifier: Post-execution verifier used to collect current state.
            timeline_service: Timeline writer; resolved lazily when ``None``.
            alert_operation_log_repository: Operation-log store; resolved
                lazily when ``None``.
            record_history: When ``False`` dry-runs are not persisted.
        """
        self._slo_service = slo_service or get_adr100_slo_status_service()
        self._incident_repository = incident_repository or IncidentDBRepository()
        self._auto_repair_service = auto_repair_service or AutoRepairService()
        self._verifier = verifier or get_post_execution_verifier()
        self._timeline_service = timeline_service
        self._alert_operation_log_repository = alert_operation_log_repository
        self._record_history_enabled = record_history

    async def preview(self, work_item_id: str, mode: RemediationMode = "auto") -> dict[str, Any]:
        """Return the safe execution plan for a remediation queue item.

        Raises:
            RemediationNotFoundError: The work item is not in the current queue.
        """
        item = await self._find_work_item(work_item_id)
        selected_mode = _select_mode(item, mode)
        checks = _base_checks(item)
        allowed = all(check["passed"] for check in checks)
        return {
            "schema_version": "adr100_remediation_preview_v1",
            "work_item_id": item.get("work_item_id"),
            "incident_id": item.get("incident_id"),
            "auto_repair_id": item.get("auto_repair_id"),
            "mode": selected_mode,
            "allowed": allowed,
            "safety_level": "read_only",
            "writes_incident_state": False,
            "writes_auto_repair_result": False,
            "checks": checks,
            "plan": _plan_for_item(item, selected_mode),
            "source": "adr100.verification_coverage.remediation_queue",
        }

    async def dry_run(self, work_item_id: str, mode: RemediationMode = "auto") -> dict[str, Any]:
        """Run a safe, read-only remediation dry-run for one queue item.

        When the incident cannot be loaded or any guardrail check fails, a
        "blocked" payload is returned (and recorded) without collecting state.

        Raises:
            RemediationNotFoundError: The work item is not in the current queue.
        """
        item = await self._find_work_item(work_item_id)
        selected_mode = _select_mode(item, mode)
        checks = _base_checks(item)
        incident = await self._load_incident(item)
        checks.append({
            "name": "incident_loaded",
            "passed": incident is not None,
            "detail": item.get("incident_id") or "missing incident_id",
        })
        if incident is None or not all(check["passed"] for check in checks):
            payload = _dry_run_blocked_payload(item, selected_mode, checks)
            payload["history"] = await self._record_dry_run_history(item, payload)
            return payload
        if selected_mode == "replay":
            return await self._dry_run_replay(item, incident, checks)
        return await self._dry_run_reverify(item, incident, checks)

    async def history(
        self,
        *,
        limit: int = 50,
        incident_id: str | None = None,
        work_item_id: str | None = None,
    ) -> dict[str, Any]:
        """Return durable dry-run history written by this remediation service.

        Args:
            limit: Maximum number of items returned; clamped to 1..200.
            incident_id: Optional filter passed to the operation-log query.
            work_item_id: Optional filter applied to the stored context.
        """
        safe_limit = max(1, min(limit, 200))
        # Over-fetch because rows written by other services share these event
        # types and are filtered out below by schema_version.
        fetch_limit = min(max(safe_limit * 4, 50), 200)
        rows: list[Any] = []
        repo = self._resolve_alert_operation_log_repository()
        for event_type in ("PRE_FLIGHT_PASSED", "PRE_FLIGHT_FAILED"):
            try:
                batch, _total = await repo.list_recent(
                    limit=fetch_limit,
                    event_type=event_type,
                    incident_id=incident_id,
                )
                rows.extend(batch)
            except Exception as exc:
                # Best effort: a failed fetch for one event type must not hide
                # the history available for the other.
                logger.warning(
                    "adr100_remediation_history_fetch_failed",
                    event_type=event_type,
                    incident_id=incident_id,
                    error=str(exc),
                )
        rows.sort(key=_record_created_at, reverse=True)
        items: list[dict[str, Any]] = []
        for row in rows:
            context = getattr(row, "context", None) or {}
            if context.get("schema_version") != "adr100_remediation_dry_run_history_v1":
                continue
            if work_item_id and context.get("work_item_id") != work_item_id:
                continue
            items.append(_history_item(row, context))
            if len(items) >= safe_limit:
                break
        return {
            "schema_version": "adr100_remediation_history_v1",
            "total": len(items),
            "limit": safe_limit,
            "filters": {
                "incident_id": incident_id,
                "work_item_id": work_item_id,
            },
            "items": items,
            "by_work_item": _summarize_history_by_work_item(items),
        }

    def _resolve_alert_operation_log_repository(self) -> Any:
        """Return the injected operation-log repository or the default one.

        The default is imported lazily and is not cached on the instance, so
        each call asks the factory again, exactly as the inline lookups did.
        """
        repo = self._alert_operation_log_repository
        if repo is None:
            from src.repositories.alert_operation_log_repository import (
                get_alert_operation_log_repository,
            )
            repo = get_alert_operation_log_repository()
        return repo

    async def _find_work_item(self, work_item_id: str) -> dict[str, Any]:
        """Return a copy of the queue item with the given id.

        Raises:
            RemediationNotFoundError: No queue item carries ``work_item_id``.
        """
        report = await self._slo_service.fetch_report()
        coverage = report.get("verification_coverage") or {}
        queue = coverage.get("remediation_queue") or {}
        for item in queue.get("items") or []:
            if item.get("work_item_id") == work_item_id:
                return dict(item)
        raise RemediationNotFoundError(work_item_id)

    async def _load_incident(self, item: dict[str, Any]) -> Incident | None:
        """Load the incident referenced by ``item``; ``None`` when it has no id."""
        incident_id = str(item.get("incident_id") or "")
        if not incident_id:
            return None
        return await self._incident_repository.get_by_id(incident_id)

    async def _dry_run_reverify(
        self,
        item: dict[str, Any],
        incident: Incident,
        checks: list[dict[str, Any]],
    ) -> dict[str, Any]:
        """Collect current state, assess it, and record a "reverify" dry-run."""
        post_state = await self._collect_current_state(incident)
        action_taken = f"dry_run_reverify:{item.get('playbook_id') or 'unknown'}"
        # No pre-state is available in a dry-run, hence the leading None.
        result = _assess_recovery(None, post_state, action_taken)
        payload = _dry_run_result_payload(
            item=item,
            mode="reverify",
            checks=checks,
            post_state=post_state,
            verification_result_preview=result,
            extra={
                "promql": _promql_for_incident(incident),
                "mcp_route": {
                    "agent_id": "post_execution_verifier",
                    "required_scope": "read",
                    "is_shadow": True,
                    "flywheel_node": "verify",
                },
            },
        )
        payload["history"] = await self._record_dry_run_history(item, payload)
        return payload

    async def _dry_run_replay(
        self,
        item: dict[str, Any],
        incident: Incident,
        checks: list[dict[str, Any]],
    ) -> dict[str, Any]:
        """Validate the executor route, collect state, and record a "replay" dry-run.

        ``checks`` is extended in place with the ``supported_executor_route``
        check; a missing route makes the resulting payload not ``allowed``.
        """
        diagnostic_command = _diagnostic_command_for_incident(incident)
        route = self._auto_repair_service.preview_read_only_ssh_mcp_route(
            incident,
            diagnostic_command,
        )
        checks.append({
            "name": "supported_executor_route",
            "passed": route is not None,
            "detail": "mcp:ssh_diagnose" if route else "missing host/container route",
        })
        post_state = await self._collect_current_state(incident)
        action_taken = f"dry_run_replay:{item.get('playbook_id') or 'unknown'}"
        result = _assess_recovery(None, post_state, action_taken)
        payload = _dry_run_result_payload(
            item=item,
            mode="replay",
            checks=checks,
            post_state=post_state,
            verification_result_preview=result,
            extra={
                "diagnostic_command_preview": diagnostic_command,
                "mcp_route": route,
                "promql": _promql_for_incident(incident),
            },
        )
        payload["history"] = await self._record_dry_run_history(item, payload)
        return payload

    async def _collect_current_state(self, incident: Incident) -> dict[str, Any]:
        """Collect verifier state with a 12 s budget; ``{}`` on timeout or error."""
        try:
            return await asyncio.wait_for(
                self._verifier._collect_post_state(incident),
                timeout=12.0,
            )
        except asyncio.TimeoutError:
            logger.warning(
                "adr100_remediation_dry_run_timeout",
                incident_id=incident.incident_id,
            )
            return {}
        except Exception as exc:
            logger.warning(
                "adr100_remediation_dry_run_collect_failed",
                incident_id=incident.incident_id,
                error=str(exc),
            )
            return {}

    async def _record_dry_run_history(
        self,
        item: dict[str, Any],
        payload: dict[str, Any],
    ) -> dict[str, Any]:
        """Persist a dry-run outcome to the operation log and the timeline.

        Both writes are best effort and independent: a failure is logged and
        the other write is still attempted.

        Returns:
            A dict with ``recorded`` plus either a ``reason`` (recording
            skipped) or the ``alert_operation_id`` / ``timeline_event_id``
            that were written.
        """
        if not self._record_history_enabled:
            return {"recorded": False, "reason": "disabled"}
        incident_id = str(item.get("incident_id") or "")
        if not incident_id:
            return {"recorded": False, "reason": "missing_incident_id"}
        history: dict[str, Any] = {
            "recorded": False,
            "alert_operation_id": None,
            "timeline_event_id": None,
        }
        context = _history_context(item, payload)
        allowed = bool(payload.get("allowed"))
        try:
            # Resolved inside the try so an import or factory failure is
            # logged instead of aborting the dry-run response.
            repo = self._resolve_alert_operation_log_repository()
            record = await repo.append(
                "PRE_FLIGHT_PASSED" if allowed else "PRE_FLIGHT_FAILED",
                incident_id=incident_id,
                auto_repair_id=str(item.get("auto_repair_id") or "") or None,
                actor="adr100_remediation_service",
                action_detail=f"adr100_remediation_dry_run:{payload.get('mode')}"[:200],
                success=allowed,
                context=context,
            )
            if record is not None:
                history["alert_operation_id"] = getattr(record, "id", None)
        except Exception as exc:
            logger.warning(
                "adr100_remediation_alert_operation_history_failed",
                incident_id=incident_id,
                error=str(exc),
            )
        try:
            timeline = self._timeline_service
            if timeline is None:
                from src.services.approval_db import get_timeline_service
                timeline = get_timeline_service()
            event = await timeline.add_event(
                event_type="verifier",
                status=_timeline_status(payload),
                title="ADR-100 remediation dry-run",
                description=_history_description(context),
                actor="adr100_remediation_service",
                actor_role=str(payload.get("mode") or "dry_run"),
                incident_id=incident_id,
            )
            if event:
                history["timeline_event_id"] = event.get("id")
        except Exception as exc:
            logger.warning(
                "adr100_remediation_timeline_history_failed",
                incident_id=incident_id,
                error=str(exc),
            )
        history["recorded"] = bool(
            history.get("alert_operation_id") or history.get("timeline_event_id")
        )
        return history
def _select_mode(item: dict[str, Any], requested: RemediationMode) -> Literal["reverify", "replay"]:
    """Resolve the effective dry-run mode for a queue item.

    An explicit "reverify" or "replay" request always wins. Otherwise the item
    is re-verified when its status or its action asks for it, and replayed in
    every other case.
    """
    if requested == "reverify" or requested == "replay":
        return requested
    wants_reverify = (
        item.get("remediation_status") == "ready_for_reverify"
        or item.get("remediation_action") == "reverify_with_promql_template"
    )
    return "reverify" if wants_reverify else "replay"
def _base_checks(item: dict[str, Any]) -> list[dict[str, Any]]:
    """Build the guardrail checks every preview and dry-run starts from.

    Returns three checks in fixed order: queue readiness, the read-only action
    allow-list, and a constant marker that dry-runs never mutate state.
    """
    queue_status = str(item.get("remediation_status") or "unknown")
    queue_action = str(item.get("remediation_action") or "unknown")
    read_only_actions = {
        "replay_with_supported_executor",
        "reverify_with_promql_template",
    }

    ready_check = {
        "name": "queue_item_ready",
        "passed": queue_status in _READY_STATUSES,
        "detail": queue_status,
    }
    guardrail_check = {
        "name": "read_only_guardrail",
        "passed": queue_action in read_only_actions,
        "detail": queue_action,
    }
    mutation_check = {
        "name": "no_state_mutation",
        "passed": True,
        "detail": "dry_run_does_not_update_incident_or_auto_repair_rows",
    }
    return [ready_check, guardrail_check, mutation_check]
def _plan_for_item(item: dict[str, Any], mode: str) -> dict[str, Any]:
    """Describe the read-only plan for ``mode``.

    Both plans require only "read" scope and list no writes; the replay plan
    additionally names the queue item's remediation action as its target.
    """
    is_reverify = mode == "reverify"
    plan: dict[str, Any] = {
        "step": (
            "collect_current_state_and_assess"
            if is_reverify
            else "validate_supported_executor_route_then_collect_current_state"
        ),
        "agent_id": "post_execution_verifier" if is_reverify else "auto_repair_executor",
        "required_scope": "read",
        "writes": [],
    }
    if not is_reverify:
        plan["target_action"] = item.get("remediation_action")
    return plan
def _dry_run_blocked_payload(
    item: dict[str, Any],
    mode: str,
    checks: list[dict[str, Any]],
) -> dict[str, Any]:
    """Build the dry-run response for an item that failed its guardrail checks.

    The payload is never ``allowed`` or ``executed``, carries the checks that
    were evaluated, and reports an empty post-state summary.
    """
    payload: dict[str, Any] = {"schema_version": "adr100_remediation_dry_run_v1"}
    for identity_key in ("work_item_id", "incident_id", "auto_repair_id"):
        payload[identity_key] = item.get(identity_key)
    payload.update(
        mode=mode,
        allowed=False,
        executed=False,
        safety_level="read_only",
        writes_incident_state=False,
        writes_auto_repair_result=False,
        checks=checks,
        verification_result_preview="blocked",
        post_state_summary={},
    )
    return payload
def _dry_run_result_payload(
    *,
    item: dict[str, Any],
    mode: str,
    checks: list[dict[str, Any]],
    post_state: dict[str, Any],
    verification_result_preview: str,
    extra: dict[str, Any],
) -> dict[str, Any]:
    """Build the response for a dry-run that actually collected state.

    ``allowed`` is true only when every check passed. Entries of ``extra`` are
    merged last, so they are appended to, or override, the standard fields.
    """
    every_check_passed = all(check["passed"] for check in checks)
    payload: dict[str, Any] = {
        "schema_version": "adr100_remediation_dry_run_v1",
        "work_item_id": item.get("work_item_id"),
        "incident_id": item.get("incident_id"),
        "auto_repair_id": item.get("auto_repair_id"),
        "mode": mode,
        "allowed": every_check_passed,
        "executed": True,
        "safety_level": "read_only",
        "writes_incident_state": False,
        "writes_auto_repair_result": False,
        "checks": checks,
        "verification_result_preview": verification_result_preview,
        "post_state_summary": _summarize_post_state(post_state),
    }
    payload.update(extra)
    return payload
def _summarize_post_state(post_state: dict[str, Any]) -> dict[str, Any]:
    """Condense collected state into a key count and the first eight sorted keys."""
    tool_names = list(post_state.keys())
    tool_names.sort()
    summary: dict[str, Any] = {"tool_count": len(tool_names)}
    summary["tools"] = tool_names[:8]
    summary["has_state"] = bool(post_state)
    return summary
def _history_context(item: dict[str, Any], payload: dict[str, Any]) -> dict[str, Any]:
    """Assemble the context stored with a dry-run history record.

    Identity fields come from the queue item, outcome fields from the dry-run
    payload; missing keys are stored as ``None``.
    """
    item_fields = ("work_item_id", "auto_repair_id", "playbook_id", "alertname")
    payload_fields = (
        "mode",
        "allowed",
        "executed",
        "safety_level",
        "writes_incident_state",
        "writes_auto_repair_result",
        "verification_result_preview",
        "post_state_summary",
        "mcp_route",
        "checks",
    )
    context: dict[str, Any] = {"schema_version": "adr100_remediation_dry_run_history_v1"}
    context.update({name: item.get(name) for name in item_fields})
    context.update({name: payload.get(name) for name in payload_fields})
    return context
def _timeline_status(payload: dict[str, Any]) -> str:
    """Map a dry-run payload to a timeline status.

    Only an allowed dry-run whose preview is "success" is a success; anything
    else is reported as a warning.
    """
    succeeded = (
        bool(payload.get("allowed"))
        and payload.get("verification_result_preview") == "success"
    )
    return "success" if succeeded else "warning"
def _history_description(context: dict[str, Any]) -> str:
    """Render a one-line, at most 500-character summary of a history context."""
    state_summary = context.get("post_state_summary") or {}
    mcp_route = context.get("mcp_route") or {}
    agent_name = mcp_route.get("agent_id") or "unknown_agent"
    tool_name = mcp_route.get("tool_name") or "current_state"
    fragments = [
        f"mode={context.get('mode')}",
        f"preview={context.get('verification_result_preview')}",
        f"tools={state_summary.get('tool_count', 0)}",
        f"route={agent_name}/{tool_name}",
        f"writes_incident={context.get('writes_incident_state')}",
        f"writes_auto_repair={context.get('writes_auto_repair_result')}",
    ]
    description = " ".join(fragments)
    return description[:500]
def _record_created_at(record: Any) -> str:
    """Return the record's ``created_at`` as text, usable as a sort key.

    Values exposing ``isoformat`` are rendered through it; anything else is
    stringified, with a missing or falsy value becoming the empty string.
    """
    stamp = getattr(record, "created_at", None)
    if not hasattr(stamp, "isoformat"):
        return str(stamp or "")
    return stamp.isoformat()
def _history_item(record: Any, context: dict[str, Any]) -> dict[str, Any]:
    """Flatten an operation-log record and its stored context into one API item.

    Record attributes supply identity and outcome columns; the context supplies
    the dry-run details. ``auto_repair_id`` falls back to the context value when
    the record has none.
    """
    mcp_route = context.get("mcp_route") or {}
    state_summary = context.get("post_state_summary") or {}

    entry: dict[str, Any] = {
        "id": str(getattr(record, "id", "")),
        "incident_id": getattr(record, "incident_id", None),
    }
    recorded_repair_id = getattr(record, "auto_repair_id", None)
    entry["auto_repair_id"] = (
        recorded_repair_id if recorded_repair_id else context.get("auto_repair_id")
    )
    entry["event_type"] = str(getattr(record, "event_type", ""))
    entry["actor"] = getattr(record, "actor", None)
    entry["success"] = getattr(record, "success", None)
    entry["created_at"] = _record_created_at(record)

    copied_from_context = (
        "work_item_id",
        "playbook_id",
        "alertname",
        "mode",
        "allowed",
        "executed",
        "safety_level",
        "verification_result_preview",
    )
    for name in copied_from_context:
        entry[name] = context.get(name)

    entry["tool_count"] = state_summary.get("tool_count", 0)
    entry["tools"] = state_summary.get("tools") or []
    entry["agent_id"] = mcp_route.get("agent_id")
    entry["tool_name"] = mcp_route.get("tool_name") or "current_state"
    entry["required_scope"] = mcp_route.get("required_scope")
    entry["writes_incident_state"] = context.get("writes_incident_state")
    entry["writes_auto_repair_result"] = context.get("writes_auto_repair_result")
    entry["checks"] = context.get("checks") or []
    return entry
def _summarize_history_by_work_item(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Group history items per work item, keeping the first item's details.

    Items are keyed by ``work_item_id``, falling back to ``incident_id`` and
    then ``id``. The "latest_*" fields are taken from the first item seen for a
    key, so the caller is expected to pass items newest-first.
    """
    grouped: dict[str, dict[str, Any]] = {}
    for entry in items:
        group_key = str(
            entry.get("work_item_id") or entry.get("incident_id") or entry.get("id")
        )
        bucket = grouped.get(group_key)
        if bucket is None:
            bucket = {
                "work_item_id": entry.get("work_item_id"),
                "incident_id": entry.get("incident_id"),
                "count": 0,
                "latest_at": entry.get("created_at"),
                "latest_event_type": entry.get("event_type"),
                "latest_success": entry.get("success"),
                "latest_preview": entry.get("verification_result_preview"),
                "latest_mode": entry.get("mode"),
                "latest_agent_id": entry.get("agent_id"),
                "latest_tool_name": entry.get("tool_name"),
                "required_scope": entry.get("required_scope"),
            }
            grouped[group_key] = bucket
        bucket["count"] += 1
    return list(grouped.values())
def _diagnostic_command_for_incident(incident: Incident) -> str:
    """Build the read-only SSH diagnostic command previewed for an incident.

    The target is the ``host`` label (falling back to ``instance``) and the
    optional container is ``container_name`` (falling back to ``container``).
    When no host label exists the literal ``{host}`` placeholder is emitted.

    Label values originate from alert payloads, so they are shell-quoted with
    ``shlex.quote`` before being embedded. For ordinary hostnames and container
    names the output is identical to the unquoted form; values containing
    quotes, spaces or ``;`` can no longer break out of the command.
    """
    import shlex

    labels = _labels_for_incident(incident)
    host = str(labels.get("host") or labels.get("instance") or "")
    container = str(labels.get("container_name") or labels.get("container") or "")

    # Keep the unquoted placeholder when no host is known.
    host_arg = shlex.quote(host) if host else "{host}"
    # NOTE(review): a host value starting with "-" would still be read by ssh
    # as an option; confirm whether labels are validated upstream.
    remote_command = "uptime; docker stats --no-stream"
    if container:
        remote_command = f"{remote_command} {shlex.quote(container)}"
    # Quote the whole remote command once more for the local shell.
    return f"ssh {host_arg} {shlex.quote(remote_command)}"


def _promql_for_incident(incident: Incident) -> str:
    """Build the PromQL query for the incident's first signal.

    The alert name is the ``alertname`` label, falling back to the signal's
    ``alert_name`` attribute; it is empty when the incident has no signals.
    """
    labels = _labels_for_incident(incident)
    alertname = ""
    if incident.signals:
        signal = incident.signals[0]
        alertname = labels.get("alertname") or getattr(signal, "alert_name", "")
    return _build_prometheus_query(alertname, labels)


def _labels_for_incident(incident: Incident) -> dict[str, Any]:
    """Return the labels of the incident's first signal, or an empty dict."""
    if incident.signals:
        return incident.signals[0].labels or {}
    return {}
# Process-wide remediation service instance, created on first access.
_service: Adr100RemediationService | None = None


def get_adr100_remediation_service() -> Adr100RemediationService:
    """Return the shared ADR-100 remediation service, creating it on first use."""
    global _service
    if _service is not None:
        return _service
    _service = Adr100RemediationService()
    return _service


def set_adr100_remediation_service(service: Adr100RemediationService | None) -> None:
    """Replace the shared service (tests); ``None`` resets it to lazy creation."""
    global _service
    _service = service

View File

@@ -0,0 +1,354 @@
"""
ADR-100 SLO metrics emitter.
Prometheus recording rules for the AI flywheel SLOs expect a small set of
counter-like metrics. The source of truth already lives in PostgreSQL, so this
read-side emitter exposes DB totals on /metrics without changing runtime write
paths or introducing another state store.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from time import time
from sqlalchemy import text
from src.db.base import get_db_context
@dataclass(frozen=True)
class AutomationOperationSample:
    """One grouped automation-operation count, keyed by outcome and operation type."""

    # Label values rendered on the Prometheus sample.
    outcome: str
    operation_type: str
    # Number of rows in the group.
    count: int
@dataclass(frozen=True)
class VerificationSample:
    """One grouped post-execution verification count, keyed by outcome."""

    # Label value rendered on the Prometheus sample.
    outcome: str
    # Number of rows in the group.
    count: int
@dataclass(frozen=True)
class Adr100SloMetricsSnapshot:
    """Immutable set of DB-derived values rendered by ``render_adr100_slo_metrics``."""

    # All-time and last-24h grouped automation operation counts.
    automation_operations: list[AutomationOperationSample] = field(default_factory=list)
    automation_operations_24h: list[AutomationOperationSample] = field(default_factory=list)
    # All-time and last-24h grouped verification result counts.
    post_execution_verifications: list[VerificationSample] = field(default_factory=list)
    post_execution_verifications_24h: list[VerificationSample] = field(default_factory=list)
    # Knowledge entry totals (all time / created in the last 24 hours).
    knowledge_entries_total: int = 0
    knowledge_entries_created_24h: int = 0
    # High-confidence approval counts (all / with a successful verification).
    high_confidence_total: int = 0
    high_confidence_success_total: int = 0
    # Unix timestamp taken when the snapshot object is created.
    emitted_at: float = field(default_factory=time)
class Adr100SloMetricsService:
    """Build ADR-100 Prometheus samples from production DB state."""

    async def to_prometheus_lines(self) -> str:
        """Fetch a fresh snapshot and render it in Prometheus text format."""
        return render_adr100_slo_metrics(await self.fetch_snapshot())

    async def fetch_snapshot(self) -> Adr100SloMetricsSnapshot:
        """Run the read-only SLO queries and pack the results into a snapshot.

        The queries run one after another on a single DB context, in the same
        order as the snapshot fields.
        """
        async with get_db_context() as db:

            async def all_rows(sql: str):
                # Execute one statement and materialize every row.
                outcome = await db.execute(text(sql))
                return outcome.fetchall()

            operation_rows = await all_rows(_AUTOMATION_OPERATION_SQL)
            operation_rows_24h = await all_rows(_AUTOMATION_OPERATION_24H_SQL)
            verification_rows = await all_rows(_POST_EXECUTION_VERIFICATION_SQL)
            verification_rows_24h = await all_rows(_POST_EXECUTION_VERIFICATION_24H_SQL)

            total_result = await db.execute(text("SELECT count(*) FROM knowledge_entries"))
            knowledge_total = int(total_result.scalar() or 0)

            recent_result = await db.execute(
                text(
                    "\n"
                    "SELECT count(*)\n"
                    "FROM knowledge_entries\n"
                    "WHERE created_at >= NOW() - INTERVAL '24 hours'\n"
                )
            )
            knowledge_created_24h = int(recent_result.scalar() or 0)

            confidence_result = await db.execute(text(_HIGH_CONFIDENCE_APPROVAL_SQL))
            confidence_row = confidence_result.one()

        def operation_samples(rows) -> list[AutomationOperationSample]:
            # NULL counts are coerced to 0.
            return [
                AutomationOperationSample(
                    outcome=str(row.outcome),
                    operation_type=str(row.operation_type),
                    count=int(row.count or 0),
                )
                for row in rows
            ]

        def verification_samples(rows) -> list[VerificationSample]:
            return [
                VerificationSample(outcome=str(row.outcome), count=int(row.count or 0))
                for row in rows
            ]

        return Adr100SloMetricsSnapshot(
            automation_operations=operation_samples(operation_rows),
            automation_operations_24h=operation_samples(operation_rows_24h),
            post_execution_verifications=verification_samples(verification_rows),
            post_execution_verifications_24h=verification_samples(verification_rows_24h),
            knowledge_entries_total=knowledge_total,
            knowledge_entries_created_24h=knowledge_created_24h,
            high_confidence_total=int(confidence_row.high_confidence_total or 0),
            high_confidence_success_total=int(
                confidence_row.high_confidence_success_total or 0
            ),
        )
def render_adr100_slo_metrics(snapshot: Adr100SloMetricsSnapshot) -> str:
    """Render ADR-100 SLO metrics in Prometheus text exposition format.

    Each labelled family emits one line per sample, or a single zero-valued
    placeholder sample with "none" labels when it has no samples. The output
    starts and ends with an empty line.
    """

    def operation_labels(sample) -> str:
        return (
            f'{{outcome="{_escape_label(sample.outcome)}",'
            f'operation_type="{_escape_label(sample.operation_type)}"}}'
        )

    def outcome_labels(sample) -> str:
        return f'{{outcome="{_escape_label(sample.outcome)}"}}'

    # (metric name, header lines, samples, label renderer, placeholder line)
    labelled_families = (
        (
            "automation_operation_log_total",
            (
                "# HELP automation_operation_log_total DB-derived AI automation operation count for ADR-100 SLOs",
                "# TYPE automation_operation_log_total counter",
            ),
            snapshot.automation_operations,
            operation_labels,
            'automation_operation_log_total{outcome="none",operation_type="none"} 0',
        ),
        (
            "automation_operation_created_24h",
            (
                "# HELP automation_operation_created_24h DB-derived AI automation operation count created in the last 24 hours for ADR-100 SLO dashboards",
                "# TYPE automation_operation_created_24h gauge",
            ),
            snapshot.automation_operations_24h,
            operation_labels,
            'automation_operation_created_24h{outcome="none",operation_type="none"} 0',
        ),
        (
            "post_execution_verification_total",
            (
                "# HELP post_execution_verification_total DB-derived post execution verification result count for ADR-100 SLOs",
                "# TYPE post_execution_verification_total counter",
            ),
            snapshot.post_execution_verifications,
            outcome_labels,
            'post_execution_verification_total{outcome="none"} 0',
        ),
        (
            "post_execution_verification_created_24h",
            (
                "# HELP post_execution_verification_created_24h DB-derived post execution verification result count created in the last 24 hours for ADR-100 SLO dashboards",
                "# TYPE post_execution_verification_created_24h gauge",
            ),
            snapshot.post_execution_verifications_24h,
            outcome_labels,
            'post_execution_verification_created_24h{outcome="none"} 0',
        ),
    )

    rendered: list[str] = [""]
    for metric, header, samples, render_labels, placeholder in labelled_families:
        rendered.extend(header)
        if not samples:
            rendered.append(placeholder)
            continue
        for sample in samples:
            rendered.append(f"{metric}{render_labels(sample)} {sample.count}")

    # Unlabelled scalar metrics, followed by the trailing empty line.
    rendered += [
        "# HELP knowledge_entries_total DB-derived knowledge entry count for ADR-100 SLOs",
        "# TYPE knowledge_entries_total counter",
        f"knowledge_entries_total {snapshot.knowledge_entries_total}",
        "# HELP knowledge_entries_created_24h DB-derived knowledge entries created in the last 24 hours for ADR-100 SLOs",
        "# TYPE knowledge_entries_created_24h gauge",
        f"knowledge_entries_created_24h {snapshot.knowledge_entries_created_24h}",
        "# HELP approval_records_high_confidence_total DB-derived high confidence approval decisions for ADR-100 SLOs",
        "# TYPE approval_records_high_confidence_total counter",
        f"approval_records_high_confidence_total {snapshot.high_confidence_total}",
        "# HELP approval_records_high_confidence_success_total DB-derived high confidence approval decisions with successful verification for ADR-100 SLOs",
        "# TYPE approval_records_high_confidence_success_total counter",
        f"approval_records_high_confidence_success_total {snapshot.high_confidence_success_total}",
        "# HELP adr100_slo_emitter_last_success_timestamp Last successful ADR-100 DB metrics emission timestamp",
        "# TYPE adr100_slo_emitter_last_success_timestamp gauge",
        f"adr100_slo_emitter_last_success_timestamp {snapshot.emitted_at:.0f}",
        "",
    ]
    return "\n".join(rendered)
def _escape_label(value: str) -> str:
    """Escape backslash, newline and double quote for a Prometheus label value."""
    # One pass over the text: each special character maps to its escaped form.
    escapes = str.maketrans({"\\": "\\\\", "\n": "\\n", '"': '\\"'})
    return value.translate(escapes)
# All-time automation operation counts grouped by (outcome, operation_type).
# Rows come from automation_operation_log (restricted to five execution-related
# operation types) plus every auto_repair_executions row, which is reported as
# operation_type 'auto_repair_executed'.
# Outcome for log rows: the raw status when it is not 'success'; otherwise
# 'human_required' when the actor is 'approval_execution' and requested_by does
# not start with "auto" (case-insensitive); otherwise 'auto_executed'.
# NOTE(review): the pattern is written 'auto%%'. Two consecutive % wildcards
# match the same strings as one, so this is harmless even if the driver does
# not collapse the doubled percent sign - confirm the doubling is intended.
_AUTOMATION_OPERATION_SQL = """
WITH automation_scope AS (
SELECT
CASE
WHEN status <> 'success' THEN status
WHEN actor = 'approval_execution'
AND COALESCE(input->>'requested_by', '') NOT ILIKE 'auto%%'
THEN 'human_required'
ELSE 'auto_executed'
END AS outcome,
operation_type
FROM automation_operation_log
WHERE operation_type IN (
'playbook_executed',
'remediation_executed',
'remediation_verified',
'remediation_rolled_back',
'self_correction_attempted'
)
UNION ALL
SELECT
CASE WHEN success THEN 'auto_executed' ELSE 'failed' END AS outcome,
'auto_repair_executed' AS operation_type
FROM auto_repair_executions
)
SELECT
outcome,
operation_type,
count(*) AS count
FROM automation_scope
GROUP BY outcome, operation_type
ORDER BY outcome, operation_type
"""
# Same grouping and outcome rules as _AUTOMATION_OPERATION_SQL, but both source
# tables are limited to rows whose created_at falls within the last 24 hours.
_AUTOMATION_OPERATION_24H_SQL = """
WITH automation_scope AS (
SELECT
CASE
WHEN status <> 'success' THEN status
WHEN actor = 'approval_execution'
AND COALESCE(input->>'requested_by', '') NOT ILIKE 'auto%%'
THEN 'human_required'
ELSE 'auto_executed'
END AS outcome,
operation_type
FROM automation_operation_log
WHERE operation_type IN (
'playbook_executed',
'remediation_executed',
'remediation_verified',
'remediation_rolled_back',
'self_correction_attempted'
)
AND created_at >= NOW() - INTERVAL '24 hours'
UNION ALL
SELECT
CASE WHEN success THEN 'auto_executed' ELSE 'failed' END AS outcome,
'auto_repair_executed' AS operation_type
FROM auto_repair_executions
WHERE created_at >= NOW() - INTERVAL '24 hours'
)
SELECT
outcome,
operation_type,
count(*) AS count
FROM automation_scope
GROUP BY outcome, operation_type
ORDER BY outcome, operation_type
"""
# All-time count of incident_evidence rows per non-NULL verification_result.
_POST_EXECUTION_VERIFICATION_SQL = """
SELECT verification_result AS outcome, count(*) AS count
FROM incident_evidence
WHERE verification_result IS NOT NULL
GROUP BY verification_result
ORDER BY verification_result
"""
# Count of incident_evidence rows per non-NULL verification_result, limited to
# evidence whose collected_at falls within the last 24 hours.
_POST_EXECUTION_VERIFICATION_24H_SQL = """
SELECT verification_result AS outcome, count(*) AS count
FROM incident_evidence
WHERE verification_result IS NOT NULL
AND collected_at >= NOW() - INTERVAL '24 hours'
GROUP BY verification_result
ORDER BY verification_result
"""
# Counts approval_records whose confidence is at least 0.8, and how many of
# those belong to an incident with at least one 'success' verification.
# Confidence is the first available of: extra_metadata.confidence_score,
# extra_metadata.confidence (both only when the text is a plain non-negative
# decimal number), composite_score, and finally 0.
# The regex backslash is doubled because this is a regular (non-raw) string;
# the database receives the pattern with a single backslash.
_HIGH_CONFIDENCE_APPROVAL_SQL = """
WITH approval_confidence AS (
SELECT
id,
incident_id,
COALESCE(
CASE
WHEN extra_metadata->>'confidence_score' ~ '^[0-9]+(\\.[0-9]+)?$'
THEN (extra_metadata->>'confidence_score')::numeric
ELSE NULL
END,
CASE
WHEN extra_metadata->>'confidence' ~ '^[0-9]+(\\.[0-9]+)?$'
THEN (extra_metadata->>'confidence')::numeric
ELSE NULL
END,
composite_score,
0
) AS confidence
FROM approval_records
)
SELECT
count(*) FILTER (WHERE confidence >= 0.8) AS high_confidence_total,
count(*) FILTER (
WHERE confidence >= 0.8
AND EXISTS (
SELECT 1
FROM incident_evidence ev
WHERE ev.incident_id = approval_confidence.incident_id
AND ev.verification_result = 'success'
)
) AS high_confidence_success_total
FROM approval_confidence
"""
# Process-wide metrics service instance, created on first access.
_adr100_slo_metrics_service: Adr100SloMetricsService | None = None


def get_adr100_slo_metrics_service() -> Adr100SloMetricsService:
    """Return the shared ADR-100 SLO metrics service, creating it on first use."""
    global _adr100_slo_metrics_service
    if _adr100_slo_metrics_service is not None:
        return _adr100_slo_metrics_service
    _adr100_slo_metrics_service = Adr100SloMetricsService()
    return _adr100_slo_metrics_service

View File

@@ -0,0 +1,743 @@
"""
Read-only ADR-100 SLO status snapshot.
GovernanceAgent.check_slo_compliance() can emit governance alerts when an SLO is
violated. This service is intentionally read-only so dashboards can show the
same Prometheus-backed state without producing Telegram/DB side effects.
"""
from __future__ import annotations
import math
from dataclasses import dataclass
from typing import Any
import httpx
import structlog
from sqlalchemy import text
from src.core.config import settings
from src.db.base import get_db_context
from src.utils.timezone import now_taipei_iso
# Module-level structlog logger, named after this module.
logger = structlog.get_logger(__name__)
@dataclass(frozen=True)
class Adr100SloDefinition:
    """Static description of one ADR-100 SLO and how to read it from Prometheus."""

    # Identifier of the SLO.
    name: str
    # PromQL expression evaluated for the SLO value.
    query: str
    # Thresholds handed to the status classifier together with ``direction``.
    # NOTE(review): presumably target = desired level and hard_red_line = the
    # violation boundary - confirm against _classify_status.
    target: float
    hard_red_line: float
    direction: str
    # Presentation metadata (the definitions below use "percent" / "count" and
    # windows such as "5m").
    unit: str
    window: str
    # Optional PromQL rate query used to estimate how many events back the SLO.
    denominator_query: str | None = None
    # Multiplied by the denominator rate to obtain an event count for the window.
    denominator_window_seconds: int = 0
    # Below this estimated event count the SLO is reported as skipped_low_volume.
    minimum_events: float = 1.0
# The SLOs evaluated by Adr100SloStatusService.fetch_report(), in report order.
# The first three read recording-rule series and carry a denominator rate query
# so low-traffic windows can be skipped; km_growth_rate has no denominator.
ADR100_SLO_DEFINITIONS: tuple[Adr100SloDefinition, ...] = (
    Adr100SloDefinition(
        name="autonomy_rate",
        query="sli:autonomy_rate:5m",
        target=0.80,
        hard_red_line=0.70,
        direction="above",
        unit="percent",
        window="5m",
        denominator_query="sum(rate(automation_operation_log_total[5m]))",
        denominator_window_seconds=300,
    ),
    Adr100SloDefinition(
        name="decision_accuracy",
        query="sli:decision_accuracy:5m",
        target=0.90,
        hard_red_line=0.85,
        direction="above",
        unit="percent",
        window="5m",
        denominator_query='sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))',
        denominator_window_seconds=300,
    ),
    Adr100SloDefinition(
        name="confidence_calibration",
        query="sli:confidence_calibration:1h",
        target=0.80,
        hard_red_line=0.70,
        direction="above",
        unit="percent",
        window="1h",
        denominator_query="sum(rate(approval_records_high_confidence_total[1h]))",
        denominator_window_seconds=3600,
    ),
    Adr100SloDefinition(
        name="km_growth_rate",
        # Prefers the DB-derived 24h gauge and falls back to the recording rule.
        query="max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)",
        target=20.0,
        hard_red_line=5.0,
        direction="above",
        unit="count",
        window="24h",
    ),
)
class Adr100SloStatusService:
"""Fetch ADR-100 SLO status from Prometheus without writing governance events."""
async def fetch_report(self) -> dict[str, Any]:
prom_url = getattr(
settings,
"PROMETHEUS_URL",
"http://prometheus.observability.svc:9090",
).rstrip("/")
metrics: list[dict[str, Any]] = []
async with httpx.AsyncClient(timeout=5.0) as client:
for definition in ADR100_SLO_DEFINITIONS:
metrics.append(await self._fetch_metric(client, prom_url, definition))
evaluable = [metric for metric in metrics if metric.get("evaluable")]
ok_count = sum(1 for metric in evaluable if metric.get("status") == "ok")
overall_compliance = (ok_count / len(evaluable)) if evaluable else None
verification_coverage = await self._fetch_verification_coverage()
overall_status = _overall_status(metrics, evaluable, verification_coverage)
return {
"schema_version": "adr100_slo_status_v1",
"source": "prometheus+postgresql",
"evaluated_at": now_taipei_iso(),
"overall_status": overall_status,
"overall_compliance": overall_compliance,
"evaluable_count": len(evaluable),
"metric_count": len(metrics),
"metrics": metrics,
"verification_coverage": verification_coverage,
}
async def _fetch_metric(
self,
client: httpx.AsyncClient,
prom_url: str,
definition: Adr100SloDefinition,
) -> dict[str, Any]:
denominator_value: float | None = None
sample_count: float | None = None
if definition.denominator_query:
denominator_result = await _query_prometheus_value(
client,
prom_url,
definition.denominator_query,
)
if denominator_result["status"] != "ok":
return _metric_payload(
definition,
value=None,
status="no_data",
reason=denominator_result["reason"],
denominator_value=None,
sample_count=None,
)
denominator_value = float(denominator_result["value"])
sample_count = denominator_value * definition.denominator_window_seconds
if sample_count < definition.minimum_events:
return _metric_payload(
definition,
value=None,
status="skipped_low_volume",
reason="denominator_below_minimum_events",
denominator_value=denominator_value,
sample_count=sample_count,
)
value_result = await _query_prometheus_value(client, prom_url, definition.query)
if value_result["status"] != "ok":
status = (
"skipped_low_volume"
if value_result["reason"] == "prometheus_nan_or_inf"
else "no_data"
)
return _metric_payload(
definition,
value=None,
status=status,
reason=value_result["reason"],
denominator_value=denominator_value,
sample_count=sample_count,
)
value = float(value_result["value"])
status = _classify_status(value, definition)
return _metric_payload(
definition,
value=value,
status=status,
reason=None,
denominator_value=denominator_value,
sample_count=sample_count if sample_count is not None else value,
)
async def _fetch_verification_coverage(self) -> dict[str, Any]:
    """Summarize whether recent auto-repair executions have verifier evidence.

    Runs the three verification-coverage SQL statements inside one database
    context and passes the rows to ``_build_verification_coverage_payload``.

    Returns:
        The verification coverage payload. If any query raises, a warning is
        logged and a zeroed payload with ``status="error"`` is returned
        instead of propagating the exception. The error payload carries the
        same keys as the success payload, including
        ``non_success_breakdown.by_remediation_status``, so consumers can read
        either shape without key checks.
    """
    try:
        async with get_db_context() as db:
            summary_row = (
                await db.execute(text(_VERIFICATION_COVERAGE_SQL))
            ).mappings().one()
            recent_rows = (
                await db.execute(text(_VERIFICATION_COVERAGE_RECENT_SQL))
            ).mappings().all()
            recent_non_success_rows = (
                await db.execute(text(_VERIFICATION_COVERAGE_NON_SUCCESS_SQL))
            ).mappings().all()
    except Exception as exc:
        # Best-effort read model: a database failure degrades this section to
        # an explicit "error" status rather than failing the whole SLO report.
        logger.warning("adr100_verification_coverage_query_error", error=str(exc))
        return {
            "schema_version": "adr100_verification_coverage_v1",
            "source": "postgresql",
            "window": "24h",
            "status": "error",
            "reason": "postgresql_query_error",
            "evaluable": False,
            "total_auto": 0,
            "successful_auto": 0,
            "verified_auto": 0,
            "verified_success": 0,
            "verified_non_success": 0,
            "unverified_auto": 0,
            "coverage_rate": None,
            "verification_success_rate": None,
            "last_auto_at": None,
            "last_verified_auto_at": None,
            "last_verification_evidence_at": None,
            "latest_auto_age_seconds": None,
            "last_verified_auto_age_seconds": None,
            "recent_unverified": [],
            "recent_non_success": [],
            "non_success_breakdown": {
                "by_verification_result": [],
                "by_failure_class": [],
                # Present in the success payload; keep the schema identical.
                "by_remediation_status": [],
            },
            "remediation_queue": _remediation_queue_payload([]),
        }
    return _build_verification_coverage_payload(
        summary_row,
        recent_rows,
        recent_non_success_rows,
    )
# Summary query for verification coverage: a single aggregate row.
# - recent_auto: auto_repair_executions rows created in the last 24 hours.
# - per_auto: each execution paired (LEFT JOIN LATERAL) with the most recently
#   collected incident_evidence row for the same incident that has a non-null
#   verification_result.
# The final SELECT counts totals, verified/unverified executions, and the
# newest timestamps and ages in seconds.
# NOTE(review): evidence is matched by incident_id only, with no constraint that
# it was collected after the execution — confirm that an older verification for
# the same incident is meant to count as coverage.
# NOTE(review): verified_non_success counts only 'degraded', 'failed' and
# 'timeout'; any other non-null, non-'success' result is counted as verified
# but in neither bucket — confirm the set of possible values.
_VERIFICATION_COVERAGE_SQL = """
WITH recent_auto AS (
SELECT id, incident_id, success, created_at
FROM auto_repair_executions
WHERE created_at >= NOW() - INTERVAL '24 hours'
),
per_auto AS (
SELECT
are.id,
are.incident_id,
are.success,
are.created_at,
latest.verification_result,
latest.collected_at AS verification_collected_at,
latest.self_healing_score
FROM recent_auto are
LEFT JOIN LATERAL (
SELECT ev.verification_result, ev.collected_at, ev.self_healing_score
FROM incident_evidence ev
WHERE ev.incident_id = are.incident_id
AND ev.verification_result IS NOT NULL
ORDER BY ev.collected_at DESC
LIMIT 1
) latest ON TRUE
)
SELECT
count(*)::int AS total_auto,
count(*) FILTER (WHERE success)::int AS successful_auto,
count(*) FILTER (WHERE verification_result IS NOT NULL)::int AS verified_auto,
count(*) FILTER (WHERE verification_result = 'success')::int AS verified_success,
count(*) FILTER (WHERE verification_result IN ('degraded','failed','timeout'))::int AS verified_non_success,
count(*) FILTER (WHERE verification_result IS NULL)::int AS unverified_auto,
max(created_at) AS last_auto_at,
max(created_at) FILTER (WHERE verification_result IS NOT NULL) AS last_verified_auto_at,
max(verification_collected_at) AS last_verification_evidence_at,
EXTRACT(EPOCH FROM (NOW() - max(created_at)))::int AS latest_auto_age_seconds,
EXTRACT(EPOCH FROM (NOW() - (max(created_at) FILTER (WHERE verification_result IS NOT NULL))))::int
AS last_verified_auto_age_seconds
FROM per_auto
"""
# Sample query for the verification backlog: the 5 most recently created
# auto-repair executions from the last 24 hours whose incident has no
# incident_evidence row with a non-null verification_result.
# Uses the same recent_auto / per_auto pairing as the summary query.
_VERIFICATION_COVERAGE_RECENT_SQL = """
WITH recent_auto AS (
SELECT id, incident_id, success, created_at
FROM auto_repair_executions
WHERE created_at >= NOW() - INTERVAL '24 hours'
),
per_auto AS (
SELECT
are.id,
are.incident_id,
are.success,
are.created_at,
latest.verification_result
FROM recent_auto are
LEFT JOIN LATERAL (
SELECT ev.verification_result
FROM incident_evidence ev
WHERE ev.incident_id = are.incident_id
AND ev.verification_result IS NOT NULL
ORDER BY ev.collected_at DESC
LIMIT 1
) latest ON TRUE
)
SELECT id, incident_id, success, created_at
FROM per_auto
WHERE verification_result IS NULL
ORDER BY created_at DESC
LIMIT 5
"""
# Detail query for non-success verifications: the 8 most recently created
# auto-repair executions from the last 24 hours whose latest verification
# result is non-null and not 'success', joined to the incidents table for
# status, severity, category and alert name.
# Free-text columns are truncated in SQL (error message to 240 characters,
# post-execution state to 700, evidence summary to 300) before they reach Python.
# NOTE(review): the breakdowns and remediation queue built from these rows cover
# at most 8 findings because of LIMIT 8, while the summary query's
# verified_non_success counts every row — confirm the mismatch is acceptable.
_VERIFICATION_COVERAGE_NON_SUCCESS_SQL = """
WITH recent_auto AS (
SELECT
id,
incident_id,
success,
playbook_id,
playbook_name,
triggered_by,
risk_level,
error_message,
created_at
FROM auto_repair_executions
WHERE created_at >= NOW() - INTERVAL '24 hours'
),
per_auto AS (
SELECT
are.id AS auto_repair_id,
are.incident_id,
are.success AS auto_success,
are.playbook_id,
are.playbook_name,
are.triggered_by,
are.risk_level,
left(coalesce(are.error_message, ''), 240) AS auto_error,
are.created_at AS auto_created_at,
latest.verification_result,
latest.collected_at AS verification_collected_at,
left(coalesce(latest.post_execution_state::text, ''), 700) AS post_state_text,
left(coalesce(latest.evidence_summary, ''), 300) AS evidence_summary
FROM recent_auto are
LEFT JOIN LATERAL (
SELECT
ev.verification_result,
ev.collected_at,
ev.post_execution_state,
ev.evidence_summary
FROM incident_evidence ev
WHERE ev.incident_id = are.incident_id
AND ev.verification_result IS NOT NULL
ORDER BY ev.collected_at DESC
LIMIT 1
) latest ON TRUE
)
SELECT
p.*,
i.status::text AS incident_status,
i.severity::text AS incident_severity,
i.alert_category,
i.alertname
FROM per_auto p
LEFT JOIN incidents i ON i.incident_id = p.incident_id
WHERE p.verification_result IS NOT NULL
AND p.verification_result <> 'success'
ORDER BY p.auto_created_at DESC
LIMIT 8
"""
async def _query_prometheus_value(
    client: httpx.AsyncClient,
    prom_url: str,
    query: str,
) -> dict[str, Any]:
    """Run one instant query against Prometheus and normalize the outcome.

    Args:
        client: HTTP client used to issue the GET request.
        prom_url: Prometheus base URL; ``/api/v1/query`` is appended.
        query: PromQL expression sent as the ``query`` parameter.

    Returns:
        A dict whose ``status`` is one of:
        ``"ok"`` (with a finite float ``value``),
        ``"no_data"`` (empty result list),
        ``"skipped"`` (first sample is NaN or infinite; ``raw_value`` included),
        or ``"error"`` (non-success response status or any exception).
        Every non-ok dict carries a ``reason`` string.
    """
    try:
        reply = await client.get(
            f"{prom_url}/api/v1/query",
            params={"query": query},
        )
        body = reply.json()
        if body.get("status") != "success":
            return {"status": "error", "reason": "prometheus_query_failed"}

        series = body.get("data", {}).get("result", [])
        if not series:
            return {
                "status": "no_data",
                "reason": "prometheus_empty_result_metric_not_emitted",
            }

        # Only the first series is read; its sample is a [timestamp, value] pair.
        raw_value = series[0]["value"][1]
        number = float(raw_value)
        if math.isfinite(number):
            return {"status": "ok", "value": number}
        return {
            "status": "skipped",
            "reason": "prometheus_nan_or_inf",
            "raw_value": raw_value,
        }
    except Exception as exc:
        logger.warning("adr100_slo_prometheus_query_error", query=query, error=str(exc))
        return {"status": "error", "reason": "prometheus_query_error"}
def _metric_payload(
    definition: Adr100SloDefinition,
    *,
    value: float | None,
    status: str,
    reason: str | None,
    denominator_value: float | None,
    sample_count: float | None,
) -> dict[str, Any]:
    """Assemble the per-metric result dict for one SLO definition.

    Static fields are copied from ``definition``; the measured fields come
    from the keyword arguments. ``evaluable`` is true only when ``status`` is
    ``"ok"``, ``"warning"`` or ``"violated"``.
    """
    payload: dict[str, Any] = {
        "name": definition.name,
        "query": definition.query,
        "value": value,
    }
    # These keys mirror the attribute names on the definition one-to-one.
    for field_name in ("target", "hard_red_line", "direction", "unit", "window"):
        payload[field_name] = getattr(definition, field_name)
    payload["status"] = status
    payload["evaluable"] = status in ("ok", "warning", "violated")
    payload["reason"] = reason
    payload["denominator_query"] = definition.denominator_query
    payload["denominator_value"] = denominator_value
    payload["sample_count"] = sample_count
    return payload
def _classify_status(value: float, definition: Adr100SloDefinition) -> str:
    """Grade a measured value against the definition's target and red line.

    With ``direction == "above"`` a value below the limit is a breach; for any
    other direction a value above the limit is a breach. A breach of
    ``hard_red_line`` gives ``"violated"``, a breach of ``target`` gives
    ``"warning"``, and anything else is ``"ok"``.
    """
    higher_is_better = definition.direction == "above"

    def _breaches(limit: float) -> bool:
        return value < limit if higher_is_better else value > limit

    if _breaches(definition.hard_red_line):
        return "violated"
    if _breaches(definition.target):
        return "warning"
    return "ok"
def _build_verification_coverage_payload(
    summary_row: Any,
    recent_unverified_rows: Any,
    recent_non_success_rows: Any = (),
) -> dict[str, Any]:
    """Turn the verification-coverage query rows into the response payload.

    Args:
        summary_row: Mapping with the aggregate counters and timestamps.
        recent_unverified_rows: Iterable of mappings for unverified executions.
        recent_non_success_rows: Iterable of mappings for non-success findings.

    Returns:
        The ``adr100_verification_coverage_v1`` dict. Status is
        ``skipped_low_volume`` when there were no executions, ``warning`` when
        there is a verification backlog or any non-success verification, and
        ``ok`` otherwise.
    """
    summary = dict(summary_row)

    def _counter(column: str) -> int:
        # Missing or NULL counters are treated as zero.
        return int(summary.get(column) or 0)

    total = _counter("total_auto")
    verified = _counter("verified_auto")
    verified_ok = _counter("verified_success")
    verified_bad = _counter("verified_non_success")
    backlog = _counter("unverified_auto")

    # (status, reason, evaluable), checked in priority order.
    if total == 0:
        verdict = ("skipped_low_volume", "no_auto_repair_executions_24h", False)
    elif backlog > 0:
        verdict = ("warning", "verification_backlog_present", True)
    elif verified_bad > 0:
        verdict = ("warning", "non_success_verification_present", True)
    else:
        verdict = ("ok", None, True)
    status, reason, evaluable = verdict

    findings = [
        _non_success_finding_payload(dict(raw))
        for raw in recent_non_success_rows
    ]
    queue = _remediation_queue_payload(findings)

    unverified: list[dict[str, Any]] = []
    for raw in recent_unverified_rows:
        entry = dict(raw)
        unverified.append(
            {
                "id": str(entry.get("id")),
                "incident_id": str(entry.get("incident_id")),
                "success": bool(entry.get("success")),
                "created_at": _iso(entry.get("created_at")),
            }
        )

    breakdown = {
        "by_verification_result": _count_breakdown(
            finding["verification_result"] for finding in findings
        ),
        "by_failure_class": _count_breakdown(
            finding["failure_class"] for finding in findings
        ),
        "by_remediation_status": _count_breakdown(
            work_item["remediation_status"] for work_item in queue["items"]
        ),
    }

    return {
        "schema_version": "adr100_verification_coverage_v1",
        "source": "postgresql",
        "window": "24h",
        "status": status,
        "reason": reason,
        "evaluable": evaluable,
        "total_auto": total,
        "successful_auto": _counter("successful_auto"),
        "verified_auto": verified,
        "verified_success": verified_ok,
        "verified_non_success": verified_bad,
        "unverified_auto": backlog,
        "coverage_rate": (verified / total) if total else None,
        "verification_success_rate": (verified_ok / verified) if verified else None,
        "last_auto_at": _iso(summary.get("last_auto_at")),
        "last_verified_auto_at": _iso(summary.get("last_verified_auto_at")),
        "last_verification_evidence_at": _iso(summary.get("last_verification_evidence_at")),
        "latest_auto_age_seconds": _int_or_none(summary.get("latest_auto_age_seconds")),
        "last_verified_auto_age_seconds": _int_or_none(
            summary.get("last_verified_auto_age_seconds")
        ),
        "recent_unverified": unverified,
        "recent_non_success": findings,
        "non_success_breakdown": breakdown,
        "remediation_queue": queue,
    }
def _non_success_finding_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Shape one non-success verification row into a dashboard finding.

    The row is classified into a failure class, which in turn selects the
    suggested next step and the remediation status/action/owner/reason.
    Identifiers are stringified and free-text excerpts are capped at 180
    characters.
    """
    failure_class = _classify_non_success_failure(row)
    plan = _remediation_for_failure_class(failure_class)

    finding: dict[str, Any] = {
        "auto_repair_id": str(row.get("auto_repair_id")),
        "incident_id": str(row.get("incident_id")),
        "incident_status": str(row.get("incident_status") or "unknown"),
        "incident_severity": str(row.get("incident_severity") or "unknown"),
    }
    # Columns copied through unchanged.
    for column in ("alert_category", "alertname"):
        finding[column] = row.get(column)
    finding["auto_success"] = bool(row.get("auto_success"))
    for column in ("playbook_id", "playbook_name", "triggered_by", "risk_level"):
        finding[column] = row.get(column)
    finding["verification_result"] = str(row.get("verification_result") or "unknown")
    finding["failure_class"] = failure_class
    finding["next_step"] = _next_step_for_failure_class(failure_class)
    for part in ("status", "action", "owner", "reason"):
        finding[f"remediation_{part}"] = plan[part]
    finding["auto_error_excerpt"] = _short_text(row.get("auto_error"), 180)
    finding["evidence_excerpt"] = _short_text(row.get("evidence_summary"), 180)
    finding["auto_created_at"] = _iso(row.get("auto_created_at"))
    finding["verification_collected_at"] = _iso(row.get("verification_collected_at"))
    return finding
def _classify_non_success_failure(row: dict[str, Any]) -> str:
    """Derive a failure class label from a non-success verification row.

    Known marker substrings in the lower-cased error, post-state and evidence
    text win first. Otherwise a failed execution is reported as such, and the
    remaining rows are labelled from the verification result (``failed`` or
    ``timeout``), defaulting to ``verification_degraded``.
    """
    haystack = " ".join(
        str(row.get(column) or "")
        for column in ("auto_error", "post_state_text", "evidence_summary")
    ).lower()

    # Checked in order; the first marker found decides the class.
    marker_classes = (
        ("unsupported scheme", "unsupported_action_scheme"),
        ("missing_query_parameter", "verifier_missing_promql"),
        ("empty_pod_name", "verifier_target_missing_pod"),
    )
    for marker, label in marker_classes:
        if marker in haystack:
            return label

    if not row.get("auto_success"):
        return "auto_repair_execution_failed"

    outcome = str(row.get("verification_result") or "").lower()
    if outcome in ("failed", "timeout"):
        return f"verification_{outcome}"
    return "verification_degraded"
def _remediation_for_failure_class(failure_class: str) -> dict[str, str]:
    """Map a non-success verification class to a read-only remediation work item.

    This is dashboard triage metadata only. It does not auto-close incidents,
    replay repairs, or approve write actions.

    Returns:
        A new dict with ``status``, ``action``, ``owner`` and ``reason``.
        Unrecognized classes fall back to a manual review of degraded evidence.
    """
    field_names = ("status", "action", "owner", "reason")
    hard_failure = (
        "manual_review",
        "escalate_verification_failure",
        "sre_operator",
        "verifier_returned_hard_failure",
    )
    plans = {
        "unsupported_action_scheme": (
            "ready_for_replay",
            "replay_with_supported_executor",
            "auto_repair_executor",
            "executor_gateway_available_after_t23",
        ),
        "verifier_missing_promql": (
            "ready_for_reverify",
            "reverify_with_promql_template",
            "post_execution_verifier",
            "promql_template_available_after_t23",
        ),
        "verifier_target_missing_pod": (
            "needs_target_mapping",
            "map_target_and_reverify",
            "post_execution_verifier",
            "verifier_target_missing",
        ),
        "auto_repair_execution_failed": (
            "needs_playbook_ticket",
            "create_playbook_ticket",
            "solver_or_operator",
            "execution_failed_after_route_normalization",
        ),
        "verification_failed": hard_failure,
        "verification_timeout": hard_failure,
    }
    degraded = (
        "manual_review",
        "inspect_degraded_evidence",
        "sre_operator",
        "degraded_evidence_requires_human_context",
    )
    # Build a fresh dict on every call so callers can mutate the result safely.
    return dict(zip(field_names, plans.get(failure_class, degraded)))
def _next_step_for_failure_class(failure_class: str) -> str:
    """Return the suggested next-step label for a failure class.

    Unrecognized classes fall back to ``"review_degraded_verification"``.
    """
    next_steps = {
        "unsupported_action_scheme": "normalize_playbook_executor",
        "verifier_missing_promql": "add_verifier_query_template",
        "verifier_target_missing_pod": "map_verifier_target",
        "auto_repair_execution_failed": "review_auto_repair_execution",
        "verification_failed": "escalate_verification_failure",
        "verification_timeout": "escalate_verification_failure",
    }
    return next_steps.get(failure_class, "review_degraded_verification")
def _remediation_queue_payload(recent_non_success: list[dict[str, Any]]) -> dict[str, Any]:
    """Project non-success findings into a remediation work queue.

    Each finding becomes one work item keyed by incident and auto-repair id.
    The payload also counts items that are ready for replay or re-verification
    (``ready_for_ai``) versus those needing mapping, a ticket or manual review
    (``needs_human``), and includes breakdowns by status and by action.
    """
    copied_fields_head = (
        "incident_id",
        "auto_repair_id",
        "alertname",
        "playbook_id",
        "failure_class",
        "verification_result",
        "remediation_status",
        "remediation_action",
        "remediation_owner",
        "remediation_reason",
    )
    copied_fields_tail = ("auto_created_at", "verification_collected_at")

    items: list[dict[str, Any]] = []
    for finding in recent_non_success:
        work_item: dict[str, Any] = {
            "work_item_id": (
                f"verification:{finding.get('incident_id')}:{finding.get('auto_repair_id')}"
            ),
        }
        for field_name in copied_fields_head:
            work_item[field_name] = finding.get(field_name)
        work_item["source"] = "adr100_verification_coverage"
        for field_name in copied_fields_tail:
            work_item[field_name] = finding.get(field_name)
        items.append(work_item)

    statuses = [work_item.get("remediation_status") for work_item in items]
    ai_ready_statuses = {"ready_for_replay", "ready_for_reverify"}
    human_statuses = {
        "needs_target_mapping",
        "needs_playbook_ticket",
        "manual_review",
    }
    ready_for_ai = len([state for state in statuses if state in ai_ready_statuses])
    needs_human = len([state for state in statuses if state in human_statuses])

    return {
        "schema_version": "adr100_remediation_queue_v1",
        "source": "recent_non_success_read_model",
        "total": len(items),
        "ready_for_ai": ready_for_ai,
        "needs_human": needs_human,
        "items": items,
        "by_status": _count_breakdown(statuses),
        "by_action": _count_breakdown(
            work_item.get("remediation_action") for work_item in items
        ),
    }
def _count_breakdown(values: Any) -> list[dict[str, Any]]:
    """Count occurrences of each value and return them as ranked entries.

    Values are stringified; falsy values are grouped under ``"unknown"``.
    Entries are ordered by descending count, ties broken by ascending name.
    """
    labels = [str(raw or "unknown") for raw in values]
    tally = dict.fromkeys(labels, 0)
    for label in labels:
        tally[label] += 1
    # Sort by name first, then stably by count descending: ties keep name order.
    ranked = sorted(sorted(tally.items()), key=lambda pair: pair[1], reverse=True)
    return [{"name": label, "count": total} for label, total in ranked]
def _short_text(value: Any, limit: int) -> str | None:
    """Collapse whitespace in ``value`` and cap it at ``limit`` characters.

    Returns ``None`` when ``value`` is ``None`` or contains only whitespace.
    """
    if value is None:
        return None
    collapsed = " ".join(str(value).split())
    return collapsed[:limit] if collapsed else None
def _iso(value: Any) -> str | None:
    """Return ``value.isoformat()`` when available, otherwise ``None``."""
    if hasattr(value, "isoformat"):
        return value.isoformat()
    return None
def _int_or_none(value: Any) -> int | None:
    """Convert ``value`` with ``int()``, passing ``None`` through unchanged."""
    if value is None:
        return None
    return int(value)
def _overall_status(
    metrics: list[dict[str, Any]],
    evaluable: list[dict[str, Any]],
    verification_coverage: dict[str, Any] | None = None,
) -> str:
    """Roll the per-metric statuses and verification coverage into one status.

    Precedence, highest first: any violated metric; a ``violated`` or
    ``warning`` verification coverage status; any warning metric; ``partial``
    when some metrics were evaluable and some skipped for low volume; ``ok``
    when anything was evaluable; ``no_data`` when any metric had no data;
    otherwise ``skipped_low_volume``.
    """
    metric_states = [metric.get("status") for metric in metrics]

    if "violated" in metric_states:
        return "violated"

    if verification_coverage:
        coverage_state = verification_coverage.get("status")
        if coverage_state in {"violated", "warning"}:
            return str(coverage_state)

    if "warning" in metric_states:
        return "warning"

    if evaluable:
        return "partial" if "skipped_low_volume" in metric_states else "ok"

    return "no_data" if "no_data" in metric_states else "skipped_low_volume"
# Process-wide service instance, created on first use by the getter below.
_adr100_slo_status_service: Adr100SloStatusService | None = None


def get_adr100_slo_status_service() -> Adr100SloStatusService:
    """Return the shared ``Adr100SloStatusService``, creating it on first call."""
    global _adr100_slo_status_service
    service = _adr100_slo_status_service
    if service is None:
        service = Adr100SloStatusService()
        _adr100_slo_status_service = service
    return service

View File

@@ -27,7 +27,7 @@ from __future__ import annotations
import asyncio
import dataclasses
import json
import os
import time
import uuid
from datetime import UTC, datetime
@@ -63,11 +63,25 @@ if TYPE_CHECKING:
logger = structlog.get_logger(__name__)
def _agent_debate_global_timeout_seconds() -> float:
    """Return the full Phase 2 debate timeout in seconds.

    GCP Ollama incident analysis can legitimately take longer than the old
    90s guard, so the timeout is an explicit deployment knob read from the
    ``AGENT_DEBATE_GLOBAL_TIMEOUT_SEC`` environment variable.

    Returns:
        The configured timeout, never lower than 90 seconds. An unset,
        unparsable, or non-finite (``nan`` / ``inf``) value falls back to the
        420 second default.
    """
    # Local import keeps this block self-contained; math is standard library.
    import math

    default_timeout = 420.0
    minimum_timeout = 90.0

    raw = os.environ.get("AGENT_DEBATE_GLOBAL_TIMEOUT_SEC", "420.0")
    try:
        timeout = float(raw)
    except (TypeError, ValueError):
        timeout = default_timeout

    # float() accepts "nan" and "inf". max(nan, 90.0) returns nan, and inf
    # would disable the guard entirely, so treat both as misconfiguration.
    if not math.isfinite(timeout):
        timeout = default_timeout

    return max(timeout, minimum_timeout)
# 全局超時(所有 Agent 加起來)
# 2026-04-16 Claude Sonnet 4.6: deepseek-r1:14b 實測 2.2-27.3s avg 10.6s
# 原 30s 對 3 個序列 Agent 每個只剩 10s → 頻繁 timeout → confidence=20%
# 調整: 每 Agent 25s, 3個序列+1組並行 = 最差 75s + buffer = 90s
GLOBAL_TIMEOUT_SEC = 90.0
# 2026-05-06 Codex: configurable for GCP-A/GCP-B/111 Ollama-first incident
# diagnosis. The old 90s guard was cutting off valid deep diagnosis runs.
GLOBAL_TIMEOUT_SEC = _agent_debate_global_timeout_seconds()
# 2026-04-16 ogt + Claude Sonnet 4.6: 移除 _PER_AGENT_TIMEOUT_SEC
# LLM 必須等到完整回應,不得人工截斷。降級只在真正異常(連線失敗、模型崩潰)觸發。

View File

@@ -1,10 +1,10 @@
"""
Ollama Provider - Phase 24 ADR-052
====================================
本地 LLM 推理 (192.168.0.188 VMware VM, CPU-only)
本地 / 私有 LLM 推理 Provider。
搬移自: openclaw.py _call_ollama (L349-409)
特性: 免費、隱私安全 (local)、但 CPU 慢 (~97s/30tokens for qwen2.5:7b)
特性: 免費、隱私安全 (local)、可依 ADR-110 指向 GCP-A/GCP-B/111。
2026-04-02 ogt: Phase 24-A 從 openclaw.py 抽出
"""
@@ -29,6 +29,62 @@ from src.services.model_registry import get_model_registry
logger = structlog.get_logger(__name__)
settings = get_settings()
# Model names that _resolve_model_for_endpoint lets through unchanged on the
# GCP alert lane even for non-diagnosis calls; any other model requested there
# without a diagnosis task type or explicit opt-in is coerced to the
# health-check model.
_GCP_LIGHTWEIGHT_MODELS = {
"gemma3:4b",
}
def _normalized_url(value: str | None) -> str:
    """Return ``value`` without trailing slashes; ``None`` or empty gives ``""``."""
    if not value:
        return ""
    return value.rstrip("/")
def _is_gcp_alert_lane(endpoint_url: str) -> bool:
    """Return true for the CPU-only GCP-A/B synchronous alert lane.

    The endpoint is compared, after trailing-slash normalization, with the
    ``OLLAMA_URL`` and ``OLLAMA_SECONDARY_URL`` settings.

    An empty endpoint never matches. Without this guard an unset or empty
    setting would put ``""`` into the lane set and classify an empty (or
    ``"/"``) endpoint as the GCP lane.
    """
    endpoint = _normalized_url(endpoint_url)
    if not endpoint:
        return False
    lane_urls = {
        _normalized_url(getattr(settings, "OLLAMA_URL", "")),
        _normalized_url(getattr(settings, "OLLAMA_SECONDARY_URL", "")),
    }
    return endpoint in lane_urls
def _resolve_model_for_endpoint(
    *,
    requested_model: str,
    endpoint_url: str,
    context: dict | None,
) -> str:
    """
    Keep non-diagnosis calls from polluting the GCP diagnosis lane.

    GCP-A/B are allowed to run the deep incident diagnosis model because the
    alert goal is correctness and resolution, not the fastest Telegram card.
    Accidental non-diagnosis workloads still fall back to the lightweight health
    model so embedding/Hermes/background calls cannot occupy the same lane.

    The requested model (stripped) is returned unchanged unless the endpoint
    is the GCP alert lane, the context neither sets ``allow_gcp_heavy_model``
    nor names a diagnosis task type, and the model is not in
    ``_GCP_LIGHTWEIGHT_MODELS``. In that case the coercion is logged and the
    ``OLLAMA_HEALTH_CHECK_MODEL`` setting (default ``gemma3:4b``) is returned.
    """
    model_name = requested_model.strip()
    ctx = context or {}
    heavy_allowed = bool(ctx.get("allow_gcp_heavy_model"))
    # task_type takes precedence; intent_hint is the fallback.
    task_type = str(ctx.get("task_type") or ctx.get("intent_hint") or "").lower()
    deep_diagnosis = task_type in {"diagnose", "alert_deep", "incident_diagnosis"}

    if not _is_gcp_alert_lane(endpoint_url):
        return model_name
    if heavy_allowed or deep_diagnosis:
        return model_name
    if model_name in _GCP_LIGHTWEIGHT_MODELS:
        return model_name

    configured = str(getattr(settings, "OLLAMA_HEALTH_CHECK_MODEL", "gemma3:4b")).strip()
    fallback_model = configured or "gemma3:4b"
    logger.warning(
        "ollama_gcp_non_diagnosis_model_coerced",
        endpoint=endpoint_url,
        requested_model=model_name,
        safe_model=fallback_model,
        task_type=task_type,
    )
    return fallback_model
class OllamaProvider:
"""
@@ -77,11 +133,17 @@ class OllamaProvider:
client = await self._get_client()
registry = get_model_registry()
model_name = registry.get_model("ollama", "rca")
endpoint_url = self._endpoint_url()
requested_model = str((context or {}).get("ollama_model") or registry.get_model("ollama", "rca")).strip()
model_name = _resolve_model_for_endpoint(
requested_model=requested_model,
endpoint_url=endpoint_url,
context=context,
)
options = registry.get_provider_options("ollama")
# P0 2026-04-04 Claude Code: per-task timeoutOption C 分情境)
# FORCE_LOCAL/diagnose → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS (200s實測 ~173s)
# FORCE_LOCAL/diagnose → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS
# 其他 → OPENCLAW_TIMEOUT既有設定
task_type = (context or {}).get("task_type", "")
if task_type in ("diagnose", "force_local"):
@@ -89,7 +151,6 @@ class OllamaProvider:
else:
read_timeout = float(settings.OPENCLAW_TIMEOUT)
endpoint_url = self._endpoint_url()
response = await client.post(
f"{endpoint_url}/api/generate",
json={
@@ -112,7 +173,13 @@ class OllamaProvider:
tokens = data.get("eval_count", 0) + data.get("prompt_eval_count", 0)
latency = (time.perf_counter() - start) * 1000
logger.info("ollama_provider_success", response_length=len(result), tokens=tokens, latency_ms=round(latency, 1))
logger.info(
"ollama_provider_success",
response_length=len(result),
tokens=tokens,
latency_ms=round(latency, 1),
model=model_name,
)
return AIResult(
raw_response=result,
success=True,
@@ -158,7 +225,7 @@ class OllamaProvider:
total_tokens = 0
messages: list[dict] = [{"role": "user", "content": prompt}]
registry = get_model_registry()
model_name = registry.get_model("ollama", "rca")
model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama", "rca")).strip()
options = registry.get_provider_options("ollama")
task_type = (context or {}).get("task_type", "")
if task_type in ("diagnose", "force_local"):
@@ -268,33 +335,27 @@ class OllamaProvider:
self._http_client = None
# 2026-04-26 Wave5 B1-fix by Claude Engineer-A4 — OLLAMA_188 provider 註冊
class Ollama188Provider(OllamaProvider):
# 2026-05-06 Codex — 188 不再作為 Ollama Provider本地備援統一命名為 ollama_local。
class OllamaLocalProvider(OllamaProvider):
"""
Ollama 188 CPU-only 備援 Provider
Ollama Local fallback Provider
繼承 OllamaProvider使用 OLLAMA_FALLBACK_URL192.168.0.188:11434
作為推理端點,模型預設 OLLAMA_HEALTH_CHECK_MODELqwen2.5:7b-instruct
B1 修復:原本 _init_registry 未登錄此 provider導致
executor.execute() 遇到 "ollama_188" → not_registered → 跳過,
188 從未被打到。此類別補全登錄鏈路。
2026-04-26 Wave5 B1-fix by Claude Engineer-A4
使用 OLLAMA_FALLBACK_URL 作為本地最後防線端點。
ADR-110 目前設定為 110 nginx proxy → 111 Ollama188 不得再作為 Ollama provider
"""
@property
def name(self) -> str:
return "ollama_188"
return "ollama_local"
@property
def is_enabled(self) -> bool:
import os
# 優先查 ENABLE_OLLAMA_188;若未設定(預設 true則看 OLLAMA_FALLBACK_URL 是否有值
env_override = os.getenv("ENABLE_OLLAMA_188", "true").lower() == "true"
# 優先查 ENABLE_OLLAMA_LOCAL;若未設定(預設 true則看 OLLAMA_FALLBACK_URL 是否有值
env_override = os.getenv("ENABLE_OLLAMA_LOCAL", "true").lower() == "true"
if not env_override:
return False
# OLLAMA_FALLBACK_URL 空字串 → 未設定 188 節點 → 停用
# OLLAMA_FALLBACK_URL 空字串 → 未設定本地節點 → 停用
return bool(getattr(settings, "OLLAMA_FALLBACK_URL", ""))
def _endpoint_url(self) -> str:
@@ -319,18 +380,18 @@ class Ollama188Provider(OllamaProvider):
client = await self._get_client()
registry = get_model_registry()
# 嘗試取 ollama_188 專屬設定fallback 到 ollama 預設
# 嘗試取本地 fallback 專屬設定fallback 到 ollama 預設
try:
model_name = registry.get_model("ollama_188", "rca")
model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama_local", "rca")).strip()
except Exception:
model_name = getattr(settings, "OLLAMA_HEALTH_CHECK_MODEL", "qwen2.5:7b-instruct")
model_name = str((context or {}).get("ollama_model") or getattr(settings, "OLLAMA_HEALTH_CHECK_MODEL", "qwen2.5:7b-instruct")).strip()
try:
options = registry.get_provider_options("ollama_188")
options = registry.get_provider_options("ollama_local")
except Exception:
options = registry.get_provider_options("ollama")
# CPU-only 備援:固定使用較長 timeoutCPU 推理慢)
# 本地備援:固定使用較長 timeout,避免 111 模型載入時被過早判死。
task_type = (context or {}).get("task_type", "")
if task_type in ("diagnose", "force_local"):
read_timeout = float(getattr(settings, "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS", 200))
@@ -359,11 +420,12 @@ class Ollama188Provider(OllamaProvider):
latency = (time.perf_counter() - start) * 1000
logger.info(
"ollama_188_provider_success",
"ollama_local_provider_success",
response_length=len(result),
tokens=tokens,
latency_ms=round(latency, 1),
endpoint=fallback_url,
model=model_name,
)
return AIResult(
raw_response=result,
@@ -375,12 +437,12 @@ class Ollama188Provider(OllamaProvider):
except httpx.TimeoutException as e:
latency = (time.perf_counter() - start) * 1000
logger.warning("ollama_188_provider_timeout", error=str(e), latency_ms=round(latency, 1))
logger.warning("ollama_local_provider_timeout", error=str(e), latency_ms=round(latency, 1))
return AIResult(raw_response="", success=False, provider=self.name, latency_ms=latency, error=f"Timeout: {e}")
except Exception as e:
latency = (time.perf_counter() - start) * 1000
logger.warning("ollama_188_provider_failed", error=str(e), latency_ms=round(latency, 1))
logger.warning("ollama_local_provider_failed", error=str(e), latency_ms=round(latency, 1))
return AIResult(raw_response="", success=False, provider=self.name, latency_ms=latency, error=str(e))
async def health_check(self) -> bool:

View File

@@ -274,14 +274,13 @@ class AIRateLimiter:
try:
from src.core.config import settings
from src.services.telegram_gateway import get_telegram_gateway
target_chat_id = settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
if not settings.OPENCLAW_TG_BOT_TOKEN or not target_chat_id:
logger.warning("telegram_not_configured_for_cost_alert")
return
import httpx
message = (
f"🚨🚨🚨 <b>AI 成本超限警報</b> 🚨🚨🚨\n\n"
f"Provider: <code>{provider.upper()}</code>\n"
@@ -292,15 +291,15 @@ class AIRateLimiter:
f"<code>redis-cli DEL ai_rate:total_cost:{provider}</code>"
)
async with httpx.AsyncClient(timeout=10.0) as client:
await client.post(
f"https://api.telegram.org/bot{settings.OPENCLAW_TG_BOT_TOKEN}/sendMessage",
json={
"chat_id": target_chat_id,
"text": message,
"parse_mode": "HTML",
},
)
gateway = get_telegram_gateway()
await gateway._send_request(
"sendMessage",
{
"chat_id": target_chat_id,
"text": message,
"parse_mode": "HTML",
},
)
logger.error(
"ai_cost_alert_sent",
@@ -327,13 +326,12 @@ class AIRateLimiter:
try:
from src.core.config import settings
from src.services.telegram_gateway import get_telegram_gateway
target_chat_id = settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
if not settings.OPENCLAW_TG_BOT_TOKEN or not target_chat_id:
return
import httpx
limit = COST_LIMITS[provider]["total_cost_usd"]
remaining = limit - current_cost
@@ -345,15 +343,15 @@ class AIRateLimiter:
f"接近上限,請注意監控!"
)
async with httpx.AsyncClient(timeout=10.0) as client:
await client.post(
f"https://api.telegram.org/bot{settings.OPENCLAW_TG_BOT_TOKEN}/sendMessage",
json={
"chat_id": target_chat_id,
"text": message,
"parse_mode": "HTML",
},
)
gateway = get_telegram_gateway()
await gateway._send_request(
"sendMessage",
{
"chat_id": target_chat_id,
"text": message,
"parse_mode": "HTML",
},
)
logger.warning(
"ai_cost_warning_sent",

View File

@@ -73,10 +73,6 @@ class AIProviderEnum(str, Enum):
"""AI 提供者"""
OLLAMA = "ollama"
# 2026-04-25 critic-fix Part2 B2 by Claude Engineer-C2
# P1.1b OllamaFailoverManager 使用 provider_name="ollama_188"
# 但 AIProviderEnum 沒有此值 → P1.2 整合時 lookup 失敗
OLLAMA_188 = "ollama_188" # 188 CPU-only 備援節點P1.1b
# 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP 三層容災
# OllamaFailoverManager 回傳 provider_name="ollama_gcp_a"/"ollama_gcp_b"/"ollama_local"
# 缺少 enum 值 → AIProviderEnum(primary_str) 拋 ValueError → fallback chain 清空 → 直跳 Gemini
@@ -96,8 +92,6 @@ class AIProviderEnum(str, Enum):
# Provider 對應延遲預算 (ms)
PROVIDER_LATENCY_BUDGET: dict[AIProviderEnum, int] = {
AIProviderEnum.OLLAMA: 60000, # 本地,允許較長處理時間
# 2026-04-25 critic-fix Part2 B2 by Claude Engineer-C2 — 188 CPU-only 推理較慢
AIProviderEnum.OLLAMA_188: 120000, # 120s budget for CPU inference
# 2026-05-04 ogt: ADR-110 GCP 三層容災 — GCP NVMe SSD 推理快60s 足夠
AIProviderEnum.OLLAMA_GCP_A: 60000,
AIProviderEnum.OLLAMA_GCP_B: 60000,
@@ -432,7 +426,7 @@ class AIRouter:
model = failover_result.primary.model
reason = f"{reason} [failover→{primary_str}]"
except ValueError:
# provider_name 無法對應已知 enum理論上不應發生OLLAMA_188 已加)
# provider_name 無法對應已知 enum;避免未知 provider 靜默進入執行層。
logger.warning(
"ai_router_unknown_failover_provider",
provider=primary_str,
@@ -848,14 +842,13 @@ class AIRouter:
空 dict 代表無資料或查詢失敗caller 應降級為忽略)。
"""
try:
from src.db.base import get_session_factory
from src.db.base import get_db_context
from src.repositories.aider_event_repository import AiderEventRepository
except ImportError:
return {}
try:
sf = get_session_factory()
async with sf() as sess:
async with get_db_context() as sess:
repo_obj = AiderEventRepository(sess)
stats = await repo_obj.model_stats_since(days=days)
except Exception:
@@ -1078,11 +1071,51 @@ class AIRouterExecutor:
cached = await redis.get(cache_key)
if cached:
data = _json.loads(cached)
cached_provider = data.get("provider", "cache")
provider_allowed = cached_provider in provider_order
ollama_first_required = (
bool(context)
and any(
key in context
for key in (
"alert_type",
"alertname",
"alert_name",
"fingerprint",
"incident_id",
"severity",
"target_resource",
)
)
and bool(provider_order)
and provider_order[0].startswith("ollama")
) or (
bool(context)
and bool(context.get("enforce_ollama_first"))
and bool(provider_order)
and provider_order[0].startswith("ollama")
)
if (
cached_provider == "ollama"
and any(provider.startswith("ollama") for provider in provider_order)
):
provider_allowed = True
if ollama_first_required and not cached_provider.startswith("ollama"):
provider_allowed = False
if not provider_allowed:
logger.info(
"ai_router_cache_provider_mismatch_skip",
cache_key=cache_key[:30],
cached_provider=cached_provider,
provider_order=provider_order,
ollama_first_required=ollama_first_required,
)
raise ValueError("cached provider not allowed by current provider_order")
logger.info("ai_router_cache_hit", cache_key=cache_key[:30])
return AIResult(
raw_response=data.get("response", ""),
success=True,
provider=data.get("provider", "cache"),
provider=cached_provider,
from_cache=True,
)
except Exception as e:
@@ -1107,6 +1140,10 @@ class AIRouterExecutor:
_lf_trace_ctx = None
errors: list[str] = []
attempted_providers: set[str] = set()
alert_requires_ollama_before_cloud = bool(
(context or {}).get("alert_requires_ollama_before_cloud")
)
# 2026-04-27 Claude Sonnet 4.6: A2 INC-20260425 — DIAGNOSE fallback metric 追蹤
# 透過 context.get("intent_hint") 判斷是否為 DIAGNOSE避免改動 execute() 簽名
@@ -1156,13 +1193,31 @@ class AIRouterExecutor:
errors.append(f"{provider_name}: privacy_skip(non_local)")
continue
if alert_requires_ollama_before_cloud and provider.privacy_level == "cloud":
if "ollama_local" not in attempted_providers:
errors.append(f"{provider_name}: blocked_until_ollama_local_attempted")
logger.warning(
"ai_router_cloud_blocked_until_ollama_local_attempted",
provider=provider_name,
provider_order=provider_order,
attempted_providers=sorted(attempted_providers),
)
continue
# 閘門 1: Circuit Breaker (per-provider, C2 修復)
cb = self._get_circuit_breaker(provider_name)
if cb.is_open():
errors.append(f"{provider_name}: circuit_open")
logger.warning("ai_router_circuit_open", provider=provider_name)
# 2026-04-27 Claude Sonnet 4.6: F6 — circuit_open 不設 _last_attempted_provider未嘗試
continue
if alert_requires_ollama_before_cloud and provider_name.startswith("ollama"):
logger.warning(
"ai_router_alert_ollama_circuit_bypassed",
provider=provider_name,
reason="alert_requires_ollama_before_cloud",
)
else:
errors.append(f"{provider_name}: circuit_open")
logger.warning("ai_router_circuit_open", provider=provider_name)
# 2026-04-27 Claude Sonnet 4.6: F6 — circuit_open 不設 _last_attempted_provider未嘗試
continue
# 閘門 2: Rate Limiter
# 2026-04-02 Claude Code: Phase 24 B3 + C1 修復 — Rate Limiter (含 openclaw_nemo)
@@ -1182,6 +1237,7 @@ class AIRouterExecutor:
sem = self._get_semaphore(provider_name)
async with sem:
try:
attempted_providers.add(provider_name)
result = await provider.analyze(prompt, context)
if result.success:
@@ -1306,7 +1362,7 @@ def _init_registry() -> AIProviderRegistry:
"""初始化 Provider Registry (首次呼叫時自動註冊所有 Provider)"""
from src.services.ai_providers.ollama import (
OllamaProvider,
Ollama188Provider,
OllamaLocalProvider,
OllamaGcpBProvider, # 2026-05-04 ADR-110 GCP-B
)
from src.services.ai_providers.gemini import GeminiProvider
@@ -1327,8 +1383,9 @@ def _init_registry() -> AIProviderRegistry:
from src.services.ai_providers.nemotron import NemotronProvider
registry.register(NemotronProvider())
# 2026-04-26 Wave5 B1-fix by Claude Engineer-A4 — 補登 OLLAMA_188 備援 provider
ollama_local = Ollama188Provider()
# 2026-05-06 Codex: 188 不再作為 Ollama provider
# Local fallback 統一命名為 ollama_local端點由 OLLAMA_FALLBACK_URL 指向 111/110 proxy。
ollama_local = OllamaLocalProvider()
registry.register(ollama_local)
# 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP 三層容災修復
@@ -1337,7 +1394,7 @@ def _init_registry() -> AIProviderRegistry:
# 修復:
# "ollama_gcp_a" alias → 同 OllamaProviderOLLAMA_URL = GCP-A
# "ollama_gcp_b" → 新 OllamaGcpBProviderOLLAMA_SECONDARY_URL = GCP-B
# "ollama_local" alias → 同 Ollama188ProviderOLLAMA_FALLBACK_URL = 111
# "ollama_local" OllamaLocalProviderOLLAMA_FALLBACK_URL = 111 / 110:11437
registry._providers["ollama_gcp_a"] = ollama_gcp_a
registry.register(OllamaGcpBProvider())
registry._providers["ollama_local"] = ollama_local

View File

@@ -28,7 +28,7 @@ from datetime import timedelta
import structlog
from sqlalchemy import func, select, text
from src.db.base import get_session_factory
from src.db.base import get_db_context
from src.db.models import AiGovernanceEvent, AutoRepairExecution, ApprovalRecord
from src.utils.timezone import now_taipei
@@ -127,7 +127,7 @@ class AiSloCalculator:
try:
since = now_taipei() - timedelta(days=SLO_WINDOW_DAYS)
async with get_session_factory()() as session:
async with get_db_context() as session:
slo1 = await self._calc_auto_success_rate(session, since)
slo2 = await self._calc_human_override_rate(session, since)
slo3 = await self._calc_false_neg_rate(session, since)
@@ -210,7 +210,7 @@ class AiSloCalculator:
只在 any_violated=True 時呼叫。不管舊違反是否解決。
"""
try:
async with get_session_factory()() as session:
async with get_db_context() as session:
event = AiGovernanceEvent(
event_type="slo_violation",
details=report.to_dict(),

View File

@@ -0,0 +1,151 @@
"""Alert approval guardrails for AI-generated remediation actions.
This service runs before an Alertmanager-derived action becomes an
ApprovalRecord. It prevents a known failure mode: an LLM invents a kubectl
target that does not belong to the current alert domain, then the approval
pipeline faithfully executes or displays that bad command.
"""
from __future__ import annotations
from dataclasses import dataclass, field
import structlog
from src.services.action_parser import ActionKind, parse_kubectl_action
logger = structlog.get_logger(__name__)
_ALLOWED_K8S_NAMESPACES = frozenset({"awoooi-prod", "observability", "signoz", "langfuse"})
@dataclass(frozen=True)
class ApprovalActionGuardResult:
"""Guarded action payload returned to approval creation."""
action: str
blocked: bool = False
reason: str | None = None
metadata: dict[str, object] = field(default_factory=dict)
async def guard_alert_approval_action(
    *,
    action: str,
    alert_namespace: str | None,
    alertname: str,
    alert_category: str,
) -> ApprovalActionGuardResult:
    """Vet an AI- or rule-generated action before it is stored as an approval.

    Actions that do not start with ``kubectl`` pass through untouched; their
    own domain gates are responsible for them. A kubectl action must parse
    with the structured parser, must target an allow-listed namespace, and
    must not hop to a namespace other than the alert's own (``observability``
    is the one tolerated exception). Anything beyond a read-only ``get`` /
    ``version`` that names a workload-like resource is additionally checked
    for existence through the resource resolver.

    Args:
        action: Candidate action text.
        alert_namespace: Namespace carried by the alert; blank or ``None``
            falls back to ``awoooi-prod``.
        alertname: Alert name, used for logging only.
        alert_category: Alert category, used for logging only.

    Returns:
        A pass-through result carrying the original ``action``, or a blocked
        result built by ``_blocked``.
    """
    command = (action or "").strip()
    if not command.lower().startswith("kubectl"):
        return ApprovalActionGuardResult(action=action)

    parsed = parse_kubectl_action(command)
    if not parsed.ok:
        return _blocked(command, f"invalid_kubectl:{parsed.reason}", alertname)

    target_ns = parsed.namespace
    alert_ns = (alert_namespace or "awoooi-prod").strip() or "awoooi-prod"

    if target_ns:
        if target_ns not in _ALLOWED_K8S_NAMESPACES:
            return _blocked(
                command,
                f"namespace_not_allowed:{target_ns}",
                alertname,
                expected_namespace=alert_ns,
            )
        crosses_namespace = (
            alert_ns in _ALLOWED_K8S_NAMESPACES
            and target_ns != alert_ns
            and target_ns != "observability"
        )
        if crosses_namespace:
            return _blocked(
                command,
                f"namespace_mismatch:{target_ns}!={alert_ns}",
                alertname,
                expected_namespace=alert_ns,
            )

    # Read-only commands are safe enough to display once the namespace is sane.
    # Mutating commands still need resource existence checks to avoid executing
    # hallucinated deployments like "flywheelexecutionratemissing".
    if parsed.kind == ActionKind.READONLY and parsed.verb in {"get", "version"}:
        return ApprovalActionGuardResult(action=action)

    has_checkable_target = parsed.resource_name and parsed.resource_type in {
        "deployment",
        "statefulset",
        "daemonset",
        "pod",
        "service",
    }
    if not has_checkable_target:
        return ApprovalActionGuardResult(action=action)

    try:
        from src.services.resource_resolver import get_resource_resolver

        lookup = await get_resource_resolver().resolve(
            raw_resource=parsed.resource_name,
            namespace=target_ns or alert_ns,
            resource_kind=parsed.resource_type,
        )
        if not lookup.success:
            return _blocked(
                command,
                f"k8s_resource_not_found:{parsed.resource_type}/{parsed.resource_name}",
                alertname,
                expected_namespace=alert_ns,
                candidates=lookup.candidates,
            )
    except Exception as exc:
        # Fail open: when the existence check itself is unavailable the action
        # is let through, tagged with a warning in the metadata.
        logger.warning(
            "approval_action_resource_guard_unavailable",
            alertname=alertname,
            alert_category=alert_category,
            action=command[:160],
            error=str(exc),
        )
        return ApprovalActionGuardResult(
            action=action,
            metadata={"action_guard_warning": "resource_guard_unavailable"},
        )
    return ApprovalActionGuardResult(action=action)
def _blocked(
    raw_action: str,
    reason: str,
    alertname: str,
    *,
    expected_namespace: str | None = None,
    candidates: list[str] | None = None,
) -> ApprovalActionGuardResult:
    """Log a rejected action and wrap it in a blocked guard result.

    The returned ``action`` is a ``NO_ACTION - INVALID_TARGET`` placeholder
    that embeds the reason and a truncated copy of the original command; the
    metadata keeps a longer copy plus the namespace and candidate hints.
    """
    candidate_names = candidates or []
    logger.warning(
        "approval_action_blocked_before_persist",
        alertname=alertname,
        reason=reason,
        action=raw_action[:160],
        expected_namespace=expected_namespace,
        candidates=candidate_names,
    )
    placeholder = f"NO_ACTION - INVALID_TARGET: {reason}; original={raw_action[:180]}"
    details: dict[str, object] = {
        "action_guard": "blocked_before_persist",
        "blocked_action": raw_action[:300],
        "blocked_reason": reason,
        "expected_namespace": expected_namespace,
        "candidates": candidates or [],
    }
    return ApprovalActionGuardResult(
        action=placeholder,
        blocked=True,
        reason=reason,
        metadata=details,
    )

View File

@@ -36,6 +36,17 @@ if TYPE_CHECKING:
logger = structlog.get_logger(__name__)
def _decode_redis_member(value: object, fallback: str) -> str:
"""Redis client 可能回 bytes 或 str統一成 str 供 DB / log 使用。"""
if isinstance(value, bytes):
return value.decode("utf-8", errors="replace")
if isinstance(value, str):
return value
if value is None:
return fallback
return str(value)
# =============================================================================
# Data Types
# =============================================================================
@@ -83,8 +94,9 @@ class AlertGroupingService:
# 5 分鐘滑動視窗
WINDOW_SECONDS: int = 300
# 觸發聚合的閾值(同一分組 5 分鐘內超過此數量才聚合)
GROUP_THRESHOLD: int = 3
# 觸發聚合的閾值:保留第一張主卡,第二個同組告警開始收斂。
# 2026-05-07 Codex — Telegram 群組噪音治理:舊值 3 會讓前兩張同類告警仍進 AI/Telegram。
GROUP_THRESHOLD: int = 2
# Redis Key 前綴
PREFIX_WINDOW = "alert_group:window:"
@@ -188,7 +200,10 @@ class AlertGroupingService:
count = results[2]
first_members = results[3]
parent_fingerprint = first_members[0] if first_members else fingerprint
parent_fingerprint = _decode_redis_member(
first_members[0] if first_members else None,
fallback=fingerprint,
)
# 是否為父告警(第一個)
is_parent = parent_fingerprint == fingerprint or count == 1

View File

@@ -25,14 +25,19 @@ Approval Execution Service - Phase 16 R4.2 瘦身 Router 抽取
import asyncio
import time
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any
from uuid import UUID
import structlog
from src.core.config import settings
from src.core.redis_client import get_redis
from src.db.base import get_db_context
from src.models.approval import ApprovalRequest
from src.plugins.mcp.gateway import GatewayContext, McpGateway, McpGatewayError
from src.plugins.mcp.interfaces import MCPToolResult
from src.services.approval_db import get_approval_service, get_timeline_service
from src.services.executor import OperationType, get_executor
from src.services.executor import ExecutionResult, OperationType, get_executor
from src.services.operation_parser import parse_operation_from_action
if TYPE_CHECKING:
@@ -45,6 +50,23 @@ logger = structlog.get_logger(__name__)
# 上限 60s 涵蓋 verifier warmup(10s) + collect(30s) + 緩衝 20s.
_VERIFIER_AWAIT_TIMEOUT_SEC = 60.0
# T9: approved SSH execution must go through AwoooP MCP Gateway.
# ApprovalRequest itself is the human/multi-sig decision artifact; for write/admin
# tools we project it into the short-lived Gate 5 Redis key expected by Gateway.
# Agent identity used both in the GatewayContext and in the Redis approval key.
_SSH_GATEWAY_AGENT_ID = "approval_executor"
# Project identifier used for the DB context, GatewayContext and Redis key.
_SSH_GATEWAY_PROJECT_ID = "awoooi"
# Expiry (seconds) passed as ``ex=`` when the approval key is written to Redis.
_SSH_GATEWAY_APPROVAL_TTL_SECONDS = 600
# Scope required per SSH tool. Lookups fall back to "read" for tools that are
# not listed here, and only non-"read" scopes get an approval key projected.
_SSH_GATEWAY_TOOL_SCOPES: dict[str, str] = {
"ssh_diagnose": "read",
"ssh_docker_restart": "write",
"ssh_docker_compose_restart": "write",
"ssh_systemctl_restart": "write",
"ssh_clear_docker_logs": "write",
"ssh_renew_ssl": "write",
"ssh_reload_nginx": "write",
"ssh_docker_prune": "admin",
}
class ApprovalExecutionService:
"""
@@ -222,6 +244,7 @@ class ApprovalExecutionService:
approval_id=str(approval.id),
action=approval.action,
reason="NO_ACTION - 純調查/觀察類,不執行破壞動作",
path="no_action",
)
# 標為 SUCCESS (觀察/調查本身就是成功完成)
await service.update_execution_status(approval.id, success=True)
@@ -248,6 +271,29 @@ class ApprovalExecutionService:
duration_ms=int((time.time() - _aol_started_ms) * 1000),
output={"reason": "NO_ACTION", "action": approval.action[:200]},
)
# F2 (2026-05-07 ogt + Claude Sonnet 4.6 + Codex):
# NO_ACTION 路徑要把 incident 推到 RESOLVED否則 incident 永遠卡
# INVESTIGATINGFlywheelExecutionRateMissing 死告警 + 566 stuck 增長根因 #1
# resolve_incident 內已加 RESOLVED 冪等 guard重複 resolve 會 idempotent
# return existing incident 不會重觸發 postmortem。
if approval.incident_id:
try:
from src.services.incident_service import get_incident_service
await get_incident_service().resolve_incident(approval.incident_id)
logger.info(
"incident_resolved_after_no_action_execution",
incident_id=approval.incident_id,
approval_id=str(approval.id),
path="no_action",
)
except Exception as _resolve_e:
logger.warning(
"incident_resolve_after_no_action_execution_failed",
incident_id=approval.incident_id,
approval_id=str(approval.id),
error=str(_resolve_e),
)
return True # NO_ACTION 視為成功完成
# 真解析失敗 (非 NO_ACTION)
@@ -614,7 +660,7 @@ class ApprovalExecutionService:
self,
approval: ApprovalRequest,
host: str,
) -> "ExecutionResult":
) -> ExecutionResult:
"""
執行 SSH 主機 action手動批准路徑專用
@@ -629,8 +675,6 @@ class ApprovalExecutionService:
- "ps aux" / "df -h" / "free -h" / "top" / "uptime" / 'echo' / 'ls -lah' → ssh_diagnose
- 其他:回傳失敗,提示 LLM 改寫 action
"""
from src.services.executor import ExecutionResult
start = time.time()
action = approval.action or ""
action_lower = action.lower().strip()
@@ -684,11 +728,20 @@ class ApprovalExecutionService:
error=err,
)
# 呼叫 SSH MCP Provider
from src.plugins.mcp.providers.ssh_provider import SSHProvider
provider = SSHProvider()
try:
mcp_result = await provider.execute(tool_name=tool_name, parameters=params)
logger.warning(
"mcp_gateway_approved_ssh_execution_path",
approval_id=str(approval.id),
incident_id=approval.incident_id,
tool=tool_name,
host=host,
agent_id=_SSH_GATEWAY_AGENT_ID,
)
mcp_result = await self._execute_ssh_tool_via_gateway(
approval=approval,
tool_name=tool_name,
params=params,
)
duration_ms = int((time.time() - start) * 1000)
success = bool(mcp_result.success)
return ExecutionResult(
@@ -719,6 +772,75 @@ class ApprovalExecutionService:
error=str(e),
)
async def _execute_ssh_tool_via_gateway(
    self,
    approval: ApprovalRequest,
    tool_name: str,
    params: dict[str, Any],
) -> MCPToolResult:
    """Run an approved SSH tool through the MCP Gateway.

    For tools whose scope is not ``read`` the approval is first projected
    into a short-lived Redis key; a failure to write that key is logged and
    the call still proceeds. Audit metadata is attached to the parameters
    under ``_mcp_audit``. A ``McpGatewayError`` is converted into a failed
    ``MCPToolResult`` rather than raised.

    Args:
        approval: The approved request being executed.
        tool_name: SSH MCP tool to invoke.
        params: Tool parameters; not mutated.

    Returns:
        The gateway's tool result, or a failed result describing the block.
    """
    # NOTE(review): tools missing from the scope table resolve to "read" and
    # therefore skip the approval projection — confirm this default is intended.
    scope = _SSH_GATEWAY_TOOL_SCOPES.get(tool_name, "read")
    run_id = approval.id if isinstance(approval.id, UUID) else UUID(str(approval.id))

    if scope != "read":
        approval_key = (
            f"mcp_approval:{_SSH_GATEWAY_PROJECT_ID}:{_SSH_GATEWAY_AGENT_ID}:"
            f"{tool_name}:{run_id}"
        )
        try:
            await get_redis().set(
                approval_key,
                "approved",
                ex=_SSH_GATEWAY_APPROVAL_TTL_SECONDS,
            )
        except Exception as exc:
            logger.warning(
                "mcp_gateway_approval_projection_failed",
                approval_id=str(approval.id),
                tool=tool_name,
                approval_key=approval_key,
                error=str(exc),
            )

    audit_block = {
        "session_id": f"approval:{approval.id}",
        "incident_id": approval.incident_id,
        "agent_role": _SSH_GATEWAY_AGENT_ID,
        "flywheel_node": "execute",
        "approval_id": str(approval.id),
    }
    gateway_params = {**params, "_mcp_audit": audit_block}

    async with get_db_context(_SSH_GATEWAY_PROJECT_ID) as db:
        ctx = GatewayContext(
            project_id=_SSH_GATEWAY_PROJECT_ID,
            agent_id=_SSH_GATEWAY_AGENT_ID,
            tool_name=tool_name,
            run_id=run_id,
            trace_id=approval.incident_id or str(approval.id),
            is_shadow=False,
            environment={"env": "prod"},
            required_scope=scope,
        )
        try:
            return await McpGateway(db).call(ctx, gateway_params)
        except McpGatewayError as exc:
            logger.warning(
                "mcp_gateway_approved_ssh_blocked",
                approval_id=str(approval.id),
                incident_id=approval.incident_id,
                tool=tool_name,
                gate=exc.gate,
                error_code=exc.error_code,
                error=str(exc),
            )
            return MCPToolResult(
                success=False,
                execution_id=f"blocked:{tool_name}:{run_id}",
                error=f"{exc.error_code}: {exc}",
            )
async def _push_execution_result_to_alert(
self,
approval: ApprovalRequest,
@@ -736,7 +858,7 @@ class ApprovalExecutionService:
"""
try:
# 自動執行路徑 skip避免與 _push_auto_repair_result 重複發訊息)
if (approval.requested_by or "").lower() == "auto_approve":
if self._is_auto_approved_request(approval):
return
if not approval.incident_id:
@@ -812,9 +934,9 @@ class ApprovalExecutionService:
f"{km_info}"
)
await gateway._http_client.post(
f"https://api.telegram.org/bot{settings.OPENCLAW_TG_BOT_TOKEN}/sendMessage",
json={
await gateway._send_request(
"sendMessage",
{
"chat_id": target_chat_id,
"text": text,
"parse_mode": "HTML",
@@ -984,6 +1106,186 @@ class ApprovalExecutionService:
error=str(_e),
)
@staticmethod
def _is_auto_approved_request(approval: "ApprovalRequest") -> bool:
requested_by = (getattr(approval, "requested_by", "") or "").lower()
return requested_by.startswith("auto_approve")
@staticmethod
def _is_observation_only_action(action: str | None) -> bool:
action_upper = (action or "").strip().upper()
return (
not action_upper
or "NO_ACTION" in action_upper
or "NO-ACTION" in action_upper
or "NOACTION" in action_upper
or action_upper.startswith("OBSERVE")
or action_upper.startswith("INVESTIGATE")
)
@staticmethod
def _approval_risk_value(approval: "ApprovalRequest") -> str | None:
risk_level = getattr(approval, "risk_level", None)
if risk_level is None:
return None
return getattr(risk_level, "value", str(risk_level))
async def finalize_auto_approved_execution(
self,
approval: "ApprovalRequest",
*,
success: bool,
error_message: str | None = None,
) -> None:
"""
Backfill the incident-linked evidence chain for an auto-approved action that already ran.

To execute quickly, the CS2/CS3 webhook path calls execute_approved_action()
first and creates the Incident afterwards. The executor therefore has no
incident_id at execution time, so verifier / KM / auto_repair_executions
records cannot be tied back to the same alert card. This method only adds
the durable trace once the incident exists; it does not re-run the action.

Args:
    approval: The auto-approved request; ignored unless it is auto-approved
        and carries an incident_id.
    success: Outcome reported by the earlier execution.
    error_message: Optional failure detail; a generic message is substituted
        when the execution failed and none is given.
"""
# Only auto-approved requests are finalized here.
if not self._is_auto_approved_request(approval):
return
incident_id = getattr(approval, "incident_id", None)
if not incident_id:
logger.warning(
"auto_approved_execution_finalize_skipped_no_incident",
approval_id=str(getattr(approval, "id", "")),
requested_by=getattr(approval, "requested_by", None),
)
return
# Observation-only actions (empty / NO_ACTION / OBSERVE / INVESTIGATE) leave no repair trace.
if self._is_observation_only_action(getattr(approval, "action", None)):
logger.info(
"auto_approved_execution_finalize_skipped_observation_only",
approval_id=str(approval.id),
incident_id=incident_id,
action=(approval.action or "")[:120],
)
return
# Derive the identifiers used by the execution record, timeline event and verifier.
parsed = parse_operation_from_action(approval.action)
operation_type = parsed.operation_type
resource_name = parsed.resource_name or "unknown"
namespace = parsed.namespace or "default"
# Falls back to the approval id when no playbook was matched; truncated to 36 chars.
playbook_id = str(getattr(approval, "matched_playbook_id", None) or approval.id)[:36]
operation_label = operation_type.value if operation_type else "unknown"
playbook_name = f"approval_auto_execute:{operation_label}:{resource_name}"[:200]
triggered_by = (getattr(approval, "requested_by", None) or "auto_approve")[:50]
action_taken = f"auto_repair_playbook:{playbook_id}:{operation_label}:{resource_name}"
if not success:
action_taken = f"{action_taken}:FAILED"
error_message = error_message or "auto-approved executor returned failure; see approval/aol logs"
# Step 1: write the auto-repair execution row unless an equivalent row
# (same playbook id, same trigger, same action among executed steps) exists.
try:
from src.repositories.audit_log_repository import get_auto_repair_execution_repository
repo = get_auto_repair_execution_repository()
existing = await repo.list_by_incident(incident_id)
already_recorded = any(
str(getattr(row, "playbook_id", "")) == playbook_id
and getattr(row, "triggered_by", "") == triggered_by
and (approval.action or "") in list(getattr(row, "executed_steps", []) or [])
for row in existing
)
if not already_recorded:
await repo.create(
incident_id=incident_id,
playbook_id=playbook_id,
playbook_name=playbook_name,
success=success,
executed_steps=[approval.action],
error_message=error_message,
triggered_by=triggered_by,
risk_level=self._approval_risk_value(approval),
)
else:
logger.info(
"auto_approved_execution_record_already_exists",
approval_id=str(approval.id),
incident_id=incident_id,
playbook_id=playbook_id,
)
except Exception as exc:
logger.warning(
"auto_approved_execution_record_failed",
approval_id=str(approval.id),
incident_id=incident_id,
error=str(exc),
)
# Step 2: add an "exec" timeline event; failures are logged, not raised.
try:
timeline = get_timeline_service()
await timeline.add_event(
event_type="exec",
status="success" if success else "error",
title=f"{'' if success else ''} 自動批准執行已補鏈: {operation_label}",
description=(
f"Target: {resource_name} @ {namespace}; "
f"source={triggered_by}; action={approval.action[:160]}"
),
actor="leWOOOgo",
actor_role="executor",
approval_id=str(approval.id),
incident_id=incident_id,
)
except Exception as exc:
logger.warning(
"auto_approved_execution_timeline_failed",
approval_id=str(approval.id),
incident_id=incident_id,
error=str(exc),
)
# Step 3: write the execution result to KM; failures are logged, not raised.
try:
await self.write_execution_result_to_km(approval, success, error_message)
except Exception as exc:
logger.warning(
"auto_approved_execution_km_failed",
approval_id=str(approval.id),
incident_id=incident_id,
error=str(exc),
)
# Step 4: run the post-execution verifier, bounded by _VERIFIER_AWAIT_TIMEOUT_SEC,
# when the feature flag is enabled.
# NOTE(review): only asyncio.TimeoutError is handled below; presumably any
# other verifier exception propagates to the caller, unlike steps 1-3 —
# confirm that is intended.
from src.core.feature_flags import aiops_flags
if aiops_flags.is_sub_flag_enabled("AIOPS_P1_POST_EXECUTION_VERIFIER"):
try:
await asyncio.wait_for(
self._run_post_execution_verify(
approval=approval,
action_taken=action_taken,
),
timeout=_VERIFIER_AWAIT_TIMEOUT_SEC,
)
except asyncio.TimeoutError:
logger.warning(
"auto_approved_execution_post_verify_timeout",
approval_id=str(approval.id),
incident_id=incident_id,
timeout_sec=_VERIFIER_AWAIT_TIMEOUT_SEC,
)
# Step 5: a successful execution resolves the incident; failures are logged, not raised.
if success:
try:
from src.services.incident_service import get_incident_service
await get_incident_service().resolve_incident(incident_id)
logger.info(
"incident_resolved_after_auto_approved_execution_finalize",
incident_id=incident_id,
approval_id=str(approval.id),
)
except Exception as exc:
logger.warning(
"incident_resolve_after_auto_approved_execution_finalize_failed",
incident_id=incident_id,
approval_id=str(approval.id),
error=str(exc),
)
async def write_execution_result_to_km(
self,
approval: "ApprovalRequest",
@@ -1002,7 +1304,7 @@ class ApprovalExecutionService:
from src.services.km_writer import KMWritePayload, km_write_with_flag
# 來源辨識B.1 精修)
_is_auto = (approval.requested_by or "").lower() == "auto_approve"
_is_auto = self._is_auto_approved_request(approval)
_mode_prefix = "[自動修復]" if _is_auto else "[人工修復]"
_mode_tag = "auto_executed" if _is_auto else "human_approved"

View File

@@ -22,9 +22,10 @@ Phase 8: 自動化層實作
- P0/P1 嚴重度 Incident 需要人工確認
"""
from dataclasses import dataclass
from collections.abc import Callable
from typing import Protocol
from dataclasses import dataclass
import re
from typing import Any, Protocol
import structlog
@@ -81,6 +82,55 @@ class AutoRepairResult:
execution_time_ms: int = 0
@dataclass(frozen=True)
class _SshMcpRoute:
"""Route a legacy SSH playbook command to a governed MCP tool."""
tool_name: str
params: dict[str, Any]
# Short host labels mapped to full IPv4 addresses; consulted when normalizing
# an SSH host taken from a playbook command or from alert labels.
_SHORT_HOST_MAP: dict[str, str] = {
"110": "192.168.0.110",
"120": "192.168.0.120",
"121": "192.168.0.121",
"188": "192.168.0.188",
}
# Substrings that mark a legacy ``ssh ...`` step as a diagnostic. They are
# matched with plain ``in`` against the lower-cased command, so trailing
# spaces in entries such as "tail " and "top " are significant.
_SSH_DIAGNOSTIC_KEYWORDS = (
"ps aux",
"docker stats",
"docker inspect",
"docker logs",
"docker ps",
"docker top",
"df -h",
"du -",
"free -h",
"journalctl",
"systemctl show",
"tail ",
"top ",
"uptime",
)
# Substrings that disqualify a legacy ``ssh ...`` step from the read-only MCP
# route; checked before the diagnostic keywords, also as plain substrings.
_SSH_WRITE_KEYWORDS = (
"docker restart",
"docker start",
"docker stop",
"docker rm",
"docker prune",
"systemctl restart",
"systemctl stop",
"systemctl start",
"truncate ",
" rm ",
"rm -",
"certbot renew",
"bash ",
)
# =============================================================================
# Auto Repair Service Interface
# =============================================================================
@@ -108,6 +158,7 @@ class IAutoRepairService(Protocol):
self,
incident: Incident,
playbook: Playbook,
run_post_verification: bool = True,
) -> AutoRepairResult:
"""
執行自動修復
@@ -320,7 +371,16 @@ class AutoRepairService:
)
# 4. 檢查最佳匹配
best_match = recommendations[0]
best_match = self._select_best_recommendation(recommendations, symptoms)
if best_match is not recommendations[0]:
logger.warning(
"auto_repair_exact_match_prioritized",
incident_id=incident.incident_id,
selected_playbook_id=best_match.playbook.playbook_id,
original_playbook_id=recommendations[0].playbook.playbook_id,
selected_similarity=best_match.similarity_score,
original_similarity=recommendations[0].similarity_score,
)
# 2026-04-07 Claude Code: 統帥指令「直接全部跳成自動修復」
# 移除: 相似度門檻、is_high_quality 門檻、冷啟動機制、風險等級門檻
@@ -378,6 +438,7 @@ class AutoRepairService:
playbook: Playbook,
is_cold_start: bool = False,
similarity_score: float | None = None,
run_post_verification: bool = True,
) -> AutoRepairResult:
"""
執行自動修復
@@ -414,6 +475,8 @@ class AutoRepairService:
executed_steps.append(
f"Step {step.step_number}: {step.command[:50]}... -> {step_result}"
)
if self._is_step_failure_result(step_result):
raise RuntimeError(f"Step {step.step_number} failed: {step_result}")
# 更新 Playbook 統計
await self._playbook_service.record_execution(
@@ -457,6 +520,8 @@ class AutoRepairService:
except Exception as _db_e:
logger.error("auto_repair_db_write_failed", error=str(_db_e))
self._record_auto_repair_metric(playbook, success=True)
# 2026-04-07 Claude Code: Sprint 4 B1/B2 — 記錄處置類型
# P0-1 Fix: 統一使用 AnomalyCounter.hash_signature()
try:
@@ -577,10 +642,17 @@ class AutoRepairService:
error=str(_inner_e),
)
_vl_task = _asyncio.create_task(_verify_and_learn())
if hasattr(self, "_pending_tasks"):
self._pending_tasks.add(_vl_task)
_vl_task.add_done_callback(self._pending_tasks.discard)
if run_post_verification:
_vl_task = _asyncio.create_task(_verify_and_learn())
if hasattr(self, "_pending_tasks"):
self._pending_tasks.add(_vl_task)
_vl_task.add_done_callback(self._pending_tasks.discard)
else:
logger.info(
"auto_repair_service_post_verify_delegated",
incident_id=incident.incident_id,
playbook_id=playbook.playbook_id,
)
except Exception as _vl_e:
logger.warning("auto_repair_verifier_setup_failed", error=str(_vl_e))
@@ -630,6 +702,8 @@ class AutoRepairService:
except Exception as _db_e:
logger.error("auto_repair_db_write_failed", error=str(_db_e))
self._record_auto_repair_metric(playbook, success=False)
# 2026-04-04 Claude Code: Phase 25 P1 — 失敗修復後 fire-and-forget 生成 ANTI_PATTERN
# 2026-04-05 Claude Code: I1 修正 — 補齊 _pending_tasks GC 防護(對稱化)
try:
@@ -684,6 +758,44 @@ class AutoRepairService:
keywords=keywords[:10],
)
def _select_best_recommendation(
self,
recommendations,
symptoms: SymptomPattern,
):
"""Prefer deterministic alert/service matches over fuzzy similarity only.
A higher fuzzy score must not outrank a playbook that explicitly names the
firing alert or affected service. Live-fire T16 proved that this can route
a safe K8s canary into an unrelated host diagnostic playbook.
"""
symptom_alerts = {str(name) for name in (symptoms.alert_names or []) if name}
symptom_services = {
str(service) for service in (symptoms.affected_services or []) if service
}
def _priority(recommendation) -> tuple[int, int, float]:
pattern = recommendation.playbook.symptom_pattern
playbook_alerts = {
str(name) for name in (pattern.alert_names or []) if name
}
playbook_services = {
str(service) for service in (pattern.affected_services or []) if service
}
alert_exact = int(bool(symptom_alerts & playbook_alerts))
service_exact = int(bool(symptom_services & playbook_services))
return (alert_exact, service_exact, float(recommendation.similarity_score or 0.0))
return max(recommendations, key=_priority)
@staticmethod
def _is_step_failure_result(step_result: str) -> bool:
"""Treat executor-declared failures as failed auto-repair executions."""
normalized = (step_result or "").strip().upper()
return normalized.startswith("FAILED:") or normalized == "UNKNOWN_ACTION_TYPE"
def _get_max_risk_level(self, playbook: Playbook) -> RiskLevel:
"""取得 Playbook 中最高的風險等級"""
risk_order = {
@@ -700,6 +812,35 @@ class AutoRepairService:
return max_risk
def _record_auto_repair_metric(self, playbook: Playbook, success: bool) -> None:
    """Record one real auto-repair execution in the Prometheus metrics.

    2026-05-06 ogt + Codex: the DB already had auto_repair_executions rows,
    but core.metrics.record_auto_repair() had no caller for a long time, so
    governance / heartbeat views built on Prometheus looked as if the
    flywheel was doing nothing. The label is the first step's action_type
    rather than the playbook_id, to avoid high label cardinality.

    Any failure while recording is logged and swallowed.
    """
    try:
        from src.core.metrics import record_auto_repair

        steps = playbook.repair_steps
        lead_step = steps[0] if steps else None
        action = "unknown" if not lead_step else lead_step.action_type.value
        tier_by_risk = {
            RiskLevel.LOW: 1,
            RiskLevel.MEDIUM: 2,
            RiskLevel.HIGH: 3,
            RiskLevel.CRITICAL: 4,
        }
        # Risk levels outside the table are reported as tier 0.
        tier = tier_by_risk.get(self._get_max_risk_level(playbook), 0)
        record_auto_repair(action=action, tier=tier, success=success)
    except Exception as e:
        logger.warning(
            "auto_repair_metric_record_failed",
            playbook_id=playbook.playbook_id,
            success=success,
            error=str(e),
        )
def _is_host_or_backup_incident(self, incident: Incident) -> bool:
"""主機/備份類事件只能走 SSH/只讀診斷,不允許 K8s rollout 類修復。"""
@@ -827,6 +968,175 @@ class AutoRepairService:
# 安全降級:檢查失敗 → 保守拒絕
return False
def _route_legacy_ssh_command_to_mcp(
self,
incident: Incident,
command: str,
) -> _SshMcpRoute | None:
"""Map read-only legacy ``ssh {host} '...'`` steps to MCP Gateway.
YAML_RULE playbooks predate the URI executor and can contain compound
shell diagnostics. Those commands should not bypass the newer
scheme-based HostRepairAgent or loosen its shell safety guard; read-only
diagnostics are instead routed to the governed SSH MCP provider.
"""
raw_command = (command or "").strip()
lowered = raw_command.lower()
if not lowered.startswith("ssh "):
return None
if any(token in lowered for token in _SSH_WRITE_KEYWORDS):
return None
if not any(token in lowered for token in _SSH_DIAGNOSTIC_KEYWORDS):
return None
host = self._resolve_ssh_host_for_incident(incident, raw_command)
if not host:
return None
params: dict[str, Any] = {"host": host}
container_name = self._resolve_container_name_for_incident(incident, raw_command)
if container_name:
params["container_name"] = container_name
return _SshMcpRoute(tool_name="ssh_diagnose", params=params)
def preview_read_only_ssh_mcp_route(
    self,
    incident: Incident,
    command: str,
) -> dict[str, Any] | None:
    """Describe the MCP Gateway call a legacy SSH diagnostic would map to.

    Remediation dry-runs use this to prove the supported executor path
    without running the original PlayBook step or writing an execution
    result.

    Returns:
        A dict with the tool name, parameters and fixed gateway attributes,
        or ``None`` when the command cannot be routed.
    """
    planned = self._route_legacy_ssh_command_to_mcp(incident, command)
    if planned is None:
        return None
    preview: dict[str, Any] = {
        "tool_name": planned.tool_name,
        "params": planned.params,
    }
    preview["agent_id"] = "auto_repair_executor"
    preview["required_scope"] = "read"
    preview["flywheel_node"] = "execute"
    return preview
def _resolve_ssh_host_for_incident(self, incident: Incident, command: str) -> str:
"""Resolve ``{host}``, short host labels, and exporter instance ports."""
labels = self._incident_labels(incident)
raw_host = ""
match = re.match(r"ssh\s+([^\s'\"]+)", command.strip(), flags=re.IGNORECASE)
if match:
raw_host = match.group(1)
if not raw_host or "{" in raw_host or "}" in raw_host:
raw_host = (
str(labels.get("host") or "")
or str(labels.get("instance") or "")
or str(labels.get("node") or "")
or str(labels.get("exported_instance") or "")
)
return self._normalize_ssh_host(raw_host)
@staticmethod
def _normalize_ssh_host(raw_host: str) -> str:
    """Reduce a host reference to a bare host name or address.

    Strips, in order: surrounding whitespace, a leading ``ssh://``, a
    ``user@`` prefix, ``[...]`` brackets, and a numeric ``:port`` suffix when
    exactly one colon remains. Short labels (``110``, ``host-110``,
    ``node-exporter-110`` and the other mapped hosts) are then expanded
    through ``_SHORT_HOST_MAP``; anything else is returned as-is.
    """
    target = (raw_host or "").strip().removeprefix("ssh://")
    # Keep only what follows the last "@" (no-op when there is none).
    target = target.rpartition("@")[2]
    if target.startswith("[") and "]" in target:
        target = target[1:target.index("]")]
    if target.count(":") == 1:
        name_part, _, port_part = target.rpartition(":")
        if port_part.isdigit():
            target = name_part

    if target in _SHORT_HOST_MAP:
        return _SHORT_HOST_MAP[target]
    short_label = re.fullmatch(r"(?:node-exporter-|host-)?(110|120|121|188)", target)
    if short_label:
        return _SHORT_HOST_MAP[short_label.group(1)]
    return target
def _resolve_container_name_for_incident(
self,
incident: Incident,
command: str,
) -> str:
labels = self._incident_labels(incident)
for key in ("container_name", "container", "name"):
value = str(labels.get(key) or "").strip()
if value and "{" not in value and "}" not in value:
return value
match = re.search(
r"docker\s+(?:stats\s+--no-stream|inspect|logs|top|ps\s+-a\s+--filter\s+name=)\s+([a-zA-Z0-9._-]+)",
command,
)
return match.group(1) if match else ""
@staticmethod
def _incident_labels(incident: Incident) -> dict[str, Any]:
for signal in incident.signals or []:
labels = getattr(signal, "labels", None)
if labels:
return labels
return {}
async def _execute_ssh_mcp_route(
    self,
    incident: Incident,
    route: _SshMcpRoute,
) -> str:
    """Execute a routed SSH diagnostic through AwoooP MCP Gateway.

    The call is made with read scope as agent ``auto_repair_executor`` and
    with MCP audit context attached to the parameters.

    Args:
        incident: Incident the diagnostic belongs to; its id is used as the
            trace id and in the audit session id.
        route: Tool name and parameters produced by the legacy SSH router.

    Returns:
        ``"SUCCESS: mcp:<tool> <output preview>"`` (output capped at 500
        characters) or ``"FAILED: mcp:<tool> <detail>"``. This method reports
        every failure through the returned string instead of raising.
    """
    # The lazy imports get their own guard. Previously they lived inside the
    # try whose first handler is ``except McpGatewayError``: if an import
    # failed, that local name was still unbound, so evaluating the handler
    # raised UnboundLocalError and the failure escaped as a crash instead of
    # a "FAILED:" step result.
    try:
        from src.db.base import get_db_context
        from src.plugins.mcp.gateway import GatewayContext, McpGateway, McpGatewayError
        from src.services.mcp_audit_context import with_mcp_audit_context
    except Exception as exc:
        logger.warning(
            "auto_repair_ssh_mcp_route_failed",
            incident_id=incident.incident_id,
            tool=route.tool_name,
            error=str(exc),
        )
        return f"FAILED: mcp:{route.tool_name} {exc}"

    try:
        incident_id = incident.incident_id
        params = with_mcp_audit_context(
            route.params,
            session_id=f"incident:{incident_id}:auto_repair_execute",
            incident_id=incident_id,
            flywheel_node="execute",
            agent_role="auto_repair_executor",
        )
        async with get_db_context("awoooi") as db:
            ctx = GatewayContext(
                project_id="awoooi",
                agent_id="auto_repair_executor",
                tool_name=route.tool_name,
                trace_id=incident_id,
                is_shadow=False,
                environment={"env": "prod"},
                required_scope="read",
            )
            result = await McpGateway(db).call(ctx, params)
    except McpGatewayError as exc:
        # Gateway refusals carry an error code worth surfacing verbatim.
        return f"FAILED: mcp:{route.tool_name} {exc.error_code}: {exc}"
    except Exception as exc:
        logger.warning(
            "auto_repair_ssh_mcp_route_failed",
            incident_id=incident.incident_id,
            tool=route.tool_name,
            error=str(exc),
        )
        return f"FAILED: mcp:{route.tool_name} {exc}"

    if result.success:
        preview = str(result.output or "")[:500]
        return f"SUCCESS: mcp:{route.tool_name} {preview}".strip()
    return f"FAILED: mcp:{route.tool_name} {result.error or 'execution failed'}"
async def _execute_step(self, incident: Incident, step) -> str:
"""
執行單一修復步驟
@@ -858,6 +1168,10 @@ class AutoRepairService:
# 2026-04-06 Claude Code: Sprint 3 — repair_by_uri (URI scheme 路由)
if step.action_type == ActionType.SSH_COMMAND:
route = self._route_legacy_ssh_command_to_mcp(incident, step.command)
if route is not None:
return await self._execute_ssh_mcp_route(incident, route)
from src.services.host_repair_agent import HostRepairAgent
agent = HostRepairAgent()
approved = not getattr(step, "requires_approval", False)

View File

@@ -0,0 +1,433 @@
"""AwoooP Ansible audit helpers.
This module is intentionally non-executing. It exposes the Ansible audit
contract and repo-known playbook catalog so the truth chain can say whether
Ansible was actually considered or executed, without pretending that catalog
hints are runtime remediation.
"""
from __future__ import annotations
import json
from typing import Any
import structlog
from sqlalchemy import text
from src.db.base import get_db_context
logger = structlog.get_logger(__name__)
# Operation types that mark an automation_operation_log row as Ansible-related.
# _is_ansible_operation compares the lowercased ``operation_type`` of a row
# against this set.
ANSIBLE_OPERATION_TYPES = frozenset({
    "ansible_candidate_matched",
    "ansible_check_mode_executed",
    "ansible_apply_executed",
    "ansible_rollback_executed",
    "ansible_execution_skipped",
})
# Static catalog of repo-known playbooks. Each entry's ``keywords`` are matched
# as plain substrings of the lowercased incident/drift text in _catalog_hints;
# ``keywords`` itself is not exposed in the hint output. Every entry listed
# here has ``auto_apply_enabled`` False and ``approval_required`` True.
_CATALOG: tuple[dict[str, Any], ...] = (
    {
        "catalog_id": "ansible:110-devops",
        "playbook_path": "infra/ansible/playbooks/110-devops.yml",
        "inventory_hosts": ["host_110"],
        "domains": ["swap", "harbor", "sentry", "gitea", "langfuse", "bitan", "runner", "keepalived", "nginx"],
        "keywords": [
            "110",
            "docker",
            "container",
            "dockercontainerunhealthy",
            "swap",
            "harbor",
            "sentry",
            "gitea",
            "langfuse",
            "bitan",
            "runner",
            "github-runner",
            "keepalived",
        ],
        "supports_check_mode": True,
        "auto_apply_enabled": False,
        "approval_required": True,
        "risk_level": "medium",
    },
    {
        "catalog_id": "ansible:188-ai-web",
        "playbook_path": "infra/ansible/playbooks/188-ai-web.yml",
        "inventory_hosts": ["host_188"],
        "domains": ["docker", "momo_backup", "signoz", "minio", "litellm", "n8n", "open_webui", "nginx"],
        "keywords": [
            "188",
            "docker",
            "container",
            "dockercontainerunhealthy",
            "momo",
            "backup",
            "postgresql",
            "pg_backup",
            "signoz",
            "minio",
            "litellm",
            "n8n",
            "open-webui",
            "openwebui",
            "docker-registry",
        ],
        "supports_check_mode": True,
        "auto_apply_enabled": False,
        "approval_required": True,
        "risk_level": "medium",
    },
    {
        "catalog_id": "ansible:nginx-sync",
        "playbook_path": "infra/ansible/playbooks/nginx-sync.yml",
        "inventory_hosts": ["host_110", "host_188"],
        "domains": ["nginx", "proxy", "ollama_proxy", "tls"],
        "keywords": ["nginx", "proxy", "ollama", "gcp", "tls", "cert", "502", "upstream"],
        "supports_check_mode": True,
        "auto_apply_enabled": False,
        "approval_required": True,
        "risk_level": "medium",
    },
    {
        # The only entry without check-mode support and the only "high" risk one.
        "catalog_id": "ansible:restore-password-auth",
        "playbook_path": "infra/ansible/playbooks/restore-password-auth.yml",
        "inventory_hosts": ["host_110", "host_120", "host_121", "host_188"],
        "domains": ["ssh", "password_auth"],
        "keywords": ["ssh", "passwordauthentication", "password auth", "login", "auth"],
        "supports_check_mode": False,
        "auto_apply_enabled": False,
        "approval_required": True,
        "risk_level": "high",
    },
)
def _get(row: dict[str, Any], key: str) -> Any:
return row.get(key)
def _tags(row: dict[str, Any]) -> list[str]:
raw = _get(row, "tags")
if isinstance(raw, list):
return [str(item).lower() for item in raw]
if isinstance(raw, str):
return [part.strip().lower() for part in raw.split(",") if part.strip()]
return []
def _first_present(row: dict[str, Any], keys: tuple[str, ...]) -> Any:
    """Return the first value among ``keys`` that is neither ``None`` nor ``""``.

    Keys are tried in the given order; ``None`` is returned when none of them
    carries a usable value.
    """
    candidates = (_get(row, key) for key in keys)
    return next((value for value in candidates if value not in (None, "")), None)
def _is_ansible_operation(row: dict[str, Any]) -> bool:
    """Decide whether an automation-op row is Ansible-related.

    A row qualifies when any of these holds, checked in order:

    1. its lowercased ``operation_type`` is in ``ANSIBLE_OPERATION_TYPES``;
    2. its tags contain ``"ansible"``;
    3. the first non-empty executor/backend field equals ``"ansible"``;
    4. the first non-empty playbook path contains ``infra/ansible/``, or ends
       with ``.yml`` and mentions ``ansible`` anywhere in the path.
    """
    op_type = str(_get(row, "operation_type") or "").lower()
    if op_type in ANSIBLE_OPERATION_TYPES or "ansible" in _tags(row):
        return True

    executor_keys = (
        "input_executor",
        "input_execution_backend",
        "output_executor",
        "output_execution_backend",
    )
    backend = str(_first_present(row, executor_keys) or "").lower()
    if backend == "ansible":
        return True

    path_keys = (
        "input_playbook_path",
        "output_playbook_path",
        "input_ansible_playbook_path",
        "output_ansible_playbook_path",
    )
    path = str(_first_present(row, path_keys) or "").lower()
    if "infra/ansible/" in path:
        return True
    # Explicit grouping of the original ``A or B and C`` expression.
    return path.endswith(".yml") and "ansible" in path
def _ansible_record(row: dict[str, Any]) -> dict[str, Any]:
    """Project one automation-op row onto the public Ansible record shape.

    Fields that may be stored under several column names are resolved with
    ``_first_present`` in the listed key order; every other field is copied
    from the row as-is (``None`` when absent).
    """
    playbook_path_keys = (
        "input_playbook_path",
        "output_playbook_path",
        "input_ansible_playbook_path",
        "output_ansible_playbook_path",
    )
    record: dict[str, Any] = {}
    for name in ("op_id", "operation_type", "status", "actor"):
        record[name] = _get(row, name)
    record["playbook_id"] = _first_present(row, ("input_playbook_id", "output_playbook_id"))
    record["playbook_path"] = _first_present(row, playbook_path_keys)
    record["check_mode"] = _first_present(row, ("input_check_mode", "output_check_mode"))
    record["not_used_reason"] = _first_present(
        row, ("input_not_used_reason", "output_not_used_reason")
    )
    for name in ("dry_run_result", "error", "duration_ms", "tags", "created_at"):
        record[name] = _get(row, name)
    return record
def _flatten_text(value: Any, pieces: list[str], remaining: int = 80) -> int:
if remaining <= 0 or value is None:
return remaining
if isinstance(value, dict):
for key, item in value.items():
remaining = _flatten_text(key, pieces, remaining)
remaining = _flatten_text(item, pieces, remaining)
if remaining <= 0:
break
return remaining
if isinstance(value, list):
for item in value:
remaining = _flatten_text(item, pieces, remaining)
if remaining <= 0:
break
return remaining
pieces.append(str(value).lower())
return remaining - 1
def _source_haystack(incident: dict[str, Any] | None, drift: dict[str, Any] | None) -> str:
    """Join the flattened, lowercased text of ``incident`` and ``drift``.

    Each source is flattened with its own default piece budget; the pieces are
    joined with single spaces, incident text first.
    """
    fragments: list[str] = []
    for source in (incident, drift):
        _flatten_text(source, fragments)
    return " ".join(fragments)
def _catalog_hints(incident: dict[str, Any] | None, drift: dict[str, Any] | None) -> dict[str, Any]:
    """Match catalog keywords against the incident/drift text.

    A catalog entry becomes a candidate when at least one of its keywords is a
    substring of the haystack; its score is the number of matching keywords.
    Candidates are ordered by descending score, then by ``catalog_id``.
    Entries without a match are listed by id in ``unmatched_catalog_ids``.
    The entry's ``keywords`` list itself is not part of the output.
    """
    exposed_keys = {
        "catalog_id",
        "playbook_path",
        "inventory_hosts",
        "domains",
        "supports_check_mode",
        "auto_apply_enabled",
        "approval_required",
        "risk_level",
    }
    haystack = _source_haystack(incident, drift)
    candidates: list[dict[str, Any]] = []
    unmatched: list[str] = []
    for entry in _CATALOG:
        hits = [word for word in entry["keywords"] if word in haystack]
        if not hits:
            unmatched.append(entry["catalog_id"])
            continue
        exposed = {name: value for name, value in entry.items() if name in exposed_keys}
        exposed["match_score"] = len(hits)
        exposed["matched_keywords"] = hits
        candidates.append(exposed)
    candidates.sort(key=lambda row: (-int(row["match_score"]), str(row["catalog_id"])))
    return {
        "match_mode": "static_catalog_keyword_hint_v1",
        "decision_effect": "none",
        "available_count": len(_CATALOG),
        "candidates": candidates,
        "unmatched_catalog_ids": unmatched,
    }
def build_ansible_truth(
    automation_ops: list[dict[str, Any]],
    *,
    incident: dict[str, Any] | None,
    drift: dict[str, Any] | None,
) -> dict[str, Any]:
    """Assemble the truth-chain Ansible section.

    ``records`` holds one entry per row of ``automation_ops`` that
    ``_is_ansible_operation`` accepts; ``considered`` is true exactly when
    there is at least one such record. ``candidate_catalog`` carries the
    keyword hints for ``incident``/``drift``, and ``not_used_reason`` is set
    only when no Ansible record was found.
    """
    records: list[dict[str, Any]] = []
    for row in automation_ops:
        if _is_ansible_operation(row):
            records.append(_ansible_record(row))

    if records:
        not_used_reason = None
    else:
        not_used_reason = (
            "no automation_operation_log row with Ansible operation type, tag, or executor backend for this source"
        )

    audit_contract = {
        "schema_version": "ansible_executor_audit_v1",
        "operation_types": sorted(ANSIBLE_OPERATION_TYPES),
        "required_audit_fields": [
            "operation_type",
            "status",
            "actor",
            "input.executor",
            "input.playbook_path",
            "input.check_mode",
            "output.not_used_reason",
            "dry_run_result",
        ],
        "default_execution_mode": "catalog/dry-run audit only until approval execution is explicitly wired",
    }
    return {
        "considered": bool(records),
        "records": records,
        "audit_contract": audit_contract,
        "candidate_catalog": _catalog_hints(incident, drift),
        "not_used_reason": not_used_reason,
    }
def _incident_public_dict(incident: Any) -> dict[str, Any]:
if incident is None:
return {}
if isinstance(incident, dict):
return incident
severity = getattr(incident, "severity", None)
signals_payload: list[dict[str, Any]] = []
for signal in getattr(incident, "signals", None) or []:
signals_payload.append({
"alert_name": getattr(signal, "alert_name", None),
"labels": getattr(signal, "labels", None) or {},
"annotations": getattr(signal, "annotations", None) or {},
})
return {
"incident_id": getattr(incident, "incident_id", None),
"project_id": getattr(incident, "project_id", None),
"alertname": getattr(incident, "alertname", None),
"alert_category": getattr(incident, "alert_category", None),
"notification_type": getattr(incident, "notification_type", None),
"severity": getattr(severity, "value", severity),
"affected_services": getattr(incident, "affected_services", None) or [],
"signals": signals_payload,
}
def build_ansible_decision_audit_payload(
    *,
    incident: Any,
    proposal_data: dict[str, Any],
    decision_path: str,
    not_used_reason: str,
) -> dict[str, Any] | None:
    """Build the audit payload for a decision that has Ansible catalog candidates.

    Returns:
        ``None`` when the incident matches no catalog entry; otherwise a dict
        with ``operation_type``, ``status``, ``input``, ``output``,
        ``dry_run_result`` and ``tags``. At most the five best candidates are
        embedded, and the proposal action preview is cut to 240 characters.
    """
    incident_payload = _incident_public_dict(incident)
    hints = _catalog_hints(incident_payload, None)
    candidates = hints.get("candidates") or []
    if not candidates:
        return None

    candidate_fields = (
        "catalog_id",
        "playbook_path",
        "inventory_hosts",
        "risk_level",
        "match_score",
        "matched_keywords",
    )
    executor_candidates = [
        {name: row[name] for name in candidate_fields} for row in candidates[:5]
    ]
    action_text = str(
        proposal_data.get("action") or proposal_data.get("kubectl_command") or ""
    )

    input_payload = {
        "incident_id": str(incident_payload.get("incident_id") or ""),
        "executor": "ansible",
        "execution_backend": "ansible",
        "decision_path": decision_path,
        "check_mode": True,
        "apply_enabled": False,
        "approval_required": True,
        "candidate_catalog_schema": hints["match_mode"],
        "executor_candidates": executor_candidates,
        "proposal_source": proposal_data.get("source", ""),
        "proposal_risk_level": proposal_data.get("risk_level", ""),
        "proposal_action_preview": action_text[:240],
    }
    output_payload = {
        "not_used_reason": not_used_reason,
        "decision_effect": "audit_only",
        "next_required_step": "wire approval_execution to Ansible check-mode before apply",
    }
    dry_run_result = {
        "check_mode_executed": False,
        "candidate_count": len(candidates),
        "reason": not_used_reason,
    }
    return {
        "operation_type": "ansible_candidate_matched",
        "status": "dry_run",
        "input": input_payload,
        "output": output_payload,
        "dry_run_result": dry_run_result,
        "tags": ["ansible", "decision", "candidate", "check_mode_pending"],
    }
async def record_ansible_decision_audit(
    *,
    incident: Any,
    proposal_data: dict[str, Any],
    decision_path: str,
    not_used_reason: str,
) -> bool:
    """Write a best-effort Ansible candidate audit row for one decision.

    Returns:
        ``True`` only when a new ``automation_operation_log`` row was inserted.
        ``False`` when the incident has no catalog candidates, when a matching
        ``ansible_candidate_matched`` row already exists for the incident, or
        when the database work raises (the error is logged, not re-raised).
    """
    # NOTE(review): the payload is built outside the try block, so an exception
    # raised while building it propagates to the caller.
    payload = build_ansible_decision_audit_payload(
        incident=incident,
        proposal_data=proposal_data,
        decision_path=decision_path,
        not_used_reason=not_used_reason,
    )
    if payload is None:
        return False
    incident_id = payload["input"]["incident_id"]
    # Falls back to the "awoooi" project when the incident carries no project_id.
    project_id = getattr(incident, "project_id", None) or "awoooi"
    try:
        async with get_db_context(str(project_id)) as db:
            # Dedup: skip when this incident already has a candidate audit row.
            # NOTE(review): incident_id is "" for incidents without an id, so all
            # such incidents share one dedup key -- confirm this is acceptable.
            # NOTE(review): SELECT-then-INSERT is not atomic in this code; two
            # concurrent callers could both insert unless the DB prevents it.
            existing = await db.execute(
                text("""
SELECT op_id
FROM automation_operation_log
WHERE operation_type = 'ansible_candidate_matched'
AND input ->> 'incident_id' = :incident_id
AND input ->> 'executor' = 'ansible'
LIMIT 1
"""),
                {"incident_id": incident_id},
            )
            if existing.scalar() is not None:
                return False
            # The JSON columns are passed as serialized strings and cast in SQL.
            await db.execute(
                text("""
INSERT INTO automation_operation_log (
operation_type, actor, status,
input, output, dry_run_result, tags
) VALUES (
:operation_type,
'decision_manager',
:status,
CAST(:input AS jsonb),
CAST(:output AS jsonb),
CAST(:dry_run_result AS jsonb),
:tags
)
"""),
                {
                    "operation_type": payload["operation_type"],
                    "status": payload["status"],
                    "input": json.dumps(payload["input"], ensure_ascii=False),
                    "output": json.dumps(payload["output"], ensure_ascii=False),
                    "dry_run_result": json.dumps(payload["dry_run_result"], ensure_ascii=False),
                    "tags": payload["tags"],
                },
            )
            # NOTE(review): no explicit commit here; presumably get_db_context
            # commits on exit -- confirm.
            return True
    except Exception as exc:
        # Best-effort: a failed audit write is logged and reported as False.
        logger.warning(
            "ansible_decision_audit_write_failed",
            incident_id=incident_id,
            error=str(exc),
        )
        return False

View File

@@ -46,6 +46,8 @@ from typing import Any
import structlog
from src.core.redis_client import get_redis
logger = structlog.get_logger(__name__)
# ─────────────────────────────────────────────────────────────────────────────
@@ -219,29 +221,23 @@ async def record_approval(
exp = payload["exp"]
try:
import aioredis
from src.core.config import settings
redis = aioredis.from_url(settings.REDIS_URL)
redis = get_redis()
# jti NX
jti_key = f"{_JTI_KEY_PREFIX}{jti}"
ttl_remaining = max(exp - int(time.time()), 1)
ok = await redis.set(jti_key, "1", nx=True, ex=ttl_remaining)
if not ok:
await redis.aclose()
raise TokenReplayError(f"jti={jti!r} 已使用")
# SADD approver
sig_key = f"{_SIG_SET_PREFIX}{project_id}:{run_id}:{tool_name}"
added = await redis.sadd(sig_key, approver_id)
if added == 0:
await redis.aclose()
raise DuplicateApproverError(f"approver '{approver_id}' 已簽核")
await redis.expire(sig_key, _SIG_TTL_SECONDS)
count = int(await redis.scard(sig_key))
await redis.aclose()
logger.info(
"awooop_approval_recorded",
@@ -271,13 +267,9 @@ async def check_approval_quorum(
檢查 quorum。Raises QuorumNotMetError if 不足。
"""
try:
import aioredis
from src.core.config import settings
redis = aioredis.from_url(settings.REDIS_URL)
redis = get_redis()
sig_key = f"{_SIG_SET_PREFIX}{project_id}:{run_id}:{tool_name}"
count = int(await redis.scard(sig_key))
await redis.aclose()
if count < required_count:
raise QuorumNotMetError(f"簽核數不足({count}/{required_count}")

File diff suppressed because it is too large Load Diff

View File

@@ -336,7 +336,7 @@ async def _get_tenant_budget_limit(project_id: str) -> Decimal | None:
try:
from sqlalchemy import text
from src.db.base import get_db_context
async with get_db_context() as db:
async with get_db_context(project_id) as db:
row = await db.execute(
text("SELECT budget_limit_usd FROM awooop_projects WHERE project_id = :pid"),
{"pid": project_id},

View File

@@ -280,6 +280,7 @@ async def dispatch_action(
# MCP registry dispatch
from src.plugins.mcp.registry import get_provider
from src.services.mcp_audit_context import with_mcp_audit_context
provider_name = _resolve_provider_name(spec.mcp_provider)
provider = get_provider(provider_name)
if not provider:
@@ -293,8 +294,16 @@ async def dispatch_action(
)
# 執行 MCP tool with timeout
audited_params = with_mcp_audit_context(
resolved_params,
session_id=f"callback:{incident_id}:{action_name}",
incident_id=incident_id,
flywheel_node="operate",
agent_role="telegram_callback_dispatcher",
operator_user_id=user_id,
)
mcp_result = await asyncio.wait_for(
provider.execute(spec.mcp_tool, resolved_params),
provider.execute(spec.mcp_tool, audited_params),
timeout=float(spec.timeout_sec),
)

File diff suppressed because it is too large Load Diff

View File

@@ -28,23 +28,201 @@ from __future__ import annotations
import asyncio
import hashlib
import html
import json
from datetime import datetime, timezone
import re
from datetime import UTC, datetime
from typing import Any
from uuid import UUID
from uuid import NAMESPACE_URL, UUID, uuid5
import structlog
from sqlalchemy import select, text
from sqlalchemy.ext.asyncio import AsyncSession
from src.db.awooop_models import AwoooPRunState
from src.services.audit_sink import _redact_string
from src.services.audit_sink import _redact_string, sanitize
from src.services.platform_runtime import create_run
logger = structlog.get_logger(__name__)
# Progressive Feedback Policy等待超過此秒數才發 interim 訊息
_INTERIM_WAIT_SECONDS = 30
_INBOUND_REDACTION_VERSION = "audit_sink_v1"
_OUTBOUND_REDACTION_VERSION = "audit_sink_v1"
_INCIDENT_ID_RE = re.compile(r"\bINC-\d{8}-[A-Z0-9]{6}\b")
def _db_timestamp_now() -> datetime:
"""Return UTC now in the timestamp shape accepted by the production DB path."""
return datetime.now(UTC).replace(tzinfo=None)
def _compact_unique(values: list[str | None], *, limit: int = 20) -> list[str]:
"""Return stable non-empty values without leaking duplicate source refs."""
return sorted({str(value).strip() for value in values if str(value or "").strip()})[:limit]
def build_inbound_source_envelope(
    *,
    provider: str,
    stage: str,
    provider_event_id: str,
    raw_event_id: str | None = None,
    raw_content: str | None = None,
    alertname: str | None = None,
    severity: str | None = None,
    namespace: str | None = None,
    target_resource: str | None = None,
    fingerprint: str | None = None,
    incident_id: str | None = None,
    approval_id: str | None = None,
    source_url: str | None = None,
    labels: dict[str, Any] | None = None,
    annotations: dict[str, Any] | None = None,
    extra: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build the sanitized ``inbound_source_envelope_v1`` dict for an inbound event.

    The envelope records the normalized provider name, the SHA-256 and length
    of ``raw_content``, cross-reference ids (including incident ids found in
    the content text) and the alert correlation fields. The whole envelope is
    passed through ``sanitize``; ``content_sha256`` is written back afterwards
    so the returned hash is always the one computed here.
    """
    provider_name = str(provider or "unknown").strip().lower() or "unknown"

    # Empty or missing content has no hash.
    if raw_content:
        content_sha256 = hashlib.sha256(raw_content.encode()).hexdigest()
    else:
        content_sha256 = None
    incident_ids_in_text = _INCIDENT_ID_RE.findall(raw_content or "")

    # Provider-specific reference lists stay empty for other providers.
    sentry_refs = [raw_event_id, provider_event_id] if provider_name == "sentry" else []
    signoz_refs = [raw_event_id, alertname] if provider_name == "signoz" else []

    source_refs = {
        "event_ids": _compact_unique([raw_event_id]),
        "incident_ids": _compact_unique([incident_id, *incident_ids_in_text]),
        "approval_ids": _compact_unique([approval_id]),
        "alert_ids": _compact_unique([provider_event_id, raw_event_id]),
        "fingerprints": _compact_unique([fingerprint]),
        "sentry_issue_ids": _compact_unique(sentry_refs),
        "signoz_alerts": _compact_unique(signoz_refs),
    }
    log_correlation = {
        "alertname": alertname,
        "severity": severity,
        "namespace": namespace,
        "target_resource": target_resource,
        "fingerprint": fingerprint,
    }
    content_length = 0 if raw_content is None else len(raw_content)

    envelope: dict[str, Any] = {
        "schema_version": "inbound_source_envelope_v1",
        "redaction_version": _INBOUND_REDACTION_VERSION,
        "adapter": f"{provider_name}_webhook",
        "provider": provider_name,
        "stage": stage,
        "provider_event_id": provider_event_id,
        "source_url": source_url,
        "content_sha256": content_sha256,
        "content_length": content_length,
        "source_refs": source_refs,
        "log_correlation": log_correlation,
        "labels": labels or {},
        "annotations": annotations or {},
    }
    if extra:
        envelope["extra"] = extra

    sanitized = sanitize(envelope)
    sanitized["content_sha256"] = content_sha256
    return sanitized
def _input_sha256(input_payload: dict[str, Any] | None) -> str | None:
"""計算 Run input 的穩定 hash讓 mirror run 也能保留最小完整性證據。"""
if not input_payload:
return None
canonical = json.dumps(
input_payload,
sort_keys=True,
separators=(",", ":"),
ensure_ascii=False,
)
return hashlib.sha256(canonical.encode()).hexdigest()
async def ensure_completed_shadow_run(
    db: AsyncSession,
    *,
    project_id: str,
    run_id: UUID,
    agent_id: str,
    trigger_type: str,
    trigger_ref: str | None,
    input_payload: dict[str, Any] | None = None,
) -> bool:
    """Insert a completed shadow run for mirrored legacy data.

    During the strangler phase AwoooP first mirrors legacy Telegram /
    alert-grouping data. Those events must not re-trigger the runtime, but
    they need a run_state row as the aggregation anchor for the Console; the
    row created here is therefore an already-completed shadow run that the
    worker will not pick up.

    Returns:
        ``True`` when a new row was inserted, ``False`` when a row with the
        same ``run_id`` already existed (the insert is a no-op on conflict).
    """
    # state='completed', is_shadow=TRUE, and all three timestamps set to NOW().
    result = await db.execute(
        text("""
INSERT INTO awooop_run_state (
run_id, project_id, agent_id, state,
trigger_type, trigger_ref, is_shadow,
input_sha256,
attempt_count, max_attempts, cost_usd, step_count,
created_at, completed_at, timeout_at
) VALUES (
:run_id, :project_id, :agent_id, 'completed',
:trigger_type, :trigger_ref, TRUE,
:input_sha256,
0, 3, 0.0000, 0,
NOW(), NOW(), NOW()
)
ON CONFLICT (run_id) DO NOTHING
RETURNING run_id
"""),
        {
            "run_id": run_id,
            "project_id": project_id,
            "agent_id": agent_id,
            "trigger_type": trigger_type,
            "trigger_ref": trigger_ref,
            "input_sha256": _input_sha256(input_payload),
        },
    )
    # RETURNING yields a row only when the INSERT actually happened.
    inserted = result.fetchone() is not None
    if inserted:
        logger.info(
            "completed_shadow_run_created",
            project_id=project_id,
            run_id=str(run_id),
            agent_id=agent_id,
            trigger_type=trigger_type,
        )
    return inserted
def build_grouped_alert_run_id(project_id: str, provider_event_id: str) -> UUID:
    """Derive a deterministic run_id for a grouped child alert.

    The same project/event pair always maps to the same UUID, so the run can
    be looked up again from the Run Monitor.
    """
    seed = f"awooop:grouped-alert:{project_id}:{provider_event_id}"
    return uuid5(NAMESPACE_URL, seed)
def build_alertmanager_provider_event_id(alert_id: str, fingerprint: str, stage: str) -> str:
    """Build the idempotent provider_event_id for an Alertmanager inbound event.

    Each part is stringified and stripped; fingerprint and stage are cut to 32
    characters. Empty parts fall back to ``unknown`` / ``no-fingerprint`` /
    ``received``.
    """
    alert_part = str(alert_id).strip()
    if not alert_part:
        alert_part = "unknown"
    fingerprint_part = str(fingerprint).strip()[:32]
    if not fingerprint_part:
        fingerprint_part = "no-fingerprint"
    stage_part = str(stage).strip()[:32]
    if not stage_part:
        stage_part = "received"
    return ":".join(("alertmanager", stage_part, alert_part, fingerprint_part))
def build_alertmanager_run_id(project_id: str, provider_event_id: str) -> UUID:
    """Derive a deterministic shadow run_id for an Alertmanager inbound mirror."""
    seed = f"awooop:alertmanager:{project_id}:{provider_event_id}"
    return uuid5(NAMESPACE_URL, seed)
def build_external_alert_provider_event_id(provider: str, event_id: str, stage: str) -> str:
    """Build the idempotent provider_event_id for an external (Sentry/SignOz-style) alert.

    The provider is stripped, lowercased and cut to 32 characters, the event
    id to 96 and the stage to 32. Empty parts fall back to ``external`` /
    ``unknown`` / ``received``.
    """
    provider_part = str(provider).strip().lower()[:32]
    if not provider_part:
        provider_part = "external"
    event_part = str(event_id).strip()[:96]
    if not event_part:
        event_part = "unknown"
    stage_part = str(stage).strip()[:32]
    if not stage_part:
        stage_part = "received"
    return ":".join((provider_part, stage_part, event_part))
def build_external_alert_run_id(project_id: str, provider_event_id: str) -> UUID:
    """Derive a deterministic shadow run_id for an external-alert inbound mirror."""
    seed = f"awooop:external-alert:{project_id}:{provider_event_id}"
    return uuid5(NAMESPACE_URL, seed)
# ─────────────────────────────────────────────────────────────────────────────
@@ -62,6 +240,7 @@ async def mirror_inbound_event(
channel_chat_id: str | None = None,
content_type: str = "text",
raw_content: str | None = None,
source_envelope: dict[str, Any] | None = None,
attachment_sha256: str | None = None,
provider_ts: datetime | None = None,
run_id: UUID | None = None,
@@ -75,12 +254,32 @@ async def mirror_inbound_event(
"""
content_hash: str | None = None
content_preview: str | None = None
content_redacted: str | None = None
if raw_content is not None:
content_hash = hashlib.sha256(raw_content.encode()).hexdigest()
# previewredact 後截取前 256 字元
redacted = _redact_string(raw_content)
content_preview = redacted[:256] if len(redacted) > 256 else redacted
content_redacted = _redact_string(raw_content)
content_preview = (
content_redacted[:256] if len(content_redacted) > 256 else content_redacted
)
if source_envelope and source_envelope.get("schema_version") == "inbound_source_envelope_v1":
original_content_sha256 = source_envelope.get("content_sha256")
envelope = sanitize(source_envelope)
envelope.setdefault("redaction_version", _INBOUND_REDACTION_VERSION)
envelope["content_sha256"] = content_hash or original_content_sha256
envelope.setdefault("content_length", len(raw_content) if raw_content is not None else 0)
else:
envelope = build_inbound_source_envelope(
provider=channel_type,
stage="received",
provider_event_id=provider_event_id,
raw_event_id=provider_event_id,
raw_content=raw_content,
extra=source_envelope,
)
source_envelope_json = json.dumps(envelope, ensure_ascii=False, default=str)
result = await db.execute(
text("""
@@ -88,16 +287,28 @@ async def mirror_inbound_event(
project_id, channel_type, provider_event_id,
platform_subject_id, channel_user_id, channel_chat_id,
run_id, content_type, content_hash, content_preview,
content_redacted, redaction_version, source_envelope,
attachment_sha256, is_duplicate, provider_ts, received_at
) VALUES (
:project_id, :channel_type, :provider_event_id,
:platform_subject_id, :channel_user_id, :channel_chat_id,
:run_id, :content_type, :content_hash, :content_preview,
:content_redacted, :redaction_version, CAST(:source_envelope AS jsonb),
:attachment_sha256, :is_duplicate, :provider_ts, NOW()
)
ON CONFLICT (project_id, channel_type, provider_event_id) DO UPDATE SET
is_duplicate = TRUE,
run_id = COALESCE(EXCLUDED.run_id, awooop_conversation_event.run_id)
run_id = COALESCE(EXCLUDED.run_id, awooop_conversation_event.run_id),
content_redacted = COALESCE(
awooop_conversation_event.content_redacted,
EXCLUDED.content_redacted
),
redaction_version = EXCLUDED.redaction_version,
source_envelope = CASE
WHEN awooop_conversation_event.source_envelope = '{}'::jsonb
THEN EXCLUDED.source_envelope
ELSE awooop_conversation_event.source_envelope
END
RETURNING event_id
"""),
{
@@ -111,6 +322,9 @@ async def mirror_inbound_event(
"content_type": content_type,
"content_hash": content_hash,
"content_preview": content_preview,
"content_redacted": content_redacted,
"redaction_version": _INBOUND_REDACTION_VERSION,
"source_envelope": source_envelope_json,
"attachment_sha256": attachment_sha256,
"is_duplicate": is_duplicate,
"provider_ts": provider_ts,
@@ -128,6 +342,544 @@ async def mirror_inbound_event(
return event_id
def build_grouped_alert_provider_event_id(alert_id: str, fingerprint: str) -> str:
    """Build the idempotent provider_event_id for a grouped child alert.

    Both parts are stringified and stripped; the fingerprint is cut to 32
    characters. Empty parts fall back to ``unknown`` / ``no-fingerprint``.
    """
    alert_part = str(alert_id).strip()
    if not alert_part:
        alert_part = "unknown"
    fingerprint_part = str(fingerprint).strip()[:32]
    if not fingerprint_part:
        fingerprint_part = "no-fingerprint"
    return ":".join(("alert-group", alert_part, fingerprint_part))
def format_alertmanager_event_content(
*,
stage: str,
alert_id: str,
alertname: str,
severity: str,
namespace: str,
target_resource: str,
fingerprint: str,
notification_type: str | None = None,
alert_category: str | None = None,
incident_id: str | None = None,
approval_id: str | None = None,
repeat_count: int | None = None,
) -> str:
"""格式化 Alertmanager inbound mirror 摘要,讓 truth-chain 可回查。"""
head = f"Incident: {incident_id}" if incident_id else f"Fingerprint: {fingerprint}"
return "\n".join(
[
f"Alertmanager inbound {stage}",
head,
f"Alert ID: {alert_id}",
f"Approval: {approval_id or '-'}",
f"Alert: {alertname}",
f"Severity: {severity}",
f"Namespace: {namespace or 'default'}",
f"Target: {target_resource or '-'}",
f"Fingerprint: {fingerprint}",
f"Notification Type: {notification_type or '-'}",
f"Alert Category: {alert_category or '-'}",
f"Repeat Count: {repeat_count if repeat_count is not None else '-'}",
]
)
def format_grouped_alert_event_content(
*,
alert_id: str,
alertname: str,
severity: str,
namespace: str,
target_resource: str,
group_key: str,
count: int,
parent_fingerprint: str | None,
fingerprint: str,
) -> str:
"""格式化只落 AwoooP、不發 Telegram 的告警收斂事件摘要。"""
parent = parent_fingerprint or "-"
target = target_resource or "-"
ns = namespace or "default"
return "\n".join(
[
"告警已收斂,不發 Telegram",
f"Alert ID: {alert_id}",
f"Alert: {alertname}",
f"Severity: {severity}",
f"Namespace: {ns}",
f"Target: {target}",
f"Group: {group_key}",
f"Group Count: {count}",
f"Parent Fingerprint: {parent}",
f"Child Fingerprint: {fingerprint}",
]
)
def format_grouped_alert_digest_text(
    *,
    alertname: str,
    severity: str,
    namespace: str,
    target_resource: str,
    group_key: str,
    count: int,
) -> str:
    """Render the short HTML digest that is appended to the parent alert card.

    Every text field is HTML-escaped; empty fields fall back to ``unknown``
    (namespace to ``default``).
    """
    def esc(value: str, fallback: str) -> str:
        return html.escape(value or fallback)

    alert_html = esc(alertname, "unknown")
    severity_html = esc(severity, "unknown")
    namespace_html = esc(namespace, "default")
    target_html = esc(target_resource, "unknown")
    group_html = esc(group_key, "unknown")

    lines = (
        "🧩 <b>告警已收斂到父卡</b>",
        f"├ 類型:<code>{alert_html}</code>",
        f"├ 等級:<code>{severity_html}</code>",
        f"├ 範圍:<code>{namespace_html}</code>",
        f"├ 最新目標:<code>{target_html}</code>",
        f"├ 群組:<code>{group_html}</code>",
        f"└ 目前視窗:<b>{count}</b> 筆同組告警",
        "",
        "完整子告警請看 AwoooP Run 監控,不再逐筆發 Telegram。",
    )
    return "\n".join(lines)
async def maybe_send_grouped_alert_digest(
    *,
    project_id: str,
    alertname: str,
    severity: str,
    namespace: str,
    target_resource: str,
    group_key: str,
    count: int,
    parent_fingerprint: str | None,
) -> bool:
    """Reply with a low-frequency digest on the parent alert card, if it exists.

    Degrades quietly: returns ``False`` without sending when there is no
    parent fingerprint, when no approval record with an incident id is found
    for that fingerprint, or when any step raises (the error is logged).

    Returns:
        The value returned by the Telegram gateway's
        ``append_grouped_alert_digest`` call, or ``False`` in the cases above.
    """
    if not parent_fingerprint:
        return False
    try:
        from sqlalchemy import select
        from src.db.base import get_db_context
        from src.db.models import ApprovalRecord
        from src.services.telegram_gateway import get_telegram_gateway
        async with get_db_context(project_id) as db:
            # Newest approval record for the parent fingerprint that is already
            # linked to an incident.
            result = await db.execute(
                select(ApprovalRecord.incident_id)
                .where(ApprovalRecord.fingerprint == parent_fingerprint)
                .where(ApprovalRecord.incident_id.is_not(None))
                .order_by(ApprovalRecord.created_at.desc())
                .limit(1)
            )
            incident_id = result.scalar_one_or_none()
        if not incident_id:
            # Parent card is not ready yet; skip silently.
            logger.info(
                "grouped_alert_digest_parent_not_ready",
                project_id=project_id,
                group_key=group_key,
                parent_fingerprint=parent_fingerprint,
            )
            return False
        digest_text = format_grouped_alert_digest_text(
            alertname=alertname,
            severity=severity,
            namespace=namespace,
            target_resource=target_resource,
            group_key=group_key,
            count=count,
        )
        sent = await get_telegram_gateway().append_grouped_alert_digest(
            incident_id=str(incident_id),
            group_key=group_key,
            digest_text=digest_text,
        )
        logger.info(
            "grouped_alert_digest_result",
            project_id=project_id,
            incident_id=str(incident_id),
            group_key=group_key,
            count=count,
            sent=sent,
        )
        return sent
    except Exception as exc:
        # Fail open: a digest failure is logged and reported as "not sent".
        logger.warning(
            "grouped_alert_digest_failed",
            project_id=project_id,
            group_key=group_key,
            parent_fingerprint=parent_fingerprint,
            error=str(exc),
        )
        return False
async def record_grouped_alert_event(
    *,
    project_id: str,
    alert_id: str,
    alertname: str,
    severity: str,
    namespace: str,
    target_resource: str,
    group_key: str,
    count: int,
    parent_fingerprint: str | None,
    fingerprint: str,
) -> UUID | None:
    """
    Persist a child alert collapsed by AlertGroupingService as an AwoooP conversation_event.

    This path deliberately does not send a per-alert Telegram message and only
    keeps operator-facing context:
    - the group chat is not flooded
    - the Console can still see that alerts of the same group keep firing
    - DB failures fail open and do not affect the Alertmanager webhook ACK

    Returns:
        The id returned by ``mirror_inbound_event``, or ``None`` when any step
        raises (the error is logged, not re-raised).
    """
    try:
        from src.db.base import get_db_context
        provider_event_id = build_grouped_alert_provider_event_id(alert_id, fingerprint)
        content = format_grouped_alert_event_content(
            alert_id=alert_id,
            alertname=alertname,
            severity=severity,
            namespace=namespace,
            target_resource=target_resource,
            group_key=group_key,
            count=count,
            parent_fingerprint=parent_fingerprint,
            fingerprint=fingerprint,
        )
        async with get_db_context(project_id) as db:
            # The run_id is derived from project + event id, so repeated
            # deliveries of the same child alert map to the same shadow run.
            run_id = build_grouped_alert_run_id(project_id, provider_event_id)
            await ensure_completed_shadow_run(
                db,
                project_id=project_id,
                run_id=run_id,
                agent_id="legacy-alert-grouping",
                trigger_type="grouped_alert_event",
                trigger_ref=provider_event_id,
                input_payload={
                    "alert_id": alert_id,
                    "alertname": alertname,
                    "severity": severity,
                    "group_key": group_key,
                    "fingerprint": fingerprint,
                },
            )
            event_id = await mirror_inbound_event(
                db,
                project_id=project_id,
                channel_type="internal",
                provider_event_id=provider_event_id,
                platform_subject_id="alertmanager",
                channel_user_id="alertmanager",
                channel_chat_id=f"alert-group:{group_key}",
                content_type="text",
                raw_content=content,
                provider_ts=_db_timestamp_now(),
                run_id=run_id,
            )
        logger.info(
            "grouped_alert_event_recorded",
            project_id=project_id,
            alert_id=alert_id,
            event_id=str(event_id),
            group_key=group_key,
            count=count,
        )
        # Optional digest on the parent card; that helper handles its own
        # failures and returns False instead of raising.
        await maybe_send_grouped_alert_digest(
            project_id=project_id,
            alertname=alertname,
            severity=severity,
            namespace=namespace,
            target_resource=target_resource,
            group_key=group_key,
            count=count,
            parent_fingerprint=parent_fingerprint,
        )
        return event_id
    except Exception as exc:
        # Fail open so the webhook caller is never broken by mirroring.
        logger.warning(
            "grouped_alert_event_record_failed",
            project_id=project_id,
            alert_id=alert_id,
            group_key=group_key,
            error=str(exc),
        )
        return None
async def record_alertmanager_event(
    *,
    project_id: str,
    alert_id: str,
    alertname: str,
    severity: str,
    namespace: str,
    target_resource: str,
    fingerprint: str,
    stage: str,
    notification_type: str | None = None,
    alert_category: str | None = None,
    incident_id: str | None = None,
    approval_id: str | None = None,
    repeat_count: int | None = None,
    is_duplicate: bool = False,
    source_url: str | None = None,
    labels: dict[str, Any] | None = None,
    annotations: dict[str, Any] | None = None,
    source_extra: dict[str, Any] | None = None,
) -> UUID | None:
    """
    Mirror an Alertmanager inbound alert into an AwoooP conversation_event.

    Telegram must not be the only source of truth: every firing alert should
    have at least a ``received`` event, and once an incident/approval has been
    created an ``incident_linked`` event is added so the truth-chain can look
    the alert up by incident_id. DB failures fail open and do not affect the
    Alertmanager ACK.

    Returns:
        The id returned by ``mirror_inbound_event``, or ``None`` when any step
        raises (the error is logged, not re-raised).
    """
    try:
        from src.db.base import get_db_context
        # Normalize the optional ids to strings (or None) once, up front.
        incident_ref = str(incident_id) if incident_id else None
        approval_ref = str(approval_id) if approval_id else None
        # The stage is part of the event id, so each stage of the same alert
        # gets its own provider_event_id.
        provider_event_id = build_alertmanager_provider_event_id(
            alert_id=alert_id,
            fingerprint=fingerprint,
            stage=stage,
        )
        content = format_alertmanager_event_content(
            stage=stage,
            alert_id=alert_id,
            alertname=alertname,
            severity=severity,
            namespace=namespace,
            target_resource=target_resource,
            fingerprint=fingerprint,
            notification_type=notification_type,
            alert_category=alert_category,
            incident_id=incident_ref,
            approval_id=approval_ref,
            repeat_count=repeat_count,
        )
        source_envelope = build_inbound_source_envelope(
            provider="alertmanager",
            stage=stage,
            provider_event_id=provider_event_id,
            raw_event_id=alert_id,
            raw_content=content,
            alertname=alertname,
            severity=severity,
            namespace=namespace,
            target_resource=target_resource,
            fingerprint=fingerprint,
            incident_id=incident_ref,
            approval_id=approval_ref,
            source_url=source_url,
            labels=labels,
            annotations=annotations,
            # Keys in source_extra override the three defaults listed before it.
            extra={
                "notification_type": notification_type,
                "alert_category": alert_category,
                "repeat_count": repeat_count,
                **(source_extra or {}),
            },
        )
        async with get_db_context(project_id) as db:
            run_id = build_alertmanager_run_id(project_id, provider_event_id)
            await ensure_completed_shadow_run(
                db,
                project_id=project_id,
                run_id=run_id,
                agent_id="legacy-alertmanager-webhook",
                trigger_type="alertmanager_inbound",
                trigger_ref=provider_event_id,
                input_payload={
                    "stage": stage,
                    "alert_id": alert_id,
                    "alertname": alertname,
                    "severity": severity,
                    "namespace": namespace,
                    "target_resource": target_resource,
                    "fingerprint": fingerprint,
                    "notification_type": notification_type,
                    "alert_category": alert_category,
                    "incident_id": incident_ref,
                    "approval_id": approval_ref,
                    "repeat_count": repeat_count,
                },
            )
            event_id = await mirror_inbound_event(
                db,
                project_id=project_id,
                channel_type="internal",
                provider_event_id=provider_event_id,
                platform_subject_id="alertmanager",
                channel_user_id="alertmanager",
                channel_chat_id=f"alertmanager:{namespace or 'default'}",
                content_type="text",
                raw_content=content,
                source_envelope=source_envelope,
                provider_ts=_db_timestamp_now(),
                run_id=run_id,
                is_duplicate=is_duplicate,
            )
        logger.info(
            "alertmanager_event_recorded",
            project_id=project_id,
            alert_id=alert_id,
            event_id=str(event_id),
            stage=stage,
            incident_id=incident_ref,
            fingerprint=fingerprint,
        )
        return event_id
    except Exception as exc:
        # Fail open so the Alertmanager webhook is still acknowledged.
        logger.warning(
            "alertmanager_event_record_failed",
            project_id=project_id,
            alert_id=alert_id,
            stage=stage,
            fingerprint=fingerprint,
            error=str(exc),
        )
        return None
async def record_external_alert_event(
    *,
    project_id: str,
    provider: str,
    event_id: str,
    stage: str,
    title: str,
    severity: str,
    namespace: str | None = None,
    target_resource: str | None = None,
    fingerprint: str | None = None,
    incident_id: str | None = None,
    approval_id: str | None = None,
    source_url: str | None = None,
    labels: dict[str, Any] | None = None,
    annotations: dict[str, Any] | None = None,
    payload: dict[str, Any] | None = None,
    is_duplicate: bool = False,
) -> UUID | None:
    """
    Mirror a non-Alertmanager alert (Sentry / SignOz, etc.) into conversation_event.

    This is the minimal shared truth-chain entry point: it only writes the
    rendered content plus a source envelope and must not change the original
    webhook's notification, approval or automation behaviour.

    To honour that, the whole recording path is fail-open, matching
    ``record_alertmanager_event``: building the provider event id, the content
    and the envelope happens inside the ``try`` as well, so a failure in any
    of those steps is logged as ``external_alert_event_record_failed`` instead
    of propagating into the calling webhook.

    Args:
        provider: Source system name; normalised to lower case, ``"external"``
            when empty.
        event_id: The provider's own identifier for the event.
        stage: Lifecycle stage label used in the provider event id and content.
        title: Alert title; also passed as ``alertname`` to the envelope.
        payload: Provider payload stored under ``extra["payload"]``.
        is_duplicate: Forwarded unchanged to ``mirror_inbound_event``.

    Returns:
        Whatever ``mirror_inbound_event`` returned for the mirrored event, or
        ``None`` when recording failed.
    """
    # Normalised outside the try block because the failure log below needs it.
    provider_name = str(provider or "external").strip().lower() or "external"
    try:
        from src.db.base import get_db_context

        incident_ref = str(incident_id) if incident_id else None
        approval_ref = str(approval_id) if approval_id else None

        provider_event_id = build_external_alert_provider_event_id(provider_name, event_id, stage)
        content = "\n".join([
            f"{provider_name} inbound {stage}",
            f"Event ID: {event_id}",
            f"Title: {title}",
            f"Severity: {severity}",
            f"Namespace: {namespace or '-'}",
            f"Target: {target_resource or '-'}",
            f"Fingerprint: {fingerprint or '-'}",
            f"Incident: {incident_ref or '-'}",
            f"Approval: {approval_ref or '-'}",
            f"Source URL: {source_url or '-'}",
        ])
        source_envelope = build_inbound_source_envelope(
            provider=provider_name,
            stage=stage,
            provider_event_id=provider_event_id,
            raw_event_id=event_id,
            raw_content=content,
            alertname=title,
            severity=severity,
            namespace=namespace,
            target_resource=target_resource,
            fingerprint=fingerprint,
            incident_id=incident_ref,
            approval_id=approval_ref,
            source_url=source_url,
            labels=labels,
            annotations=annotations,
            extra={
                "payload": payload or {},
            },
        )

        async with get_db_context(project_id) as db:
            run_id = build_external_alert_run_id(project_id, provider_event_id)
            await ensure_completed_shadow_run(
                db,
                project_id=project_id,
                run_id=run_id,
                agent_id=f"legacy-{provider_name}-webhook",
                trigger_type=f"{provider_name}_inbound",
                trigger_ref=provider_event_id,
                input_payload={
                    "provider": provider_name,
                    "event_id": event_id,
                    "stage": stage,
                    "severity": severity,
                    "namespace": namespace,
                    "target_resource": target_resource,
                    "fingerprint": fingerprint,
                    "incident_id": incident_ref,
                    "approval_id": approval_ref,
                },
            )
            event_uuid = await mirror_inbound_event(
                db,
                project_id=project_id,
                channel_type="internal",
                provider_event_id=provider_event_id,
                platform_subject_id=provider_name,
                channel_user_id=provider_name,
                channel_chat_id=f"{provider_name}:{namespace or 'default'}",
                content_type="text",
                raw_content=content,
                source_envelope=source_envelope,
                provider_ts=_db_timestamp_now(),
                run_id=run_id,
                is_duplicate=is_duplicate,
            )

        logger.info(
            "external_alert_event_recorded",
            project_id=project_id,
            provider=provider_name,
            event_id=event_id,
            stage=stage,
            conversation_event_id=str(event_uuid),
            incident_id=incident_ref,
            approval_id=approval_ref,
        )
        return event_uuid
    except Exception as exc:
        logger.warning(
            "external_alert_event_record_failed",
            project_id=project_id,
            provider=provider_name,
            event_id=event_id,
            stage=stage,
            error=str(exc),
        )
        return None
# ─────────────────────────────────────────────────────────────────────────────
# 出站訊息記錄
# ─────────────────────────────────────────────────────────────────────────────
@@ -141,6 +893,7 @@ async def record_outbound_message(
channel_chat_id: str,
message_type: str, # 'interim' | 'final' | 'error' | 'approval_request'
content: str | None = None,
source_envelope: dict[str, Any] | None = None,
provider_message_id: str | None = None,
send_status: str = "pending",
conversation_event_id: UUID | None = None,
@@ -155,26 +908,61 @@ async def record_outbound_message(
"""
content_hash: str | None = None
content_preview: str | None = None
content_redacted: str | None = None
if content is not None:
content_hash = hashlib.sha256(content.encode()).hexdigest()
redacted = _redact_string(content)
content_preview = redacted[:256]
content_redacted = _redact_string(content)
content_preview = content_redacted[:256]
envelope: dict[str, Any] = sanitize(source_envelope or {})
envelope.update({
"schema_version": "outbound_source_envelope_v1",
"redaction_version": _OUTBOUND_REDACTION_VERSION,
"content_sha256": content_hash,
"content_length": len(content) if content is not None else 0,
})
source_envelope_json = json.dumps(envelope, ensure_ascii=False, default=str)
actual_status = "shadow" if is_shadow else send_status
sent_at = (
_db_timestamp_now()
if actual_status == "sent"
else None
)
await ensure_completed_shadow_run(
db,
project_id=project_id,
run_id=run_id,
agent_id="legacy-telegram-gateway",
trigger_type="legacy_outbound",
trigger_ref=provider_message_id,
input_payload={
"channel_type": channel_type,
"channel_chat_id": channel_chat_id,
"message_type": message_type,
"send_status": actual_status,
"triggered_by_state": triggered_by_state,
},
)
result = await db.execute(
text("""
INSERT INTO awooop_outbound_message (
project_id, run_id, conversation_event_id,
channel_type, channel_chat_id, message_type,
content_hash, content_preview, provider_message_id,
send_status, queued_at,
content_hash, content_preview, content_redacted,
redaction_version, source_envelope,
provider_message_id,
send_status, queued_at, sent_at,
triggered_by_state, waiting_since
) VALUES (
:project_id, :run_id, :conversation_event_id,
:channel_type, :channel_chat_id, :message_type,
:content_hash, :content_preview, :provider_message_id,
:send_status, NOW(),
:content_hash, :content_preview, :content_redacted,
:redaction_version, CAST(:source_envelope AS jsonb),
:provider_message_id,
:send_status, NOW(), :sent_at,
:triggered_by_state, :waiting_since
)
RETURNING message_id
@@ -188,8 +976,12 @@ async def record_outbound_message(
"message_type": message_type,
"content_hash": content_hash,
"content_preview": content_preview,
"content_redacted": content_redacted,
"redaction_version": _OUTBOUND_REDACTION_VERSION,
"source_envelope": source_envelope_json,
"provider_message_id": provider_message_id,
"send_status": actual_status,
"sent_at": sent_at,
"triggered_by_state": triggered_by_state,
"waiting_since": waiting_since,
},
@@ -278,7 +1070,7 @@ async def _interim_feedback_task(
# run 已推進complete/failed 等),不需要 interim
return
waiting_since = datetime.now(timezone.utc)
waiting_since = datetime.now(UTC)
interim_content = "AI 正在分析中,請稍候... ⏳"
await record_outbound_message(

View File

@@ -26,7 +26,7 @@ from __future__ import annotations
import asyncio
import re
from dataclasses import dataclass, field
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Literal
import httpx
@@ -125,7 +125,7 @@ class DecisionFusionAdapter:
# Public API
# =========================================================================
async def fuse_decision(self, event: "AiGovernanceEvent") -> FusedDecision:
async def fuse_decision(self, event: AiGovernanceEvent) -> FusedDecision:
"""三維融合LLM × Playbook × MCP → FusedDecision。
三個維度並行評估asyncio.gather任一失敗靜默降為 0.5。
@@ -226,7 +226,7 @@ class DecisionFusionAdapter:
# =========================================================================
async def _score_llm(
self, event: "AiGovernanceEvent"
self, event: AiGovernanceEvent
) -> tuple[float, str, dict[str, Any]]:
"""Ollama LLM 推理:治理事件情境 → 建議動作 + 信心度。
@@ -254,7 +254,9 @@ class DecisionFusionAdapter:
"只輸出 CONFIDENCE 和 ACTION 兩行,不要其他解釋。"
)
ollama_url = getattr(self._settings, "OLLAMA_URL", "http://192.168.0.111:11434") # 2026-05-04 ogt: ADR-110 修正 — 111 primary
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
ollama_url = resolve_ollama_endpoint("deep_rca")
try:
async with httpx.AsyncClient(
@@ -320,7 +322,7 @@ class DecisionFusionAdapter:
# =========================================================================
async def _score_playbook(
self, event: "AiGovernanceEvent"
self, event: AiGovernanceEvent
) -> tuple[float, str | None, float | None]:
"""Playbook 相似度比對 → 取最高 trust_score。
@@ -373,7 +375,7 @@ class DecisionFusionAdapter:
# =========================================================================
async def _score_mcp(
self, event: "AiGovernanceEvent"
self, event: AiGovernanceEvent
) -> tuple[float, dict[str, Any]]:
"""Prometheus 情報採集 → MCP 感官品質分數。

View File

@@ -21,6 +21,7 @@ Decision Manager - Phase 6.5 非同步決策狀態機
"""
import asyncio
import html
import json
from datetime import UTC, datetime
from enum import Enum
@@ -55,6 +56,20 @@ def _fire_and_forget(coro) -> asyncio.Task:
return task
def _incident_alertname_for_dedup(incident: Incident) -> str:
    """Pick the alert name used to build the Telegram fingerprint dedup key.

    Only the first signal is consulted. The first truthy value wins, in this
    order: the ``alertname`` label, ``alert_name``, the ``summary`` annotation,
    the ``description`` annotation. When the incident has no signals, or none
    of those is set, the incident id is returned instead.
    """
    if not incident.signals:
        return incident.incident_id

    first_signal = incident.signals[0]
    # The ``or`` chain is lazy on purpose: later attributes are only read when
    # every earlier candidate was empty.
    return (
        first_signal.labels.get("alertname")
        or first_signal.alert_name
        or first_signal.annotations.get("summary")
        or first_signal.annotations.get("description")
        or incident.incident_id
    )
def _phase2_fallback_reason(package: Any) -> str | None:
"""Return why a Phase 2 package should continue to Playbook/LLM fallback.
@@ -75,6 +90,22 @@ def _phase2_fallback_reason(package: Any) -> str | None:
return None
def _incident_llm_timeout_seconds() -> float:
    """Return the outer timeout, in seconds, for incident LLM proposals.

    The provider layer already has per-provider timeouts. This outer guard must
    not be shorter than the GCP Ollama lane, or alert diagnosis will be cut off
    before the free/local-first route can answer.

    The value is ``settings.INCIDENT_LLM_TIMEOUT_SECONDS`` (240.0 when the
    setting is missing, not numeric, or not finite), floored by
    ``settings.OPENCLAW_TIMEOUT`` (30.0 under the same conditions). Both
    settings are parsed defensively so a bad value cannot raise here or hand a
    NaN/infinite timeout to the caller.
    """
    import math

    def _finite_seconds(raw: object, fallback: float) -> float:
        # Convert a raw setting to a finite float, or fall back.
        try:
            seconds = float(raw)  # type: ignore[arg-type]
        except (TypeError, ValueError):
            return fallback
        return seconds if math.isfinite(seconds) else fallback

    timeout = _finite_seconds(getattr(settings, "INCIDENT_LLM_TIMEOUT_SECONDS", None), 240.0)
    floor = _finite_seconds(getattr(settings, "OPENCLAW_TIMEOUT", 30), 30.0)
    return max(timeout, floor)
def _should_escalate_auto_approve_rejection(reason: Any) -> bool:
"""Return True for manual gates that mean the automation path went blind."""
@@ -212,7 +243,7 @@ async def _push_decision_to_telegram(
# 改成 alertname+target 構造的 fingerprint key + TTL 86400s同症狀共用 dedup。
# Incident 真正 RESOLVED/CLOSED 時走 line 220-226 的 status check 提早 return不影響復發偵測。
redis = get_redis()
_alertname_fp = (incident.title or "unknown").strip().lower().replace(" ", "_")[:60]
_alertname_fp = _incident_alertname_for_dedup(incident).strip().lower().replace(" ", "_")[:60]
_target_fp = (
incident.affected_services[0] if incident.affected_services else "unknown"
).lower()[:40]
@@ -546,6 +577,7 @@ async def _push_decision_to_telegram(
alert_category=_alert_category,
notification_type=_notification_type,
playbook_name=_playbook_name,
automation_state=proposal_data.get("automation_state", ""),
)
# 2026-04-09 Claude Sonnet 4.6: 存 message_id → 後續狀態更新在原訊息延續
@@ -607,7 +639,7 @@ async def _nemoclaw_second_opinion(incident: "Incident", primary_result: dict) -
"""
MCP Phase 4a: NemoClaw second opinion — 信心 < 0.7 時觸發
============================================================
用 deepseek-r1:14b (Ollama 188) 對同一份資料做獨立推理,
用 deepseek-r1:14b (設定的 Ollama primary) 對同一份資料做獨立推理,
輸出純文字 advisory_note不執行任何操作。
2026-04-11 Claude Sonnet 4.6 Asia/Taipei
@@ -666,7 +698,7 @@ async def _generate_playbook_draft_if_new(incident: "Incident") -> None:
MCP Phase 4c: Playbook 無命中時,自動生成 AI 草稿 Playbook 寫入 KM
=====================================================================
- 僅在 KM 中不存在同 alertname 的 Playbook 時觸發(避免重複)
- 用 qwen2.5:7b-instruct (Ollama 188) 生成結構化 Playbook 草稿
- 用 qwen2.5:7b-instruct (設定的 Ollama primary) 生成結構化 Playbook 草稿
- 寫入 KnowledgeEntrystatus=DRAFT需人工審核後升為 APPROVED
- 寫入 AlertOperationLog PLAYBOOK_DRAFT_CREATED 事件
@@ -827,7 +859,6 @@ async def _resolve_target_from_k8s(incident: "Incident", namespace: str) -> str
reason="alertname 有對應但 keywords=[],走 fallback 取第一個非 infra pod",
)
import re as _re
for line in pod_lines:
pod = line.removeprefix("pod/").strip()
if not pod:
@@ -977,6 +1008,58 @@ def _format_metrics_delta(before: dict, after: dict) -> str:
return " | ".join(parts)
def _clip_telegram_field(value: str | None, limit: int) -> str:
    """Normalize a short Telegram field without leaking multiline command noise.

    All runs of whitespace (including newlines) are collapsed to single spaces
    and the result is clipped so that it never exceeds ``limit`` characters.

    Args:
        value: Raw field text; ``None`` and other falsy values become ``""``.
        limit: Maximum length of the returned string.

    Returns:
        The collapsed text when it fits. Otherwise the text is cut and ends
        with ``"..."``; when ``limit`` is too small to hold the ellipsis
        (below 3) the text is hard-cut to ``limit`` characters instead, and a
        non-positive ``limit`` yields ``""``.
    """
    collapsed = " ".join(str(value or "").split())
    if len(collapsed) <= limit:
        return collapsed
    if limit < 3:
        # An ellipsis alone would already overflow the limit.
        return collapsed[: max(0, limit)]
    return f"{collapsed[: limit - 3]}..."
def _format_auto_repair_status_line(
    *,
    incident_id: str,
    target: str,
    action: str,
    success: bool,
    error: str = "",
    metrics_delta_text: str = "",
) -> str:
    """Render the auto-repair outcome as a scannable Telegram operation card.

    Every dynamic field is whitespace-collapsed and clipped with
    ``_clip_telegram_field`` and then HTML-escaped before it is placed inside a
    ``<code>`` tag. Empty ``target``, ``action`` and ``error`` values are
    replaced by fixed placeholder texts.

    Returns:
        The "auto resolved" card when ``success`` is true (with an extra
        metrics row when ``metrics_delta_text`` is non-empty), otherwise the
        "handoff required" card that includes the failure reason.
    """
    divider = "──────────────────────\n"
    incident_cell = html.escape(_clip_telegram_field(incident_id, 40))
    target_cell = html.escape(_clip_telegram_field(target, 80) or "unknown")
    action_cell = html.escape(_clip_telegram_field(action, 160) or "已執行")

    if not success:
        error_cell = html.escape(_clip_telegram_field(error, 180) or "未回傳錯誤")
        return "".join(
            [
                "🧑‍🔧 <b>HANDOFF REQUIREDAI 自動修復失敗,已轉人工</b>\n",
                divider,
                f"├ 事件:<code>{incident_cell}</code>\n",
                f"├ 對象:<code>{target_cell}</code>\n",
                f"├ 嘗試:<code>{action_cell}</code>\n",
                f"├ 原因:<code>{error_cell}</code>\n",
                "├ 狀態:自動化已停止,不再重試\n",
                "└ 下一步:請 SRE 依 AwoooP Run / 原告警卡處理",
            ]
        )

    # The metrics row is optional and is appended after the actor row.
    metrics_row = ""
    if metrics_delta_text:
        metrics_cell = html.escape(_clip_telegram_field(metrics_delta_text, 120))
        metrics_row = f"\n├ 指標:<code>{metrics_cell}</code>"

    return "".join(
        [
            "✅ <b>AUTO RESOLVEDAI 自動修復完成</b>\n",
            divider,
            f"├ 事件:<code>{incident_cell}</code>\n",
            f"├ 對象:<code>{target_cell}</code>\n",
            f"├ 執行:<code>{action_cell}</code>\n",
            "├ 狀態:自動化已完成,等待後驗證觀察\n",
            "├ ActorleWOOOgo autonomous",
            metrics_row,
        ]
    )
async def _push_auto_repair_result(
incident: Incident,
action: str,
@@ -1052,24 +1135,16 @@ async def _push_auto_repair_result(
except Exception as _k8s_err:
logger.debug("k8s_state_after_failed", incident_id=inc_id, error=str(_k8s_err))
# 2026-05-02 ogt + Claude Sonnet 4.6: 強制標記 [AUTO],避免事後抵賴
# 統帥要求「就算是自動化處理,也要發告警訊息出來」—— 所有自治動作必須留痕,
# 且 Telegram 上能明顯與人工點擊區隔。
if success:
delta_line = f"\n├ 指標: <code>{metrics_delta_text}</code>" if metrics_delta_text else ""
status_line = (
f"🤖 <b>[AUTO] AI 自動修復完成</b>\n"
f"├ 動作: <code>{action[:100] if action else '已執行'}</code>\n"
f"├ Actor: leWOOOgo (autonomous)"
f"{delta_line}"
)
else:
status_line = (
f"🤖❌ <b>[AUTO] AI 自動修復失敗,已升級人工介入</b>\n"
f"├ 動作: <code>{action[:80] if action else '未知'}</code>\n"
f"├ Actor: leWOOOgo (autonomous)\n"
f"└ 錯誤: {error[:100] if error else '未知錯誤'}"
)
# 2026-05-07 Codex: 自動化結果必須讓 SRE 一眼分辨「已自動解決」或
# 「已停止並轉人工」,不能再用 raw command / exception 片段洗版。
status_line = _format_auto_repair_status_line(
incident_id=inc_id,
target=target,
action=action,
success=success,
error=error,
metrics_delta_text=metrics_delta_text,
)
# BUG-006 修復 2026-04-11: outcome + verification_result 全為 null
# 原因_push_auto_repair_result 只送 Telegram沒寫 DB
@@ -1715,6 +1790,25 @@ class DecisionManager:
token.proposal_data["auto_approve_reason"] = auto_decision.reason_detail
await self._save_token(token)
try:
from src.services.awooop_ansible_audit_service import (
record_ansible_decision_audit as _record_ansible_decision_audit,
)
_fire_and_forget(
_record_ansible_decision_audit(
incident=incident,
proposal_data=token.proposal_data,
decision_path="auto_execute",
not_used_reason=(
"auto_execute selected existing executor path; "
"Ansible check-mode is not wired yet"
),
)
)
except Exception as _ansible_audit_err:
logger.debug("ansible_decision_audit_schedule_error", error=str(_ansible_audit_err))
# 觸發自動執行 (非阻塞)
_fire_and_forget(
self._auto_execute(incident, token)
@@ -1738,6 +1832,24 @@ class DecisionManager:
),
)
)
try:
from src.services.awooop_ansible_audit_service import (
record_ansible_decision_audit as _record_ansible_decision_audit,
)
_fire_and_forget(
_record_ansible_decision_audit(
incident=incident,
proposal_data=token.proposal_data,
decision_path="manual_approval",
not_used_reason=(
"manual approval required; Ansible check-mode "
"is not wired to approval execution yet"
),
)
)
except Exception as _ansible_audit_err:
logger.debug("ansible_decision_audit_schedule_error", error=str(_ansible_audit_err))
_fire_and_forget(
_push_decision_to_telegram(incident, token.proposal_data)
)
@@ -1858,14 +1970,14 @@ class DecisionManager:
try:
from src.core.feature_flags import aiops_flags as _p6_flags
if _p6_flags.is_sub_flag_enabled("AIOPS_P6_SELF_DEMOTION"):
from src.db.base import get_session_factory as _p6_sf
from src.db.base import get_db_context as _p6_db_context
from src.db.models import AiGovernanceEvent as _GovernanceEvent
from sqlalchemy import select as _p6_select, func as _p6_func
from datetime import timedelta as _p6_td
_now = __import__("src.utils.timezone", fromlist=["now_taipei"]).now_taipei()
async with _p6_sf()() as _p6_sess:
async with _p6_db_context() as _p6_sess:
# 過去 7 天有幾筆未解決的 slo_violation
_viol_7d_q = await _p6_sess.execute(
_p6_select(_p6_func.count()).where(
@@ -1905,8 +2017,8 @@ class DecisionManager:
)
# 記錄保守模式事件
try:
from src.db.base import get_session_factory as _p6_sf2
async with _p6_sf2()() as _s2:
from src.db.base import get_db_context as _p6_db_context2
async with _p6_db_context2() as _s2:
_s2.add(_GovernanceEvent(
event_type="conservative_mode",
details={
@@ -1946,8 +2058,8 @@ class DecisionManager:
_push_decision_to_telegram(incident, token.proposal_data)
)
try:
from src.db.base import get_session_factory as _p6_sf3
async with _p6_sf3()() as _s3:
from src.db.base import get_db_context as _p6_db_context3
async with _p6_db_context3() as _s3:
_s3.add(_GovernanceEvent(
event_type="self_demotion",
details={
@@ -2694,9 +2806,10 @@ class DecisionManager:
if context_parts:
llm_expert_context["diagnosis_context"] = "\n\n".join(context_parts)
# GAP-B4 (2026-04-14 Claude Sonnet 4.6): LLM 25s hard timeout
# 比外層 decide() 30s wait_for 更嚴格,留 5s 給 YAML risk override + NemoClaw second opinion
# Timeout → 明確 llm_timeout_fallback 日誌,返回 expert_result 而非等外層觸發
# 2026-05-06 Codex: The alert goal is resolution quality, not a
# fast-but-paid card. The outer guard is configurable and must allow
# the GCP-A → GCP-B → 111 Ollama lane to finish before cloud backup.
llm_timeout_seconds = _incident_llm_timeout_seconds()
llm_result, provider, success = await asyncio.wait_for(
self._openclaw.generate_incident_proposal_with_tools(
incident_id=incident.incident_id,
@@ -2705,7 +2818,7 @@ class DecisionManager:
affected_services=incident.affected_services,
expert_context=llm_expert_context if llm_expert_context else None,
),
timeout=25.0,
timeout=llm_timeout_seconds,
)
if success and llm_result:
@@ -2772,7 +2885,7 @@ class DecisionManager:
logger.warning(
"llm_timeout_fallback",
incident_id=incident.incident_id,
timeout_sec=25.0,
timeout_sec=llm_timeout_seconds,
action="降級 Expert System",
)
except Exception as e:
@@ -2923,6 +3036,52 @@ class DecisionManager:
return None
async def _find_existing_tokens_for_incidents(
    self,
    incident_ids: list[str],
) -> dict[str, DecisionToken]:
    """
    Look up the existing decision tokens for a batch of incidents.

    GET /api/v1/incidents is the frontend polling path, so it must not scan
    ``decision:*`` once per incident. This method walks the Redis keyspace a
    single time for the whole batch, avoiding O(N×M) latency (and a frozen
    console) when there are 200+ incidents.

    Args:
        incident_ids: Incident ids to resolve; duplicates are ignored.

    Returns:
        Mapping of incident id to the first matching token encountered during
        the scan. Incidents without a token are simply absent. The scan stops
        early once every requested incident has been matched.

    Unreadable or malformed token entries are skipped; each skip is reported
    at debug level so corrupt keys stay diagnosable.
    """
    wanted = set(incident_ids)
    if not wanted:
        return {}

    redis_client = get_redis()
    found: dict[str, DecisionToken] = {}
    cursor = 0
    while True:
        cursor, keys = await redis_client.scan(
            cursor=cursor,
            match=f"{DECISION_TOKEN_PREFIX}*",
            count=500,
        )
        for key in keys:
            try:
                data = await redis_client.get(key)
                if not data:
                    continue
                token_data = json.loads(data)
                incident_id = token_data.get("incident_id")
                if incident_id in wanted and incident_id not in found:
                    found[incident_id] = DecisionToken.from_dict(token_data)
            except Exception as exc:
                # Best-effort lookup: one bad entry must not fail the batch.
                logger.debug(
                    "decision_token_batch_lookup_skipped",
                    key=str(key),
                    error=str(exc),
                )
                continue
            if len(found) == len(wanted):
                return found
        # A zero cursor from SCAN marks the end of the iteration.
        if cursor == 0:
            break

    return found
async def _persist_decision_to_db(
self, incident_id: str, proposal_data: dict
) -> None:
@@ -3236,7 +3395,7 @@ class DecisionManager:
# 與 line 217-218 同邏輯,避免 pod restart resend 路徑繞過 fingerprint dedup。
# 原本 telegram_sent:{incident_id} TTL 600s 早就過期 → 重啟必重發;
# 改 fingerprint + 24h TTL → 同症狀 24h 內任何 INC ID 都不會重推。
_alertname_fp = (getattr(incident, "title", None) or "unknown").strip().lower().replace(" ", "_")[:60]
_alertname_fp = _incident_alertname_for_dedup(incident).strip().lower().replace(" ", "_")[:60]
_affected = getattr(incident, "affected_services", None) or []
_target_fp = (_affected[0] if _affected else "unknown").lower()[:40]
dedup_key = f"telegram_sent:fp:{_alertname_fp}:{_target_fp}"
@@ -3443,6 +3602,8 @@ class DecisionManager:
token.proposal_data["decision_state"] = DecisionState.READY.value
token.proposal_data["auto_executed"] = False
token.proposal_data["mcp_all_failed"] = True
if _tool == "ssh_diagnose":
token.proposal_data["automation_state"] = "diagnosis_failed_manual_required"
await self._save_token(token)
_fire_and_forget(
_escalate_decision_auto_repair_unavailable(
@@ -3452,14 +3613,15 @@ class DecisionManager:
attempted_actions=f"decision_manager._ssh_execute -> {_tool}",
)
)
_fire_and_forget(
_push_auto_repair_result(
incident,
action,
success=False,
error=token.error,
if _tool != "ssh_diagnose":
_fire_and_forget(
_push_auto_repair_result(
incident,
action,
success=False,
error=token.error,
)
)
)
_fire_and_forget(_push_decision_to_telegram(incident, token.proposal_data))
return
@@ -3469,6 +3631,7 @@ class DecisionManager:
token.proposal_data["auto_executed"] = False
token.proposal_data["ssh_diagnosis_collected"] = True
token.proposal_data["ssh_diagnosis_preview"] = output_preview
token.proposal_data["automation_state"] = "diagnosis_collected_manual_required"
await self._save_token(token)
_fire_and_forget(
_escalate_decision_auto_repair_unavailable(

View File

@@ -17,11 +17,12 @@ Drift Interpreter - Phase 25 P2 Config Drift Detection
from __future__ import annotations
import json
import re
from typing import TYPE_CHECKING
import structlog
from src.models.drift import DriftIntent, DriftInterpretation, DriftItem
from src.models.drift import DriftIntent, DriftInterpretation
if TYPE_CHECKING:
from src.models.drift import DriftReport
@@ -52,6 +53,58 @@ _INTENT_PROMPT_TEMPLATE = """你是 AWOOOI GitOps 守門員,請分析以下 K8
"""
def _strip_think_blocks(text: str) -> str:
    """Remove ``<think>...</think>`` reasoning sections and trim the result.

    Such sections are commonly emitted by qwen/deepseek-style models. Matching
    is case-insensitive and non-greedy, so several separate blocks are each
    removed.
    """
    return re.sub(r"<think>[\s\S]*?</think>", "", text, flags=re.IGNORECASE).strip()


def _extract_first_json_object(text: str) -> dict | None:
    """
    Extract the first JSON object from an LLM response.

    Ollama qwen3/deepseek models often wrap the JSON in ``<think>`` blocks or
    short sentences; that surrounding text should not cause the drift intent
    to be downgraded to UNKNOWN.

    Candidates are tried in this order and the first one that decodes to a
    ``dict`` wins:

    1. the whole response with ``<think>`` blocks removed;
    2. the content of each fenced code block (with or without a ``json`` tag);
    3. a JSON object starting at each ``{`` in the cleaned response, scanning
       left to right, so an earlier non-JSON brace pair (for example
       ``{placeholder}`` in prose) does not hide a later valid object.

    Returns:
        The decoded object, or ``None`` when no JSON object is found.
    """
    cleaned = _strip_think_blocks(text)

    candidates = [cleaned]
    candidates.extend(
        match.group(1).strip()
        for match in re.finditer(r"```(?:json)?\s*([\s\S]+?)```", cleaned)
    )
    for candidate in candidates:
        try:
            data = json.loads(candidate)
        except (ValueError, RecursionError):
            continue
        if isinstance(data, dict):
            return data

    # raw_decode parses one JSON value starting at the given index and ignores
    # whatever follows it, which is exactly what is needed for JSON embedded
    # in free text.
    decoder = json.JSONDecoder()
    start = cleaned.find("{")
    while start >= 0:
        try:
            data, _end = decoder.raw_decode(cleaned, start)
        except (ValueError, RecursionError):
            data = None
        if isinstance(data, dict):
            return data
        start = cleaned.find("{", start + 1)

    return None
class NemotronDriftInterpreter:
"""
使用 Nemotron 分析漂移意圖
@@ -62,7 +115,7 @@ class NemotronDriftInterpreter:
❌ 不直接呼叫 kubectl 或 git
"""
async def analyze(self, report: "DriftReport") -> DriftInterpretation:
async def analyze(self, report: DriftReport) -> DriftInterpretation:
"""
分析漂移意圖
@@ -85,7 +138,7 @@ class NemotronDriftInterpreter:
result = await self._call_nemotron(prompt)
return result
def _format_diff_for_prompt(self, report: "DriftReport") -> str:
def _format_diff_for_prompt(self, report: DriftReport) -> str:
"""格式化 diff 給 Nemotron 分析用"""
lines = []
for item in report.items[:10]: # 最多 10 項避免 token 過多
@@ -111,7 +164,17 @@ class NemotronDriftInterpreter:
try:
from src.services.openclaw import get_openclaw
openclaw = get_openclaw()
response_text, _provider, success = await openclaw.call(prompt)
response_text, _provider, success = await openclaw.call(
prompt,
alert_context={
"intent_hint": "config",
"task_type": "diagnose",
"enforce_ollama_first": True,
"allow_gcp_heavy_model": True,
"target_resource": "config-drift",
"alert_type": "ConfigDriftInternalScan",
},
)
if not success or not response_text:
logger.warning("drift_interpreter_openclaw_failed", provider=_provider)
@@ -125,19 +188,9 @@ class NemotronDriftInterpreter:
def _parse_response(self, text: str) -> DriftInterpretation:
"""解析 Nemotron JSON 回應"""
try:
# 嘗試直接解析
data = json.loads(text)
except Exception:
try:
import re
match = re.search(r"```(?:json)?\s*([\s\S]+?)```", text)
if match:
data = json.loads(match.group(1))
else:
return self._unknown_result("無法解析 JSON")
except Exception:
return self._unknown_result("JSON 解析失敗")
data = _extract_first_json_object(text)
if data is None:
return self._unknown_result("無法解析 JSON")
try:
intent_str = data.get("intent", "unknown")

View File

@@ -33,10 +33,11 @@ logger = structlog.get_logger(__name__)
# ============================================================
# 設定
# ============================================================
# 2026-05-03 ogt: ADR-110 GCP-A Primary — 改從 settings 讀取,不再硬編碼 111
# 2026-05-05 Codex: 重摘要走 111 lane避免污染 GCP alert-fast lane
def _get_ollama_url() -> str:
from src.core.config import get_settings
return get_settings().OLLAMA_URL
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
return resolve_ollama_endpoint("deep_rca")
# D1 集中化 2026-04-11: 從 models.json providers.ollama.models.drift_summary 讀取
NARRATOR_MODEL = get_model("ollama", "drift_summary")
NARRATOR_TIMEOUT = 90.0 # seconds
@@ -120,8 +121,8 @@ class DriftNarratorService:
async def narrate_and_notify(
self,
report: "DriftReport",
interpretation: "DriftInterpretation | None" = None,
report: DriftReport,
interpretation: DriftInterpretation | None = None,
) -> None:
"""
生成人話摘要並推送 Telegram
@@ -147,7 +148,13 @@ class DriftNarratorService:
# 2026-04-18 B 方案: LLM 同時產 narrative + 結構化 items取代 str()[:30]
# 2026-04-20 P0.2: 追加 recommendationaction/confidence/reason
narrative, items, recommendation = await self._generate_narrative_and_items(report, interpretation)
await self._send_telegram(report, narrative, items, recommendation)
repeat_state = None
try:
from src.repositories.drift_repository import get_drift_repository
repeat_state = await get_drift_repository().get_repeat_state(report)
except Exception as e:
logger.warning("drift_repeat_state_lookup_failed", report_id=report.report_id, error=str(e))
await self._send_telegram(report, narrative, items, recommendation, repeat_state)
# 寫入 DB narrative_text (Phase 30 ADR-067)
try:
@@ -166,7 +173,7 @@ class DriftNarratorService:
medium=report.medium_count,
)
def _should_narrate(self, report: "DriftReport") -> bool:
def _should_narrate(self, report: DriftReport) -> bool:
"""觸發條件high >= 1 or medium >= 3"""
# 過濾 HPA 白名單後重算
non_hpa_items = [
@@ -180,8 +187,8 @@ class DriftNarratorService:
async def _generate_narrative_and_items(
self,
report: "DriftReport",
interpretation: "DriftInterpretation | None",
report: DriftReport,
interpretation: DriftInterpretation | None,
) -> tuple[str, list[dict], dict]:
"""
2026-04-18 ogt + Claude Opus 4.7: B 方案 — LLM 產生 narrative + 結構化 items
@@ -354,8 +361,8 @@ class DriftNarratorService:
def _fallback_recommendation(
self,
report: "DriftReport",
interpretation: "DriftInterpretation | None",
report: DriftReport,
interpretation: DriftInterpretation | None,
) -> dict:
"""
2026-04-20 P0.2 ogt + Claude Opus 4.7: LLM 沒給 recommendation 時的 Python fallback
@@ -397,7 +404,7 @@ class DriftNarratorService:
async def _log_ai_action_to_db(
self,
report: "DriftReport",
report: DriftReport,
prompt: str,
raw_response: str | None,
narrative: str,
@@ -416,7 +423,9 @@ class DriftNarratorService:
- 若能找到該 drift 的 incident 關聯,設 parent_op_id
"""
import json as _json
from sqlalchemy import text as _sql
from src.db.base import get_db_context
input_json = _json.dumps({
@@ -511,7 +520,7 @@ class DriftNarratorService:
items_count=len(items),
)
def _format_drift_for_llm(self, report: "DriftReport") -> str:
def _format_drift_for_llm(self, report: DriftReport) -> str:
"""
2026-04-18 ogt + Claude Opus 4.7: B 方案 — 餵 LLM 用的 JSON 序列化
保留更多原始 context 給 LLM 推理,不做 30 字元暴力截斷
@@ -582,7 +591,7 @@ class DriftNarratorService:
# 一般變化
return f"{from_val}{to_val}"
def _fallback_items(self, report: "DriftReport") -> list[dict]:
def _fallback_items(self, report: DriftReport) -> list[dict]:
"""
LLM 失敗時的 Python 智能摘要 (取代舊 str()[:30])
- 過濾白名單
@@ -605,7 +614,7 @@ class DriftNarratorService:
})
return items
def _format_intent_summary(self, interpretation: "DriftInterpretation | None") -> str:
def _format_intent_summary(self, interpretation: DriftInterpretation | None) -> str:
if not interpretation:
return "無意圖分析"
return (
@@ -616,8 +625,8 @@ class DriftNarratorService:
def _fallback_narrative(
self,
report: "DriftReport",
interpretation: "DriftInterpretation | None",
report: DriftReport,
interpretation: DriftInterpretation | None,
) -> str:
"""LLM 失敗時的結構化 fallback"""
resources = list({
@@ -636,10 +645,11 @@ class DriftNarratorService:
async def _send_telegram(
self,
report: "DriftReport",
report: DriftReport,
narrative: str,
items: list[dict],
recommendation: dict | None = None,
repeat_state: dict | None = None,
) -> None:
"""
推送 TYPE-4D Config Drift 卡片ADR-075+ B 方案智能摘要
@@ -651,7 +661,7 @@ class DriftNarratorService:
"""
from src.services.telegram_gateway import get_telegram_gateway
diff_summary = self._render_telegram_body(report, narrative, items, recommendation)
diff_summary = self._render_telegram_body(report, narrative, items, recommendation, repeat_state)
try:
tg = get_telegram_gateway()
@@ -667,7 +677,7 @@ class DriftNarratorService:
except Exception as e:
logger.warning("drift_narrator_telegram_error", error=str(e))
def _count_nontrivial_drift(self, report: "DriftReport") -> int:
def _count_nontrivial_drift(self, report: DriftReport) -> int:
"""
計算非白名單、非 trivial (K8s 自動補齊) 的 drift 數
用於 Telegram 底部「還有 N 項」顯示實際可操作數量
@@ -704,10 +714,11 @@ class DriftNarratorService:
def _render_telegram_body(
self,
report: "DriftReport",
report: DriftReport,
narrative: str,
items: list[dict],
recommendation: dict | None = None,
repeat_state: dict | None = None,
) -> str:
"""
組裝 Telegram 卡片 bodyB 方案格式 + P0.2 AI 推薦)
@@ -738,6 +749,10 @@ class DriftNarratorService:
}.get(_act, _act)
lines.append(f"🎯 AI 建議:{_emoji_action} ({int(_conf * 100)}%) — {_reason}\n")
repeat_line = self._render_repeat_state(repeat_state)
if repeat_line:
lines.append(f"{repeat_line}\n")
lines.append(f"🤖 AI 研判\n{narrative}\n")
# 用非 trivial + 非白名單 的實際可操作數顯示
@@ -758,6 +773,23 @@ class DriftNarratorService:
return "\n".join(lines)
def _render_repeat_state(self, repeat_state: dict | None) -> str:
"""Render operator-visible repeat/stage metadata for Telegram."""
if not repeat_state:
return ""
fingerprint = str(repeat_state.get("fingerprint") or "unknown")
occurrences = int(repeat_state.get("occurrences_12h") or 0)
window_hours = int(repeat_state.get("window_hours") or 12)
stage = str(repeat_state.get("operator_stage") or "unknown")
if occurrences <= 1:
repeat_text = f"{window_hours}h 內首次出現"
else:
repeat_text = f"{window_hours}h 內第 {occurrences} 次同指紋"
return (
"流程: drift_scanned → ai_analyzed → "
f"{stage}\n重複: {repeat_text}\n指紋: {fingerprint}"
)
# ============================================================
# Singleton

View File

@@ -0,0 +1,180 @@
"""Stable repeat identity for Config Drift reports.
The drift scanner emits a fresh ``report_id`` for every run. Operators need a
stable identity that answers whether two reports describe the same drift, not
just whether they have the same HIGH/MEDIUM/INFO counts.
"""
from __future__ import annotations
import hashlib
import json
from datetime import datetime, timedelta, timezone
from typing import Any
# Tag emitted verbatim under the "schema_version" key of the dict returned by
# build_drift_repeat_state.
SCHEMA_VERSION = "drift_repeat_state_v1"
# Mixed into the hashed payload in build_drift_fingerprint, so changing this
# value changes every fingerprint that function produces.
FINGERPRINT_VERSION = "drift_fingerprint_v1"
def _get(obj: Any, key: str, default: Any = None) -> Any:
if isinstance(obj, dict):
return obj.get(key, default)
return getattr(obj, key, default)
def _enum_value(value: Any) -> Any:
return getattr(value, "value", value)
def _jsonable(value: Any) -> Any:
    """Recursively convert ``value`` into plain JSON-friendly structures.

    Enum-like objects are unwrapped via ``_enum_value``; dict keys are
    stringified; lists and tuples both become lists; datetimes become ISO-8601
    strings. Anything else is returned unchanged.
    """
    unwrapped = _enum_value(value)
    if isinstance(unwrapped, datetime):
        return unwrapped.isoformat()
    if isinstance(unwrapped, dict):
        return {str(key): _jsonable(item) for key, item in unwrapped.items()}
    if isinstance(unwrapped, (list, tuple)):
        return [_jsonable(item) for item in unwrapped]
    return unwrapped
def _canonical_json(value: Any) -> str:
    """Serialize ``value`` to a compact JSON string with sorted keys.

    The value is first normalized with ``_jsonable``. Non-ASCII text is kept
    as-is, separators carry no whitespace, and objects ``json`` cannot encode
    are stringified with ``str``.
    """
    normalized = _jsonable(value)
    dump_options = {
        "ensure_ascii": False,
        "sort_keys": True,
        "separators": (",", ":"),
        "default": str,
    }
    return json.dumps(normalized, **dump_options)
def _parse_datetime(value: Any) -> datetime | None:
if value is None:
return None
if isinstance(value, datetime):
parsed = value
if parsed.tzinfo is not None:
return parsed.astimezone(timezone.utc).replace(tzinfo=None)
return parsed
if isinstance(value, str):
try:
parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
if parsed.tzinfo is not None:
return parsed.astimezone(timezone.utc).replace(tzinfo=None)
return parsed
except ValueError:
return None
return None
def _iso(value: Any) -> str | None:
    """Return ``value`` as an ISO-8601 string, or ``None`` if it cannot be parsed.

    Parsing and timezone normalization are delegated to ``_parse_datetime``.
    """
    moment = _parse_datetime(value)
    if not moment:
        return None
    return moment.isoformat()
def drift_item_identity(item: Any) -> dict[str, Any]:
    """Return the stable fields that define one drift item.

    ``item`` may be a dict or an object with matching attributes. Text fields
    are stringified (missing ones become ``""``), the drift level is unwrapped
    from its enum, both compared values are made JSON-friendly, and the
    allowlist flag is coerced to ``bool``.
    """
    text_fields = ("resource_kind", "resource_name", "namespace", "field_path")
    identity: dict[str, Any] = {
        name: str(_get(item, name, "")) for name in text_fields
    }
    identity["drift_level"] = str(_enum_value(_get(item, "drift_level", "")))
    identity["git_value"] = _jsonable(_get(item, "git_value"))
    identity["actual_value"] = _jsonable(_get(item, "actual_value"))
    identity["is_allowlisted"] = bool(_get(item, "is_allowlisted", False))
    return identity
def build_drift_fingerprint(namespace: str, items: list[Any]) -> str:
    """Build a deterministic fingerprint from namespace + sorted drift items.

    Each item is reduced to its identity dict, the identities are ordered by
    their canonical JSON so input order does not matter, and the versioned
    payload is hashed with SHA-256.

    Returns:
        ``"dfp_"`` followed by the first 16 hex characters of the digest.
    """
    ordered_identities = sorted(
        (drift_item_identity(entry) for entry in items),
        key=_canonical_json,
    )
    serialized = _canonical_json(
        {
            "version": FINGERPRINT_VERSION,
            "namespace": namespace,
            "items": ordered_identities,
        }
    )
    hex_digest = hashlib.sha256(serialized.encode("utf-8")).hexdigest()
    return "dfp_" + hex_digest[:16]
def _report_identity(report: Any) -> dict[str, Any]:
    """Extract the fields needed to compare one report against others.

    ``report`` may be a dict or an object. The result carries the raw
    ``report_id``, ``scanned_at`` and ``created_at`` values, the stringified
    namespace and status, and the fingerprint computed from the report's items.
    """
    drift_items = _get(report, "items", []) or []
    namespace = str(_get(report, "namespace", ""))

    identity: dict[str, Any] = {}
    identity["report_id"] = _get(report, "report_id")
    identity["namespace"] = namespace
    identity["status"] = str(_enum_value(_get(report, "status", "")))
    identity["scanned_at"] = _get(report, "scanned_at")
    identity["created_at"] = _get(report, "created_at")
    identity["fingerprint"] = build_drift_fingerprint(namespace, list(drift_items))
    return identity
def build_drift_repeat_state(
    report: Any,
    recent_reports: list[Any],
    *,
    window_hours: int = 12,
    max_reports: int = 20,
) -> dict[str, Any]:
    """Summarize repeat state for one drift report using stable fingerprints.

    Args:
        report: The report being described (dict or object).
        recent_reports: Candidate earlier reports to compare against; ``None``
            is treated as an empty list.
        window_hours: Size of the look-back window, measured from the current
            report's timestamp.
        max_reports: Maximum number of matching reports listed under
            ``"reports"`` (newest first).

    Returns:
        A dict with the schema version, fingerprint, matching strategy, window
        size, number of matching reports (``"occurrences_12h"``, named for the
        default window), first/last timestamps as ISO strings, the operator
        stage, and the list of matching reports.

    Notes:
        Reports without a ``report_id`` are ignored. Reports with no usable
        timestamp are kept, since they cannot be shown to fall outside the
        window. Duplicate ``report_id`` values collapse to the last one seen.
    """

    def _timestamp(identity: dict[str, Any]) -> datetime | None:
        # Prefer the scan time; fall back to the creation time when the scan
        # time is missing or unparseable.
        return _parse_datetime(identity.get("scanned_at")) or _parse_datetime(
            identity.get("created_at")
        )

    def _timestamp_iso(identity: dict[str, Any]) -> str | None:
        # Mirror _timestamp so the reported first/last values agree with the
        # ordering used to pick them, even if scanned_at is unparseable.
        return _iso(identity.get("scanned_at")) or _iso(identity.get("created_at"))

    current = _report_identity(report)
    # _parse_datetime normalizes aware timestamps to naive UTC, so the fallback
    # clock must be naive UTC as well; a local-time now() would skew the cutoff
    # on hosts whose timezone is not UTC.
    current_time = _timestamp(current) or datetime.now(timezone.utc).replace(
        tzinfo=None
    )
    cutoff = current_time - timedelta(hours=window_hours)

    # Reuse the identity already computed for the current report instead of
    # fingerprinting it a second time.
    identities = [current]
    identities.extend(_report_identity(candidate) for candidate in recent_reports or [])

    by_id: dict[str, dict[str, Any]] = {}
    for identity in identities:
        report_id = str(identity.get("report_id") or "")
        if not report_id:
            continue
        if identity["fingerprint"] != current["fingerprint"]:
            continue
        seen_at = _timestamp(identity)
        if seen_at is not None and seen_at < cutoff:
            continue
        by_id[report_id] = identity

    # Undated reports sort first via datetime.min.
    matches = sorted(
        by_id.values(),
        key=lambda row: _timestamp(row) or datetime.min,
    )
    first = matches[0] if matches else current
    last = matches[-1] if matches else current

    status = current.get("status") or "unknown"
    operator_stage = "pending_human" if status == "pending" else str(status)

    return {
        "schema_version": SCHEMA_VERSION,
        "fingerprint": current["fingerprint"],
        "matching_strategy": "namespace_and_stable_items_v1",
        "window_hours": window_hours,
        "occurrences_12h": len(matches),
        "first_scanned_at": _timestamp_iso(first),
        "last_scanned_at": _timestamp_iso(last),
        "operator_stage": operator_stage,
        "reports": [
            {
                "report_id": row.get("report_id"),
                "scanned_at": _iso(row.get("scanned_at")),
                "created_at": _iso(row.get("created_at")),
                "status": row.get("status"),
            }
            for row in reversed(matches[-max_reports:])
        ],
    }

View File

@@ -71,7 +71,7 @@ class BaselineState:
}
@classmethod
def from_dict(cls, d: dict[str, Any]) -> "BaselineState":
def from_dict(cls, d: dict[str, Any]) -> BaselineState:
return cls(
metric_name=d["metric_name"],
mean=d["mean"],
@@ -250,6 +250,7 @@ class DynamicBaselineService:
) -> list[MetricDatapoint]:
"""從 Prometheus query_range API 抓取歷史資料1h 步進)。"""
import httpx
from src.core.config import settings
end_ts = now_taipei().timestamp()
@@ -314,7 +315,7 @@ class DynamicBaselineService:
seasonal="add" if len(arr) >= seasonal_periods * 2 else None,
seasonal_periods=seasonal_periods,
initialization_method="estimated",
).fit(optimized=True, disp=False)
).fit(optimized=True)
fitted = model.fittedvalues
residuals = arr - fitted
@@ -423,11 +424,10 @@ class DynamicBaselineService:
async def _pg_upsert_baseline(self, state: BaselineState, promql: str, lookback_hours: int) -> None:
"""寫入 DynamicBaselineRecord 到 PostgreSQLINSERT不更新舊記錄"""
try:
from src.db.base import get_session_factory
from src.db.base import get_db_context
from src.db.models import DynamicBaselineRecord
factory = get_session_factory()
async with factory() as session:
async with get_db_context() as session:
record = DynamicBaselineRecord(
metric_name=state.metric_name,
mean=state.mean,
@@ -447,11 +447,11 @@ class DynamicBaselineService:
"""從 PostgreSQL 載入最新一筆基線記錄"""
try:
from sqlalchemy import select
from src.db.base import get_session_factory
from src.db.base import get_db_context
from src.db.models import DynamicBaselineRecord
factory = get_session_factory()
async with factory() as session:
async with get_db_context() as session:
stmt = (
select(DynamicBaselineRecord)
.where(DynamicBaselineRecord.metric_name == metric_name)

Some files were not shown because too many files have changed in this diff Show More