Files
ewoooc/services/import_service.py
OoO 2f5666be08 feat(import): drive staleness gate active alert when DB stale >=3d (critic-approved)
Root cause (debugger 2026-05-02):
  - drive_service.move_file 把 import 完的 Excel 搬到「已匯入」 → 工作夾空
  - auto_import_from_drive 每 30 分跑一次 list 回空 → 走 line 671-677
    silent return {success:True, file_count:0} → daily_sales_snapshot
    自 4/27 12:54 後停更 8 天無任何告警

Patch:
  Drive 空時加 staleness gate:查 DB MAX(snapshot_date),若距今 >=3 天
  即呼叫既有 _send_data_stale_alert(commit dda0a06)發 Telegram 主動催促 BU 上傳。

critic 5 點修訂全照做:
  1. session 自管:from database.manager import get_session + try/finally close
     (此區段原本沒有 session 變數,在 line 708 才開)
  2. 閾值 >=3 天:跨週末跨假期不上傳是常態,避免誤觸
  3. 沿用 _send_data_stale_alert 既有 3 字串參數,不客製文案
  4. dedupe key = 'upstream_drive':與下游 daily_report/weekly_strategy/
     monthly_report 區隔,未來 ai_insights 查詢不混淆告警源
  5. logger.error + exc_info=True:staleness 邏輯吞 try/except 不影響主流程
     return success,但留 traceback 可觀測

Regression check:
  - openclaw_strategist_service 不 import import_service → 無循環 import
  - import path 'database.manager' 與 openclaw 既有用法一致(line 31)
  - 找到檔案分支完全未動,主流程行為不變

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 13:10:55 +08:00

853 lines
34 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
自動匯入服務
負責從 Google Drive 自動下載、匯入、刪除檔案
"""
import os
import logging
import json
from datetime import datetime
from typing import Optional, Dict, Any
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import pandas as pd
import pytz
# 台北時區
TAIPEI_TZ = pytz.timezone('Asia/Taipei')
from services.google_drive_service import drive_service
from database.import_models import ImportJob, ImportConfig, Base
from database.manager import ensure_metadata_initialized
# 設定日誌
logger = logging.getLogger(__name__)
# 資料庫設定 - 使用 config.py 中的設定,支援 PostgreSQL 和 SQLite
def _create_engine_with_pool(db_path):
"""建立帶有連線池配置的資料庫引擎"""
if db_path.startswith('postgresql://'):
return create_engine(
db_path,
pool_pre_ping=True,
pool_size=5,
max_overflow=10,
pool_recycle=1800,
pool_timeout=30,
connect_args={
'connect_timeout': 10,
'options': '-c statement_timeout=120000' # 匯入需要更長的超時
}
)
elif db_path.startswith('sqlite://'):
return create_engine(db_path)
else:
return create_engine(f'sqlite:///{db_path}')
try:
from config import DATABASE_PATH as CONFIG_DATABASE_PATH
engine = _create_engine_with_pool(CONFIG_DATABASE_PATH)
logger.info(f"使用資料庫: {CONFIG_DATABASE_PATH.split('@')[-1] if '@' in CONFIG_DATABASE_PATH else CONFIG_DATABASE_PATH}")
except ImportError:
# 備援方案:使用環境變數或預設值
DATABASE_PATH = os.getenv('DATABASE_PATH', 'data/momo_database.db')
engine = _create_engine_with_pool(DATABASE_PATH)
logger.warning(f"無法匯入 config使用備援資料庫路徑: {DATABASE_PATH}")
Session = sessionmaker(bind=engine)
class ImportService:
"""匯入服務類別"""
def __init__(self):
"""初始化匯入服務"""
self._init_database()
def _init_database(self):
"""初始化資料庫表"""
try:
ensure_metadata_initialized(engine, use_postgres_lock=str(engine.url).startswith('postgresql'))
logger.info("匯入追蹤表已初始化")
except Exception as e:
logger.error(f"初始化資料庫表失敗: {str(e)}")
def get_config(self, key: str, default: str = None) -> Optional[str]:
"""
取得配置值
Args:
key: 配置鍵
default: 預設值
Returns:
Optional[str]: 配置值
"""
session = Session()
try:
config = session.query(ImportConfig).filter_by(config_key=key).first()
if config:
return config.config_value
return default
finally:
session.close()
def set_config(self, key: str, value: str, config_type: str = 'string', description: str = None):
"""
設定配置值
Args:
key: 配置鍵
value: 配置值
config_type: 配置類型
description: 配置說明
"""
session = Session()
try:
config = session.query(ImportConfig).filter_by(config_key=key).first()
if config:
config.config_value = value
config.config_type = config_type
if description:
config.description = description
config.updated_at = datetime.now(TAIPEI_TZ).replace(tzinfo=None)
else:
config = ImportConfig(
config_key=key,
config_value=value,
config_type=config_type,
description=description
)
session.add(config)
session.commit()
logger.info(f"配置已更新: {key} = {value}")
except Exception as e:
session.rollback()
logger.error(f"設定配置失敗: {str(e)}")
finally:
session.close()
def create_import_job(self, job_type: str, drive_file_id: str, drive_file_name: str,
drive_file_size: int = None) -> Optional[int]:
"""
建立匯入任務
Args:
job_type: 任務類型daily_sales 或 vendor_stockout
drive_file_id: Google Drive 檔案 ID
drive_file_name: 檔案名稱
drive_file_size: 檔案大小
Returns:
Optional[int]: 任務 ID
"""
session = Session()
try:
job = ImportJob(
job_type=job_type,
status='pending',
drive_file_id=drive_file_id,
drive_file_name=drive_file_name,
drive_file_size=drive_file_size,
progress_percent=0.0,
current_step='等待開始...'
)
session.add(job)
session.commit()
job_id = job.id
logger.info(f"已建立匯入任務: ID={job_id}, 檔案={drive_file_name}")
return job_id
except Exception as e:
session.rollback()
logger.error(f"建立匯入任務失敗: {str(e)}")
return None
finally:
session.close()
def update_job_status(self, job_id: int, status: str, progress: float = None,
current_step: str = None, error_message: str = None):
"""
更新任務狀態
Args:
job_id: 任務 ID
status: 狀態
progress: 進度百分比
current_step: 當前步驟
error_message: 錯誤訊息
"""
session = Session()
try:
job = session.query(ImportJob).filter_by(id=job_id).first()
if not job:
logger.warning(f"找不到任務: ID={job_id}")
return
job.status = status
if progress is not None:
job.progress_percent = progress
if current_step:
job.current_step = current_step
if error_message:
job.error_message = error_message
# 更新時間戳 (2026-01-30 修正:使用台北時區)
if status == 'downloading' or status == 'importing':
if not job.started_at:
job.started_at = datetime.now(TAIPEI_TZ).replace(tzinfo=None)
elif status in ['completed', 'failed']:
job.completed_at = datetime.now(TAIPEI_TZ).replace(tzinfo=None)
session.commit()
logger.info(f"任務 {job_id} 狀態已更新: {status} ({progress}%)")
except Exception as e:
session.rollback()
logger.error(f"更新任務狀態失敗: {str(e)}")
finally:
session.close()
def update_job_progress(self, job_id: int, total_rows: int = None, processed_rows: int = None,
success_rows: int = None, error_rows: int = None):
"""
更新任務進度
Args:
job_id: 任務 ID
total_rows: 總行數
processed_rows: 已處理行數
success_rows: 成功行數
error_rows: 錯誤行數
"""
session = Session()
try:
job = session.query(ImportJob).filter_by(id=job_id).first()
if not job:
return
if total_rows is not None:
job.total_rows = total_rows
if processed_rows is not None:
job.processed_rows = processed_rows
if success_rows is not None:
job.success_rows = success_rows
if error_rows is not None:
job.error_rows = error_rows
# 計算進度百分比
if job.total_rows and job.processed_rows:
job.progress_percent = (job.processed_rows / job.total_rows) * 100
session.commit()
except Exception as e:
session.rollback()
logger.error(f"更新任務進度失敗: {str(e)}")
finally:
session.close()
def get_job_status(self, job_id: int) -> Optional[Dict[str, Any]]:
"""
取得任務狀態
Args:
job_id: 任務 ID
Returns:
Optional[Dict]: 任務資訊
"""
session = Session()
try:
job = session.query(ImportJob).filter_by(id=job_id).first()
if job:
return job.to_dict()
return None
finally:
session.close()
def get_recent_jobs(self, limit: int = 20) -> list:
"""
取得最近的任務清單
Args:
limit: 返回數量
Returns:
list: 任務清單
"""
session = Session()
try:
jobs = session.query(ImportJob).order_by(
ImportJob.created_at.desc()
).limit(limit).all()
return [job.to_dict() for job in jobs]
finally:
session.close()
def process_daily_sales_import(self, job_id: int, file_path: str) -> bool:
"""
處理當日業績匯入
Args:
job_id: 任務 ID
file_path: Excel 檔案路徑
Returns:
bool: 是否成功
"""
try:
self.update_job_status(job_id, 'importing', 50, '正在匯入資料...')
# 讀取 Excel 檔案
logger.info(f"開始讀取 Excel 檔案: {file_path}")
df = pd.read_excel(file_path, engine='openpyxl', dtype=str)
if df.empty:
error_msg = "Excel 檔案為空"
self.update_job_status(job_id, 'failed', 50, '匯入失敗', error_msg)
return False
# ─────────────────────────────────────────────
# 2026-04-19: daily_sales_snapshot 前置欄位防禦 (技術債修復)
# 原因:若 Excel 欄位名靜默變更,匯入會成功但 Hermes SQL JOIN 會找不到數據 → 告警管線失真
# 規則:至少需偵測到「商品名稱」與「銷售金額」類欄位 (容忍多種別名)
# ─────────────────────────────────────────────
def _has_any(cols, keywords):
return any(kw in c for c in cols for kw in keywords)
required_groups = {
"商品名稱類": ["商品名稱", "品名", "Product", "Name"],
"業績金額類": ["銷售金額", "業績", "金額", "Amount", "Sales", "Total"],
}
missing = [label for label, kws in required_groups.items()
if not _has_any(df.columns, kws)]
if missing:
error_msg = (
f"Excel 欄位防禦失敗:缺少必要欄位分類 {missing}"
f"現有欄位:{list(df.columns)[:30]}"
)
logger.error(error_msg)
self.update_job_status(job_id, 'failed', 50, '欄位驗證失敗', error_msg)
return False
# 匯入到資料庫
table_name = 'daily_sales_snapshot'
# 找到日期欄位
date_col = None
for possible_col in ['日期', '訂單日期', '交易日期', 'Date']:
if possible_col in df.columns:
date_col = possible_col
break
if date_col:
# 解析日期
df['snapshot_date'] = pd.to_datetime(df[date_col], errors='coerce').dt.date
logger.info(f"使用日期欄位: {date_col}")
else:
# 使用當前日期
df['snapshot_date'] = datetime.now(TAIPEI_TZ).date()
logger.info("未找到日期欄位,使用當前日期(台北時區)")
# 寫入資料庫 - 使用全域的 engine支援 PostgreSQL 和 SQLite
from sqlalchemy import text
# 使用模組頂部定義的 engine確保連接到正確的資料庫
# 更新進度
total_rows = len(df)
self.update_job_progress(job_id, total_rows=total_rows, processed_rows=0)
# 取得此次匯入的日期範圍
import_dates = df['snapshot_date'].unique()
logger.info(f"本次匯入包含 {len(import_dates)} 個日期的資料")
# 刪除資料庫中相同日期的舊資料(覆蓋邏輯)
if len(import_dates) > 0:
# 過濾掉 None 值
valid_dates = [d for d in import_dates if d is not None]
if valid_dates:
# 將日期轉換為字串格式用於 SQL 查詢
date_list = ', '.join([f"'{d}'" for d in valid_dates])
with engine.connect() as conn:
# 刪除相同日期的舊資料
delete_query = text(f"DELETE FROM {table_name} WHERE snapshot_date IN ({date_list})")
result = conn.execute(delete_query)
deleted_count = result.rowcount
conn.commit()
if deleted_count > 0:
logger.info(f"已刪除 {deleted_count} 筆舊資料(覆蓋模式)")
# 寫入資料庫(帶驗證和重試機制)
max_retries = 2
retry_count = 0
write_success = False
while retry_count <= max_retries and not write_success:
try:
if retry_count > 0:
logger.warning(f"任務 {job_id}{retry_count} 次重試寫入...")
self.update_job_status(job_id, 'importing', 60, f'重試寫入中 ({retry_count}/{max_retries})...')
df.to_sql(
table_name,
engine,
if_exists='append',
index=False,
method='multi',
chunksize=1000
)
# V-Fix: 匯入後驗證 - 確認資料已正確寫入資料庫
self.update_job_status(job_id, 'importing', 85, '驗證資料寫入...')
# 取得本次匯入的日期
import_dates = df['snapshot_date'].dropna().unique()
if len(import_dates) > 0:
# 查詢資料庫中這些日期的資料筆數
from sqlalchemy import text
valid_dates = [str(d) for d in import_dates if d is not None]
date_list = ', '.join([f"'{d}'" for d in valid_dates])
with engine.connect() as conn:
verify_query = text(f"SELECT COUNT(*) FROM {table_name} WHERE snapshot_date IN ({date_list})")
result = conn.execute(verify_query)
db_count = result.scalar()
# 驗證:資料庫筆數應該 >= 本次匯入筆數(可能有其他日期的舊資料)
expected_count = len(df[df['snapshot_date'].isin(valid_dates)])
if db_count >= expected_count:
logger.info(f"任務 {job_id} 驗證成功: 預期 {expected_count} 筆, 資料庫有 {db_count}")
write_success = True
else:
logger.warning(f"任務 {job_id} 驗證失敗: 預期 {expected_count} 筆, 資料庫只有 {db_count}")
retry_count += 1
else:
# 沒有有效日期,跳過驗證
logger.warning(f"任務 {job_id} 無法驗證: 沒有有效的 snapshot_date")
write_success = True
except Exception as write_error:
logger.error(f"任務 {job_id} 寫入失敗 (嘗試 {retry_count + 1}): {str(write_error)}")
retry_count += 1
if retry_count > max_retries:
raise write_error
if not write_success:
error_msg = f"資料寫入驗證失敗,已重試 {max_retries}"
self.update_job_status(job_id, 'failed', 85, '驗證失敗', error_msg)
logger.error(f"任務 {job_id} {error_msg}")
return False
# === V-New 2026-01-15: 同步寫入 realtime_sales_monthly ===
# 目的:讓當日業績 raw data 同時呈現在「業績分析儀表板」
# 2026-01-30 修復:加強欄位驗證、同步狀態追蹤、失敗告警
self.update_job_status(job_id, 'importing', 90, '同步至業績分析儀表板...')
sync_success = False
sync_error_msg = None
monthly_table = 'realtime_sales_monthly'
try:
# 準備資料:移除 snapshot_date 欄位realtime_sales_monthly 不需要此欄位)
df_monthly = df.drop(columns=['snapshot_date'], errors='ignore')
# 2026-01-30 修正:強化欄位名稱轉換
# 將特殊字符轉換為 PostgreSQL 安全格式
column_mapping = {}
for col in df_monthly.columns:
new_col = col.replace('%', '_pct').replace('(', '_').replace(')', '_')
column_mapping[col] = new_col
df_monthly = df_monthly.rename(columns=column_mapping)
# 記錄轉換的欄位
converted_cols = [f"'{k}' -> '{v}'" for k, v in column_mapping.items() if k != v]
if converted_cols:
logger.info(f"任務 {job_id} 欄位名稱轉換: {', '.join(converted_cols)}")
logger.info(f"任務 {job_id} 欄位轉換完成,共 {len(df_monthly.columns)} 個欄位")
# 2026-01-30 新增:驗證 DataFrame 欄位和目標表欄位是否一致
with engine.connect() as conn:
col_query = text(f"""
SELECT column_name FROM information_schema.columns
WHERE table_name = '{monthly_table}' AND column_name != 'id'
ORDER BY ordinal_position
""")
result = conn.execute(col_query)
target_columns = set([row[0] for row in result])
df_columns = set(df_monthly.columns)
missing_in_table = df_columns - target_columns
missing_in_df = target_columns - df_columns
if missing_in_table:
logger.warning(f"任務 {job_id} 欄位警告: DataFrame 有但表中沒有: {missing_in_table}")
# 移除表中沒有的欄位,避免 INSERT 失敗
df_monthly = df_monthly.drop(columns=list(missing_in_table), errors='ignore')
logger.info(f"任務 {job_id} 已移除多餘欄位,剩餘 {len(df_monthly.columns)} 個欄位")
if missing_in_df:
logger.warning(f"任務 {job_id} 欄位警告: 表中有但 DataFrame 沒有: {missing_in_df}")
# 取得本次匯入的日期列表(使用原始「日期」欄位)
unique_dates = []
if '日期' in df.columns:
unique_dates = df['日期'].dropna().unique().tolist()
logger.info(f"任務 {job_id} 準備同步 {len(unique_dates)} 個日期的資料")
if len(unique_dates) > 0:
# 刪除 realtime_sales_monthly 中相同日期的舊資料(去重)
date_list_monthly = ', '.join([f"'{d}'" for d in unique_dates])
with engine.connect() as conn:
delete_monthly_query = text(f'DELETE FROM {monthly_table} WHERE "日期" IN ({date_list_monthly})')
result = conn.execute(delete_monthly_query)
deleted_monthly = result.rowcount
conn.commit()
if deleted_monthly > 0:
logger.info(f"任務 {job_id} 已從 {monthly_table} 刪除 {deleted_monthly} 筆同日期舊資料")
# 寫入 realtime_sales_monthly
df_monthly.to_sql(
monthly_table,
engine,
if_exists='append',
index=False,
method='multi',
chunksize=1000
)
logger.info(f"任務 {job_id} 已同步 {len(df_monthly)} 筆資料至 {monthly_table}")
# 驗證同步結果
if len(unique_dates) > 0:
with engine.connect() as conn:
date_list_verify = ', '.join([f"'{d}'" for d in unique_dates])
verify_query = text(f'SELECT COUNT(*) FROM {monthly_table} WHERE "日期" IN ({date_list_verify})')
verify_count = conn.execute(verify_query).scalar()
if verify_count >= len(df_monthly):
logger.info(f"任務 {job_id} 同步驗證成功: {monthly_table} 現有 {verify_count} 筆資料")
sync_success = True
else:
sync_error_msg = f"同步驗證失敗: 預期 {len(df_monthly)} 筆, 實際 {verify_count}"
logger.error(f"任務 {job_id} {sync_error_msg}")
else:
sync_success = True # 沒有日期資料時視為成功
except Exception as sync_error:
# 同步失敗,記錄完整錯誤
import traceback
sync_error_msg = str(sync_error)
logger.error(f"任務 {job_id} 同步至 {monthly_table} 失敗: {sync_error_msg}")
logger.error(f"任務 {job_id} 同步錯誤堆疊:\n{traceback.format_exc()}")
# 2026-01-30 新增:發送同步失敗告警
try:
from services.notification_manager import NotificationManager
notifier = NotificationManager()
alert_msg = (
f"⚠️ 業績資料同步失敗告警\n"
f"{'='*30}\n"
f"任務 ID: {job_id}\n"
f"目標表: {monthly_table}\n"
f"錯誤: {sync_error_msg[:200]}\n"
f"{'='*30}\n"
f"daily_sales_snapshot 已匯入成功,但業績分析儀表板需要手動同步"
)
notifier._send_telegram_messages([alert_msg])
logger.info(f"任務 {job_id} 已發送同步失敗告警")
except Exception as notify_error:
logger.error(f"任務 {job_id} 發送告警失敗: {notify_error}")
# 更新成功資訊
self.update_job_progress(
job_id,
processed_rows=total_rows,
success_rows=total_rows
)
# 2026-01-30 修正:根據同步狀態設置完成訊息
if sync_success:
completion_msg = '匯入完成(已同步至業績分析儀表板)'
else:
completion_msg = '匯入完成(警告:業績分析儀表板同步失敗,需手動處理)'
self.update_job_status(
job_id,
'completed',
100,
completion_msg
)
# 計算日期範圍
date_min = None
date_max = None
valid_dates = df['snapshot_date'].dropna().unique()
if len(valid_dates) > 0:
sorted_dates = sorted([d for d in valid_dates if d is not None])
if sorted_dates:
date_min = str(sorted_dates[0])
date_max = str(sorted_dates[-1])
logger.info(f"任務 {job_id} 日期範圍: {date_min} ~ {date_max}")
# 更新匯入摘要 (2026-01-30 修正:加入同步狀態)
if sync_success:
sync_message = f'成功匯入 {total_rows} 筆資料,已同步至業績分析儀表板'
else:
sync_message = f'成功匯入 {total_rows} 筆資料,但同步至業績分析儀表板失敗: {sync_error_msg}'
summary = {
'imported_count': total_rows,
'table_name': table_name,
'synced_to': 'realtime_sales_monthly' if sync_success else None,
'sync_success': sync_success,
'sync_error': sync_error_msg,
'verified': True, # daily_sales_snapshot 驗證
'date_min': date_min,
'date_max': date_max,
'message': sync_message
}
session = Session()
try:
job = session.query(ImportJob).filter_by(id=job_id).first()
if job:
job.import_summary = json.dumps(summary, ensure_ascii=False)
session.commit()
finally:
session.close()
logger.info(f"任務 {job_id} 匯入成功: {total_rows}")
# cache 失效改靠 _get_data_fingerprintDB max(snapshot_date)+count(*)
# 寫入後指紋自動跳號4 worker 下一次 request 時各自偵測失效,
# 取代不可靠的 N-POST hack命中率僅 9.4%,見 web-researcher 報告)。
return True
except Exception as e:
error_msg = f"匯入過程發生異常: {str(e)}"
self.update_job_status(job_id, 'failed', 50, '匯入失敗', error_msg)
logger.error(f"任務 {job_id} 匯入異常: {str(e)}")
import traceback
logger.error(traceback.format_exc())
return False
def auto_import_from_drive(self) -> Dict[str, Any]:
"""
從 Google Drive 自動匯入檔案
Returns:
Dict: 執行結果
"""
try:
# 取得配置
folder_path = self.get_config('gdrive_folder_path', '業績報表/當日業績')
file_pattern = self.get_config('gdrive_file_pattern', '即時業績_當日')
logger.info(f"開始檢查 Google Drive: {folder_path}")
# 列出檔案
files = drive_service.list_files_in_folder(folder_path, file_pattern)
if not files:
logger.info("沒有找到待匯入的檔案")
# Staleness gate (critic-approved 2026-05-03)
# 'move-then-success' 反模式:成功 import 後 move_file 把 Excel 搬到
# 「已匯入」資料夾 → 後續排程 list 回空 → 走此分支 silent return success
# → 4/27~5/2 daily_sales_snapshot 停更 8 天無告警。補主動偵測:
# Drive 空 + DB ≥3 天無新資料時主動發催促告警(週末跨假期不誤觸)。
try:
from database.manager import get_session
from sqlalchemy import text
from datetime import date
from services.openclaw_strategist_service import _send_data_stale_alert
_stale_session = get_session()
try:
last_date = _stale_session.execute(
text("SELECT MAX(snapshot_date)::date FROM daily_sales_snapshot")
).scalar()
finally:
_stale_session.close()
if last_date:
days_since = (date.today() - last_date).days
if days_since >= 3:
_send_data_stale_alert(
report_type="upstream_drive",
last_date=str(last_date),
period=f"已停更 {days_since}",
)
except Exception:
logger.error(
"staleness check failed in auto_import_from_drive",
exc_info=True,
)
return {
'success': True,
'message': '沒有找到待匯入的檔案',
'file_count': 0
}
# 處理每個檔案
imported_count = 0
total_rows = 0
all_dates = [] # 收集所有匯入的日期
for file in files:
file_id = file['id']
file_name = file['name']
file_size = file.get('size', 0)
logger.info(f"發現檔案: {file_name}")
# 建立匯入任務
job_id = self.create_import_job('daily_sales', file_id, file_name, file_size)
if not job_id:
continue
# 下載檔案
self.update_job_status(job_id, 'downloading', 10, '正在下載檔案...')
temp_dir = 'data/temp'
os.makedirs(temp_dir, exist_ok=True)
local_path = os.path.join(temp_dir, file_name)
if not drive_service.download_file(file_id, local_path):
self.update_job_status(job_id, 'failed', 10, '下載失敗', '無法從 Google Drive 下載檔案')
continue
# 更新本地路徑
session = Session()
try:
job = session.query(ImportJob).filter_by(id=job_id).first()
if job:
job.local_file_path = local_path
session.commit()
finally:
session.close()
self.update_job_status(job_id, 'downloading', 40, '下載完成')
# 匯入資料
if self.process_daily_sales_import(job_id, local_path):
# 移動 Google Drive 檔案到「已匯入」資料夾
self.update_job_status(job_id, 'completed', 90, '正在移動雲端檔案...')
# 取得「已匯入」資料夾路徑配置
archive_folder = self.get_config('gdrive_archive_folder', '已匯入')
if drive_service.move_file(file_id, archive_folder):
logger.info(f"已移動 Google Drive 檔案到「{archive_folder}」: {file_name}")
else:
logger.warning(f"無法移動 Google Drive 檔案: {file_name}")
self.update_job_status(job_id, 'completed', 100, '完成')
imported_count += 1
# 讀取 job summary 取得匯入筆數和日期範圍
session = Session()
try:
job = session.query(ImportJob).filter_by(id=job_id).first()
if job and job.import_summary:
summary = json.loads(job.import_summary)
total_rows += summary.get('imported_count', 0)
if summary.get('date_min'):
all_dates.append(summary['date_min'])
if summary.get('date_max'):
all_dates.append(summary['date_max'])
finally:
session.close()
# 清理本地檔案
try:
os.remove(local_path)
logger.info(f"已清理本地檔案: {local_path}")
except Exception as e:
logger.warning(f"清理本地檔案失敗: {str(e)}")
# 計算日期範圍
date_range = None
if all_dates:
sorted_dates = sorted(set(all_dates))
if sorted_dates:
date_range = {
'min': sorted_dates[0],
'max': sorted_dates[-1]
}
return {
'success': True,
'message': f'成功匯入 {imported_count} 個檔案',
'file_count': len(files),
'imported_count': imported_count,
'total_rows': total_rows,
'date_range': date_range
}
except Exception as e:
error_msg = str(e)
logger.error(f"自動匯入失敗: {error_msg}")
# 區分連線錯誤和真正的匯入錯誤
# 連線錯誤(如 Broken pipe、網路問題不應發送告警
connection_errors = [
'Broken pipe',
'Connection refused',
'Connection reset',
'Connection timed out',
'Name or service not known',
'No route to host',
'Network is unreachable',
'SSL',
'authenticate',
'credentials',
'token'
]
is_connection_error = any(err.lower() in error_msg.lower() for err in connection_errors)
if is_connection_error:
# 連線錯誤:返回成功但無檔案(避免發送告警)
logger.warning(f"Google Drive 連線問題,跳過本次匯入檢查: {error_msg}")
return {
'success': True, # 標記為成功避免告警
'message': f'Google Drive 連線問題,跳過本次檢查',
'file_count': 0,
'imported_count': 0,
'connection_error': True # 標記為連線錯誤供日誌記錄
}
else:
# 真正的匯入錯誤:返回失敗
return {
'success': False,
'message': f'自動匯入失敗: {error_msg}',
'file_count': 0,
'imported_count': 0
}
# 建立全域服務實例
import_service = ImportService()