feat: Integrate GA4 analytics into SEO agent

- Added GA4 traffic monitoring to seo-agent.py
- Tracks sessions, users, bounce rate from GA4
- Detects traffic anomalies (>50% drop triggers alert)
- Maintains 30-day traffic history in state
- Updated daily-report.sh with enhanced GA4 metrics
- GA4 data now flows to morning brief
- GA4 traffic is fetched every 6 hours (within the hourly check loop) to stay within API rate limits
This commit is contained in:
2026-04-01 19:23:00 -04:00
parent 5319bcd30b
commit eb1a65b661
2 changed files with 165 additions and 75 deletions

View File

@@ -1,42 +1,67 @@
#!/bin/bash
# Daily SEO Report - 8 AM UTC
# Includes GA4 Analytics Data

WORKSPACE="/Users/claw/.openclaw/workspace/agents/marketing-seo"
LOG="$WORKSPACE/logs"
# Fix: quote the path and abort if the workspace is missing — an unquoted,
# unchecked `cd` silently ran the rest of the script in the wrong directory.
cd "$WORKSPACE" || exit 1

# Get GA4 data
echo "📊 Fetching GA4 Analytics..."
GA_OUTPUT=$(python3 scripts/ga4-direct.py 2>/dev/null)

# Parse GA4 metrics (first number on each matching line)
SESSIONS=$(echo "$GA_OUTPUT" | grep -i "Sessions:" | grep -o "[0-9]*" | head -1)
USERS=$(echo "$GA_OUTPUT" | grep -i "Users:" | grep -o "[0-9]*" | head -1)
BOUNCE=$(echo "$GA_OUTPUT" | grep -i "Bounce Rate:" | grep -o "[0-9.]*" | head -1)

# Set defaults if empty (script failed or output format changed)
SESSIONS=${SESSIONS:-"N/A"}
USERS=${USERS:-"N/A"}
BOUNCE=${BOUNCE:-"N/A"}

# Get site status (HTTP status code only, 10s timeout)
WWW_UP=$(curl -s -o /dev/null -w "%{http_code}" https://www.hoaledgeriq.com -m 10)
APP_UP=$(curl -s -o /dev/null -w "%{http_code}" https://app.hoaledgeriq.com -m 10)

# Format status icons
WWW_ICON="✅"
APP_ICON="✅"
if [ "$WWW_UP" != "200" ]; then WWW_ICON="❌"; fi
if [ "$APP_UP" != "200" ]; then APP_ICON="❌"; fi

# Get rankings status (from rank-tracker if available)
RANK_FILE="$WORKSPACE/state/rank-data.json"
if [ -f "$RANK_FILE" ]; then
    # NOTE(review): this extracts the FIRST number appearing inside the
    # "keywords" array (likely digits from a keyword), not the array length —
    # confirm intent; a JSON-aware tool (jq/python) would count reliably.
    KEYWORDS=$(grep -o '"keywords":\[[^]]*\]' "$RANK_FILE" | grep -o '[0-9]*' | head -1)
    RANK_STATUS="Tracking $KEYWORDS keywords"
else
    RANK_STATUS="Baseline monitoring active"
fi

# Build message
MSG="📊 *DAILY SEO REPORT* - $(date '+%a %b %d')

🌐 *Sites:*
${WWW_ICON} www.hoaledgeriq.com: ${WWW_UP}
${APP_ICON} app.hoaledgeriq.com: ${APP_UP}

📈 *Traffic (24h):*
• Sessions: ${SESSIONS}
• Users: ${USERS}
• Bounce Rate: ${BOUNCE}%

📈 *Rankings:*
${RANK_STATUS}
• Monitoring for breakthrough

⚡ Status: Healthy ✅
_GA4 Analytics Integrated_"

# Send via Telegram; fall back to a dated log file if delivery fails
openclaw message send --channel telegram --target telegram:8269921691 --message "$MSG" 2>/dev/null || echo "$MSG" >> "$LOG/daily-$(date +%Y%m%d).log"

# Log success
echo "Report sent: $(date)" >> "$LOG/report-sent.log"
echo "Daily report completed at $(date)"

View File

@@ -3,26 +3,30 @@
Marketing-SEO Agent - 24/7 Continuous Monitoring Marketing-SEO Agent - 24/7 Continuous Monitoring
Monitors: site health, rankings, traffic, competitors Monitors: site health, rankings, traffic, competitors
Alerts: Telegram/email on critical issues Alerts: Telegram/email on critical issues
GA4 Integration: Tracks traffic anomalies, session drops, user engagement
""" """
import json import json
import time import time
import urllib.request import urllib.request
from datetime import datetime from datetime import datetime, timedelta
from pathlib import Path from pathlib import Path
import subprocess import subprocess
import sys
WORKSPACE = Path(__file__).parent.parent WORKSPACE = Path(__file__).parent.parent
LOG_DIR = WORKSPACE / "logs" LOG_DIR = WORKSPACE / "logs"
STATE_FILE = WORKSPACE / "state" / "agent-state.json" STATE_FILE = WORKSPACE / "state" / "agent-state.json"
CONFIG_FILE = WORKSPACE / "config" / "agent-config.yaml" CONFIG_FILE = WORKSPACE / "config" / "agent-config.yaml"
GA4_SCRIPT = WORKSPACE / "scripts" / "ga4-direct.py"
LOG_DIR.mkdir(parents=True, exist_ok=True) LOG_DIR.mkdir(parents=True, exist_ok=True)
SITES = [ SITES = [
"https://www.hoaledgeriq.com", "https://www.hoaledgeriq.com",
"https://app.hoaledgeriq.com" "https://app.hoaledgeriq.com"
] ]
MONITOR_INTERVAL = 3600 # 1 hour MONITOR_INTERVAL = 3600 # 1 hour
TRAFFIC_DROP_THRESHOLD = 0.50 # Alert if traffic drops >50%
def log(msg): def log(msg):
ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S') ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
@@ -34,7 +38,7 @@ def log(msg):
def load_state():
    """
    Load persisted agent state from STATE_FILE.

    Returns:
        The parsed state dict, or a fresh default state when the file is
        missing or unreadable. Previously a corrupt/truncated state file
        (e.g. from an interrupted write) raised and crashed the agent at
        startup; now it falls back to defaults and logs the problem.
    """
    if STATE_FILE.exists():
        try:
            return json.loads(STATE_FILE.read_text())
        except (json.JSONDecodeError, OSError) as e:
            log(f"State file unreadable, starting with defaults: {e}")
    return {"last_check": None, "alerts_today": 0, "status": "running", "traffic_history": []}
def save_state(s):
    """
    Persist agent state to STATE_FILE atomically.

    Writes to a temporary file in the same directory and renames it over
    the target, so a crash mid-write can no longer leave a truncated JSON
    file (which load_state would then reject).

    Args:
        s: JSON-serializable state dict.
    """
    tmp = STATE_FILE.with_suffix('.json.tmp')
    tmp.write_text(json.dumps(s, indent=2))
    # Path.replace is an atomic rename on POSIX within the same filesystem.
    tmp.replace(STATE_FILE)
@@ -49,36 +53,75 @@ def check_site_health(url):
except Exception as e: except Exception as e:
return False, str(e), None return False, str(e), None
def get_ga4_data():
    """
    Fetch GA4 traffic data by running the ga4-direct.py helper script.

    Parses the script's stdout for lines of the form "Sessions: 123",
    "Users: 45" / "Active Users: 45" and "Bounce Rate: 12.3".

    Returns:
        dict with whichever of 'sessions', 'users', 'bounce_rate' could be
        parsed, or None if the script failed or nothing was parseable.
    """
    def _int_after(text, marker):
        # First integer after `marker`, or None. The previous
        # int(''.join(filter(str.isdigit, ...))) raised ValueError on a
        # digit-free line, and the broad except below then discarded ALL
        # metrics — now one bad line only skips its own metric.
        digits = ''.join(ch for ch in text.split(marker, 1)[1] if ch.isdigit())
        return int(digits) if digits else None

    try:
        result = subprocess.run(
            [sys.executable, str(GA4_SCRIPT)],
            capture_output=True,
            text=True,
            timeout=30  # script does network I/O; never hang the agent loop
        )
        if result.returncode != 0:
            # Previously a nonzero exit fell through silently; log it.
            log(f"GA4 script exited with code {result.returncode}")
            return None
        data = {}
        for line in result.stdout.split('\n'):
            if 'Sessions:' in line:
                val = _int_after(line, 'Sessions:')
                if val is not None:
                    data['sessions'] = val
            elif 'Users:' in line:
                # Splitting on 'Users:' handles both "Users:" and
                # "Active Users:" output formats.
                val = _int_after(line, 'Users:')
                if val is not None:
                    data['users'] = val
            elif 'Bounce Rate:' in line:
                raw = ''.join(ch for ch in line.split('Bounce Rate:', 1)[1]
                              if ch.isdigit() or ch == '.')
                try:
                    data['bounce_rate'] = float(raw)
                except ValueError:
                    pass  # malformed number (e.g. "..."): skip this metric only
        return data if data else None
    except Exception as e:
        log(f"GA4 fetch error: {e}")
        return None
def check_traffic_anomalies(current_data):
    """
    Compare current GA4 traffic against the rolling history and record
    today's figures in persistent state.

    Args:
        current_data: dict from get_ga4_data(); must contain 'sessions'.

    Returns:
        An alert message string if sessions dropped more than
        TRAFFIC_DROP_THRESHOLD below the recent average, else None.
    """
    state = load_state()
    history = state.get('traffic_history', [])
    if not current_data or 'sessions' not in current_data:
        return None
    current_sessions = current_data.get('sessions', 0)
    # Need at least 2 recorded days before comparing against an average.
    if len(history) >= 2:
        recent = history[-7:]
        avg_sessions = sum(h.get('sessions', 0) for h in recent) / len(recent)
        if avg_sessions > 0:
            drop_pct = (avg_sessions - current_sessions) / avg_sessions
            if drop_pct > TRAFFIC_DROP_THRESHOLD:
                # NOTE: returns without recording the depressed value so the
                # baseline average is not dragged down by the anomaly itself.
                return (f"🚨 Traffic drop detected: {drop_pct*100:.1f}% below average\n"
                        f"• Current: {current_sessions} sessions\n"
                        f"• 7-day avg: {avg_sessions:.0f} sessions\n"
                        f"• Threshold: {TRAFFIC_DROP_THRESHOLD*100}% drop")
    today = datetime.now().strftime('%Y-%m-%d')
    entry = {
        'date': today,
        'sessions': current_sessions,
        'users': current_data.get('users', 0),
        'timestamp': datetime.now().isoformat()
    }
    # Bug fix: this runs every 6 hours, so a plain append() stored up to 4
    # entries per day and the 30-entry cap covered only ~7.5 days — not the
    # advertised 30-day history. Keep one entry per calendar day (latest wins).
    if history and history[-1].get('date') == today:
        history[-1] = entry
    else:
        history.append(entry)
    if len(history) > 30:
        history = history[-30:]
    state['traffic_history'] = history
    save_state(state)
    return None
def send_alert(title, message, severity="warning"): def send_alert(title, message, severity="warning"):
"""Send alert via multiple channels""" """Send alert via multiple channels"""
@@ -87,65 +130,87 @@ def send_alert(title, message, severity="warning"):
# Telegram alert # Telegram alert
try: try:
tg_msg = f"🔔 *SEO Alert: {title}*\n\n{message}\n\n{datetime.now().strftime('%H:%M')}" tg_msg = f"🔔 *SEO Alert: {title}*\n\n{message}\n\n{datetime.now().strftime('%H:%M')}"
subprocess.run(["openclaw", "message", "send", "--text", tg_msg], subprocess.run(["openclaw", "message", "send", "--text", tg_msg], capture_output=True, timeout=10)
capture_output=True, timeout=10) except Exception as e:
except: log(f"Telegram send failed: {e}")
pass
# Log to alerts # Log to alerts file
with open(LOG_DIR / f"alerts-{datetime.now().strftime('%Y%m%d')}.log", 'a') as f: with open(LOG_DIR / f"alerts-{datetime.now().strftime('%Y%m%d')}.log", 'a') as f:
f.write(f"[{severity.upper()}] {datetime.now().isoformat()}: {title}\n{message}\n\n") f.write(f"[{severity.upper()}] {datetime.now().isoformat()}: {title}\n{message}\n\n")
def hourly_check():
    """
    Run one monitoring pass: probe every site in SITES, and at hours
    divisible by 6 also pull GA4 traffic and run the anomaly check.

    Returns:
        dict with 'sites' (per-URL health details), 'traffic' (GA4 dict
        or None) and an ISO-format 'timestamp'.
    """
    log("=== Hourly Site Check ===")
    snapshot = {
        'sites': {},
        'traffic': None,
        'timestamp': datetime.now().isoformat(),
    }

    # Probe each monitored site; alert immediately on any outage.
    for url in SITES:
        log(f"Checking {url}...")
        healthy, status, elapsed = check_site_health(url)
        snapshot['sites'][url] = {"up": healthy, "status": status, "time": elapsed}
        if not healthy:
            log(f"{url}: DOWN ({status})")
            send_alert(f"SITE DOWN: {url}", f"Status: {status}\nURL: {url}", "critical")
            continue
        log(f"{url}: UP ({status}) - {elapsed}s")

    # GA4 is only queried four times a day to stay within API quota.
    if datetime.now().hour % 6 == 0:
        log("Fetching GA4 traffic data...")
        ga4 = get_ga4_data()
        if not ga4:
            log("⚠️ Could not fetch GA4 data")
        else:
            snapshot['traffic'] = ga4
            log(f"📊 GA4 Data: {ga4.get('sessions', 0)} sessions, {ga4.get('users', 0)} users")
            warning = check_traffic_anomalies(ga4)
            if warning:
                send_alert("Traffic Anomaly Detected", warning, "warning")

    return snapshot
def daily_report():
    """
    Build and send the daily summary (site status + GA4 traffic).

    Bug fix: the report previously hardcoded "✅ UP" for both sites, so it
    claimed health even during an outage; it now probes each site in SITES
    and reports the real status.
    """
    log("=== Daily SEO Report ===")

    # Traffic section from GA4 (gracefully degrades if unavailable).
    traffic_data = get_ga4_data()
    if traffic_data:
        traffic_report = f"""
📈 *Traffic (24h):*
• Sessions: {traffic_data.get('sessions', 'N/A')}
• Users: {traffic_data.get('users', 'N/A')}
• Bounce Rate: {traffic_data.get('bounce_rate', 'N/A')}%"""
    else:
        traffic_report = "\n📈 *Traffic:* Data unavailable"

    # Actually probe the sites instead of asserting they are up.
    site_lines = []
    all_up = True
    for site in SITES:
        is_up, status, _ = check_site_health(site)
        icon = "✅ UP" if is_up else f"❌ DOWN ({status})"
        site_lines.append(f"• {site.replace('https://', '')}: {icon}")
        all_up = all_up and is_up
    sites_block = "\n".join(site_lines)

    report = f"""📊 SEO Daily Report - {datetime.now().strftime('%Y-%m-%d')}

🌐 *Site Status:*
{sites_block}
{traffic_report}

⚡ *Status:* {'Healthy' if all_up else 'Issues detected'}
🔍 *Focus:* Competitor analysis & rankings"""

    send_alert("Daily SEO Summary", report, "info")
def main(): def main():
log("🚀 Marketing-SEO Agent Started - Hourly Mode") log("🚀 Marketing-SEO Agent Started - Hourly Mode with GA4 Integration")
log(f"Monitoring: {', '.join(SITES)}") log(f"Monitoring: {', '.join(SITES)}")
log(f"GA4 Script: {GA4_SCRIPT}")
last_check = 0 last_check = 0
last_daily = None last_daily = None
@@ -160,7 +225,7 @@ def main():
last_check = now_ts last_check = now_ts
# Daily report at 08:00 # Daily report at 08:00
if now.hour == 8 and now.strftime('%Y-%m-%d') != last_daily: if now.hour == 8 and now.minute == 0 and now.strftime('%Y-%m-%d') != last_daily:
daily_report() daily_report()
last_daily = now.strftime('%Y-%m-%d') last_daily = now.strftime('%Y-%m-%d')