# ================================================
# HOALedgerIQ_Website/robots.txt
# ================================================
# ================================================
# Optimal robots.txt for a SaaS Marketing Website
# ================================================
# Purpose: Maximize indexing of your public marketing pages (homepage, features, pricing,
# blog, case studies, etc.) while protecting private/admin areas and saving crawl budget.
#
# This is a clean, modern template optimized for SEO in 2026. It:
# • Allows ALL major search engines (Googlebot, Bingbot, etc.)
# • References your sitemap so crawlers discover every page fast
# • Blocks only non-marketing paths (customize these!)
# • Does NOT block AI trainers like GPTBot (they don't affect search rankings)
#
# Upload this file exactly as "robots.txt" (lowercase) to your site root.
# Test it at https://yourdomain.com/robots.txt and in Google Search Console.
User-agent: *
Allow: /
# ── Block private / admin / internal paths (ADD or REMOVE based on your structure) ──
Disallow: /admin/
Disallow: /dashboard/
Disallow: /account/
Disallow: /login/
Disallow: /signup/
Disallow: /api/
Disallow: /internal/
Disallow: /private/
Disallow: /staging/
Disallow: /preview/
# Optional: conversion pages that don't need indexing
Disallow: /thank-you/
# If your SaaS app lives on the same domain (common pattern)
Disallow: /app/
# ── Block common junk that wastes crawl budget ──
Disallow: /cgi-bin/
Disallow: /.git/
Disallow: /.env
Disallow: /temp/
Disallow: /tmp/
Disallow: /backup/
# ── Optional: Block specific file types if you don't want them indexed
# Disallow: /*.pdf$ # uncomment only if you don't want PDFs in search results
# Disallow: /*?utm_* # optional: blocks tracking parameters (Google usually ignores these anyway)
# ================================================
# SITEMAP (CRITICAL for fast & complete indexing)
# ================================================
# Replace with your actual sitemap URL (most SaaS sites use /sitemap.xml or /sitemap_index.xml)
Sitemap: https://yourdomain.com/sitemap.xml
# ================================================
# Specific bots
# ================================================
# NOTE: Per the Robots Exclusion Protocol (RFC 9309), a crawler obeys only
# the single MOST SPECIFIC User-agent group that matches it. A bare group
# like "User-agent: Googlebot" + "Allow: /" would make Googlebot ignore
# every Disallow rule in the "*" group above — the opposite of what was
# intended. Googlebot, Bingbot, etc. already match "User-agent: *", so no
# per-bot groups are needed. If you ever add one, repeat ALL the Disallow
# rules inside it.
# If you ever want to block AI scrapers (does NOT affect SEO indexing):
# User-agent: GPTBot
# Disallow: /
# User-agent: Google-Extended
# Disallow: /
# End of file