70 lines
2.4 KiB
Plaintext
70 lines
2.4 KiB
Plaintext
# ================================================
|
|
# Optimal robots.txt for a SaaS Marketing Website
|
|
# ================================================
|
|
# Purpose: Maximize indexing of your public marketing pages (homepage, features, pricing,
|
|
# blog, case studies, etc.) while protecting private/admin areas and saving crawl budget.
|
|
#
|
|
# This is a clean, modern template optimized for SEO in 2026. It:
|
|
# • Allows ALL major search engines (Googlebot, Bingbot, etc.)
|
|
# • References your sitemap so crawlers discover every page fast
|
|
# • Blocks only non-marketing paths (customize these!)
|
|
# • Does NOT block AI trainers like GPTBot (they don't affect search rankings)
|
|
#
|
|
# Upload this file exactly as "robots.txt" (lowercase) to your site root.
|
|
# Test it at https://yourdomain.com/robots.txt and in Google Search Console.
|
|
|
|
User-agent: *
|
|
Allow: /
|
|
|
|
# ── Block private / admin / internal paths (ADD or REMOVE based on your structure) ──
|
|
Disallow: /admin/
|
|
Disallow: /dashboard/
|
|
Disallow: /account/
|
|
Disallow: /login/
|
|
Disallow: /signup/
|
|
Disallow: /api/
|
|
Disallow: /internal/
|
|
Disallow: /private/
|
|
Disallow: /staging/
|
|
Disallow: /preview/
|
|
Disallow: /thank-you/ # optional: conversion pages that don't need indexing
|
|
Disallow: /app/ # if your SaaS app lives on the same domain (common pattern)
|
|
|
|
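# ── Block common junk that wastes crawl budget (robots.txt is public and advisory, not access control: also deny /.git/, /.env and /backup/ at the web server) ──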
|
|
Disallow: /cgi-bin/
|
|
Disallow: /.git/
|
|
Disallow: /.env
|
|
Disallow: /temp/
|
|
Disallow: /tmp/
|
|
Disallow: /backup/
|
|
|
|
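# ── Optional: Block specific file types from being crawled (Disallow stops crawling, not indexing; use a noindex / X-Robots-Tag header to keep a URL out of search results)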
|
|
# Disallow: /*.pdf$ # uncomment only if you don't want PDFs in search results
|
|
# Disallow: /*?*utm_ # optional: blocks URLs with utm_ tracking parameters in any position (canonical tags normally consolidate these already)
|
|
|
|
# ================================================
|
|
# SITEMAP (CRITICAL for fast & complete indexing)
|
|
# ================================================
|
|
# Replace with your actual sitemap URL (most SaaS sites use /sitemap.xml or /sitemap_index.xml)
|
|
Sitemap: https://yourdomain.com/sitemap.xml
|
|
|
|
# ================================================
# Specific bots (optional - read the warning before adding any)
# ================================================
# WARNING: a crawler obeys ONLY the most specific User-agent group that matches
# it and ignores every other group, including "User-agent: *" (RFC 9309).
# A group such as
#
#   User-agent: Googlebot
#   Allow: /
#
# would therefore let Googlebot crawl /admin/, /api/, /app/ and every other path
# disallowed above. Googlebot, Bingbot and Slurp (Yahoo) are already covered by
# the "User-agent: *" group, so no per-bot groups are needed here.
# If you do add a per-bot group, repeat inside it every Disallow rule that
# should still apply to that bot.
|
|
|
|
# If you ever want to block AI scrapers (does NOT affect SEO indexing):
|
|
# User-agent: GPTBot
|
|
# Disallow: /
|
|
# User-agent: Google-Extended
|
|
# Disallow: /
|
|
|
|
# End of file
|