# =====================================================================
# AnchorSOL® Wall - robots.txt
# LLM/AI summary: https://anchorsolwall.com/llms.txt
# (The Sitemap directive is declared at the bottom of this file.)
# =====================================================================
#
# Format notes for maintainers:
#   * One directive per line. A "#" comment runs to the end of the line,
#     so directives must never share a line with a leading comment.
#   * A crawler obeys ONLY the most specific User-agent group that matches
#     it. A separate "User-agent: Googlebot / Allow: /" group would make
#     Googlebot ignore every Disallow below. The named crawlers are
#     therefore listed on the SAME group as "*": they are explicitly
#     welcomed and still inherit the shared Disallow rules.
#   * No blank lines inside the group; some parsers treat a blank line as
#     the end of a group.

# ---------------------------------------------------------------------
# Single shared rule group.
# Default (*), search engine crawlers (explicit welcome), and
# AI / LLM crawlers (allowed, with a structured summary at /llms.txt).
# ---------------------------------------------------------------------
User-agent: *
User-agent: Googlebot
User-agent: Bingbot
User-agent: DuckDuckBot
User-agent: Slurp
User-agent: Baiduspider
User-agent: YandexBot
User-agent: Yeti
User-agent: SeznamBot
User-agent: GPTBot
User-agent: ClaudeBot
User-agent: PerplexityBot
User-agent: Google-Extended
# Everything is crawlable by default...
Allow: /
# ...except development folders.
Disallow: /research/
Disallow: /extracted/
Disallow: /tools/
Disallow: /handoff/
Disallow: /originals/
#
# Legacy WordPress paths - the site used to run on WordPress.
# These URLs no longer exist on the current static site, but crawlers
# still try to fetch them. Disallowing them stops crawl budget being
# wasted on URLs that will only ever return 404.
Disallow: /wp-admin/
Disallow: /wp-content/
Disallow: /wp-includes/
Disallow: /wp-login.php
Disallow: /xmlrpc.php
Disallow: /wp-json/
Disallow: /?p=
Disallow: /?page_id=
Disallow: /feed/
Disallow: /*/feed/
Disallow: /comments/feed/
Disallow: /tag/
Disallow: /category/
Disallow: /author/
Disallow: /industries-category/
Disallow: /industries/
Disallow: /faqs/
#
# Old WordPress spam/injected posts.
# NOTE(review): Disallow only stops crawling; it does not by itself remove
# a URL that is already indexed, and it prevents the crawler from seeing
# the 404/410 response. If these still show up in search results, consider
# temporarily lifting these two rules (or using the search engine's URL
# removal tool) so the 404 can be observed - confirm with whoever owns SEO.
Disallow: /for-the-wealthy-work-is-the-new-retirement/
Disallow: /top-5-tips-for-solving-the-email-security-problem/

# Sitemap reference (independent of any User-agent group; every crawler reads this)
Sitemap: https://anchorsolwall.com/sitemap.xml