# www.robotstxt.org rules - last updated 20240319
# https://neil-clarke.com/block-the-bots-that-feed-ai-models-by-scraping-your-website/
# https://coywolf.today/blocking-generative-ai-bots-in-robots-txt/
# https://gist.github.com/henshaw/aa8b68ad8b7f897c709bd0ef4fd03b48#file-disallow-genai-bots-txt
#
# Cloudflare WAF rules
# https://developers.cloudflare.com/waf/tools/user-agent-blocking/#cloudflare-user-agent-blocking
# https://dash.cloudflare.com/b377376c57401198279f7fe161c53972/davidroessli.com/security/waf/custom-rules

# NOTE(review): the per-bot groups below are commented out and therefore
# inactive. Presumably these user agents are blocked by the Cloudflare WAF
# rules referenced above instead - confirm before re-enabling or removing.
# To re-enable a group, remove the leading "# " from BOTH of its lines.

# User-agent: AdsBot-Google
# Disallow: /

# User-agent: Amazonbot
# Disallow: /

# User-agent: Anthropic-ai
# Disallow: /

# User-agent: AwarioRssBot
# Disallow: /

# User-agent: AwarioSmartBot
# Disallow: /

# User-agent: Bytespider
# Disallow: /

# User-agent: CCBot
# Disallow: /

# User-agent: ChatGPT-User
# Disallow: /

# User-agent: ClaudeBot
# Disallow: /

# User-agent: Claude-Web
# Disallow: /

# User-agent: Cohere-ai
# Disallow: /

# User-agent: DataForSeoBot
# Disallow: /

# User-agent: FacebookBot
# Disallow: /

# User-agent: Google-Extended
# Disallow: /

# User-agent: GPTBot
# Disallow: /

# User-agent: ImagesiftBot
# Disallow: /

# User-agent: Magpie-crawler
# Disallow: /

# User-agent: Omgili
# Disallow: /

# User-agent: Omgilibot
# Disallow: /

# User-agent: Peer39_crawler
# Disallow: /

# User-agent: Peer39_crawler/1.0
# Disallow: /

# User-agent: PerplexityBot
# Disallow: /

# User-agent: YouBot
# Disallow: /

# Default group: applies to every crawler not matched by a more specific
# group; the whole site is allowed.
User-agent: *
Allow: /

# Sitemap is independent of any User-agent group and must be an absolute URL.
Sitemap: https://davidroessli.com/sitemap.xml