# ==========================================================================
# WordPress robots.txt — general-purpose template
# Last reviewed: 2026-05-14
# Customize the "Sitemap:" line at the bottom for each site.
#
# How crawlers read this file (RFC 9309):
#   * A crawler obeys ONLY the most specific User-agent group that matches it;
#     groups are never merged. A bot with its own group ignores "User-agent: *".
#   * When an Allow and a Disallow both match a URL, the rule with the longest
#     path wins; on a tie, Allow wins.
#   * Every path pattern must start with "/". "*" matches any run of
#     characters and "$" anchors the end of the URL.
# ==========================================================================

# --------------------------------------------------------------------------
# AI training crawlers — block all by default
# (comment out or delete any group you want to allow,
#  e.g. Google-Extended for Gemini training)
# --------------------------------------------------------------------------
User-agent: GPTBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: OAI-SearchBot
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: meta-externalagent
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: PerplexityBot
Disallow: /

User-agent: YouBot
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: TimpiBot
Disallow: /

User-agent: ZipchatBot
Disallow: /

User-agent: Zipchat
Disallow: /

# --------------------------------------------------------------------------
# Aggressive SEO scrapers — block (low value, high load)
# --------------------------------------------------------------------------
User-agent: AhrefsBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: DataForSeoBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: BLEXBot
Disallow: /

User-agent: dotbot
Disallow: /

User-agent: serpstatbot
Disallow: /

User-agent: LinkupBot
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: Omgili
Disallow: /

# --------------------------------------------------------------------------
# Non-target-market search engines
# (keep only the ones you genuinely don't need to be indexed by;
#  comment out or delete the rest)
# --------------------------------------------------------------------------
User-agent: Baiduspider
Disallow: /

User-agent: Baiduspider-render
Disallow: /

User-agent: YandexBot
Disallow: /

User-agent: Sogou
Disallow: /

User-agent: PetalBot
Disallow: /

# --------------------------------------------------------------------------
# Slow down legitimate but heavy crawlers
#
# Because bingbot has its own group it does NOT inherit "User-agent: *",
# so the default path rules are repeated here. Keep this list in sync with
# the default group below.
# --------------------------------------------------------------------------
User-agent: bingbot
Crawl-delay: 10

# WordPress core
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-includes/
Disallow: /wp-content/cache/
Disallow: /wp-content/plugins/
Disallow: /wp-content/uploads/wpforms/
Disallow: /readme.html
Disallow: /license.txt
Disallow: /xmlrpc.php

# Author / search / trackback / pagination noise
Disallow: /author/
Disallow: /?author=
Disallow: /?s=
Disallow: /search/
Disallow: /trackback/
Disallow: /*/trackback/
Disallow: /*?replytocom=
Disallow: /comment-page-
Disallow: /*/comment-page-

# WordPress feeds (remove together with the matching block in the default group)
Disallow: /feed/
Disallow: /feed$
Disallow: /*/feed/
Disallow: /*/feed$
Disallow: /comments/feed/
Disallow: /*/comments/feed/
Disallow: /*?feed=
Disallow: /*&feed=

# WooCommerce
Disallow: /wp-content/uploads/wc-logs/
Disallow: /wp-content/uploads/woocommerce_transient_files/
Disallow: /wp-content/uploads/woocommerce_uploads/
Disallow: /*?add-to-cart=
Disallow: /*?*add-to-cart=
Disallow: /cart/
Disallow: /checkout/
Disallow: /my-account/

# Rendering assets
Allow: /wp-content/uploads/
Allow: /*.css$
Allow: /*.js$
Allow: /*.png$
Allow: /*.jpg$
Allow: /*.gif$
Allow: /*.svg$
Allow: /*.webp$
Allow: /wp-includes/*.css
Allow: /wp-includes/*.js
Allow: /wp-content/plugins/*.css
Allow: /wp-content/plugins/*.js
Allow: /wp-content/cache/*.css
Allow: /wp-content/cache/*.js

# --------------------------------------------------------------------------
# Default rules — apply to ALL crawlers not matched above
# --------------------------------------------------------------------------
User-agent: *

# WordPress core
Disallow: /wp-admin/
# admin-ajax.php is re-allowed: it is longer than /wp-admin/, so it wins.
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-includes/
Disallow: /wp-content/cache/
Disallow: /wp-content/plugins/
Disallow: /wp-content/uploads/wpforms/
Disallow: /readme.html
Disallow: /license.txt
Disallow: /xmlrpc.php

# Author / search / trackback / pagination noise
Disallow: /author/
Disallow: /?author=
Disallow: /?s=
Disallow: /search/
Disallow: /trackback/
Disallow: /*/trackback/
Disallow: /*?replytocom=
Disallow: /comment-page-
Disallow: /*/comment-page-

# WordPress feeds (RSS / Atom / RDF / comments)
# Remove this block if your site relies on RSS subscribers.
# "/feed$" is end-anchored on purpose: a bare "/feed" is a prefix match and
# would also block unrelated pages such as /feedback/.
Disallow: /feed/
Disallow: /feed$
Disallow: /*/feed/
Disallow: /*/feed$
Disallow: /comments/feed/
Disallow: /*/comments/feed/
Disallow: /*?feed=
Disallow: /*&feed=

# WooCommerce — keep these even if site doesn't use Woo (harmless)
Disallow: /wp-content/uploads/wc-logs/
Disallow: /wp-content/uploads/woocommerce_transient_files/
Disallow: /wp-content/uploads/woocommerce_uploads/
Disallow: /*?add-to-cart=
Disallow: /*?*add-to-cart=
Disallow: /cart/
Disallow: /checkout/
Disallow: /my-account/

# Allow CSS/JS/images so Google can render pages correctly
Allow: /wp-content/uploads/
Allow: /*.css$
Allow: /*.js$
Allow: /*.png$
Allow: /*.jpg$
Allow: /*.gif$
Allow: /*.svg$
Allow: /*.webp$
# The short patterns above lose to the longer directory Disallow rules
# (/wp-includes/, /wp-content/plugins/, /wp-content/cache/), so stylesheets
# and scripts in those directories are re-allowed with longer patterns.
# No "$" anchor here, so versioned URLs like style.css?ver=6.5 also match.
Allow: /wp-includes/*.css
Allow: /wp-includes/*.js
Allow: /wp-content/plugins/*.css
Allow: /wp-content/plugins/*.js
Allow: /wp-content/cache/*.css
Allow: /wp-content/cache/*.js

# --------------------------------------------------------------------------
# Sitemap — change this URL per site
# --------------------------------------------------------------------------
Sitemap: https://www.weipuconnector.com/sitemap_index.xml