# LAURA Digital Agency - robots.txt
# https://laura.lat
#
# Notes for maintainers:
#   * Every directive must be on its own line; everything after "#" on a
#     line is a comment and is ignored by crawlers.
#   * A crawler that matches a named User-agent group ignores the "*" group,
#     so a named group must repeat every rule it needs.
#   * "*" is only valid as a complete User-agent value; partial wildcards
#     such as "*bot*" match nothing.
#   * Disallow rules are listed before Allow rules so that first-match
#     parsers and longest-match parsers reach the same result.
#   * Crawl-delay is non-standard: some crawlers (e.g. Bingbot) honor it,
#     Googlebot ignores it.

# Default rules: apply to every crawler without a dedicated group below.
User-agent: *
# Sensitive directories
Disallow: /php/
Disallow: /config/
Disallow: /.git/
Disallow: /node_modules/
Disallow: /api/
Disallow: /admin/
Disallow: /error/
Disallow: /temp/
Disallow: /cache/
Disallow: /backup/
Disallow: /logs/
# Sensitive file types ("$" anchors the match to the end of the URL path)
Disallow: /*.log$
Disallow: /*.sql$
Disallow: /*.env$
Disallow: /*.config$
Disallow: /*.ini$
Disallow: /*.conf$
Disallow: /*.bak$
Disallow: /*.old$
Disallow: /*.tmp$
# Asset source files (compiled assets stay crawlable)
Disallow: /assets/php/
Disallow: /assets/scss/
Disallow: /assets/src/
# Compiled CSS and JS are needed for page rendering
Allow: /assets/css/
Allow: /assets/js/
# Public content
Allow: /blog
Allow: /blog/
Allow: /blog-posts/
Allow: /
# Throttle generic crawlers to limit server load
Crawl-delay: 2

# Major search engines: same restrictions as the default group, shorter delay.
# Keep the Disallow list in sync with the "*" group above.
User-agent: Googlebot
User-agent: Bingbot
Disallow: /php/
Disallow: /config/
Disallow: /.git/
Disallow: /node_modules/
Disallow: /api/
Disallow: /admin/
Disallow: /error/
Disallow: /temp/
Disallow: /cache/
Disallow: /backup/
Disallow: /logs/
Disallow: /*.log$
Disallow: /*.sql$
Disallow: /*.env$
Disallow: /*.config$
Disallow: /*.ini$
Disallow: /*.conf$
Disallow: /*.bak$
Disallow: /*.old$
Disallow: /*.tmp$
Disallow: /assets/php/
Disallow: /assets/scss/
Disallow: /assets/src/
Allow: /assets/css/
Allow: /assets/js/
Allow: /blog
Allow: /blog-posts/
Allow: /
Crawl-delay: 1

# Block aggressive SEO / scraping bots entirely
User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: AhrefsBot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: BLEXBot
Disallow: /

# Block AI training bots (optional - uncomment if desired)
# User-agent: CCBot
# Disallow: /
#
# User-agent: ChatGPT-User
# Disallow: /
#
# User-agent: GPTBot
# Disallow: /
#
# User-agent: Google-Extended
# Disallow: /

# Official social media link-preview fetchers: explicitly allowed everywhere
User-agent: facebookexternalhit
Allow: /

User-agent: Twitterbot
Allow: /

User-agent: LinkedInBot
Allow: /

User-agent: WhatsApp
Allow: /

# Main sitemap (applies to all crawlers, independent of the groups above)
Sitemap: https://laura.lat/sitemap.xml