# robots.txt — Studio Legale Militare
# Auto-generated — 2026-05-18 (v4.30 — Policy C: Selective)
# Spec: https://www.rfc-editor.org/rfc/rfc9309 (Robots Exclusion Protocol)
#
# Format note: robots.txt is line-oriented. Every directive and every comment
# must sit on its own line; "#" comments out the remainder of its line, so a
# copy of this file collapsed onto a few long lines contains no usable rules.
# NOTE(review): the file is marked auto-generated — presumably the generator's
# line joining needs the same correction; confirm before the next regeneration.
#
# Adopted policy: opt-out of AI training / opt-in to search-time citation
# Legal references: EU Directive 2019/790 art. 4 §3 (TDM opt-out)
#                   + D.Lgs. 177/2021 art. 70-quater l. 633/1941
#                   + EU Reg. 2024/1689 (AI Act)
# Full policy notice: https://www.studiolegalemilitare.it/ai-policy
#
# Group-matching note: a crawler obeys ONLY the most specific group that names
# it and falls back to "*" only when no other group matches. The private-path
# exclusions are therefore repeated in every group that grants access instead
# of being left to the "*" group alone.

# ─── Generic crawlers ───────────────────────────────────────────────
User-agent: *
Allow: /
Disallow: /data/
Disallow: /includes/
Disallow: /vendor/
Disallow: /cerca?
Crawl-delay: 1

# ═══════════════════════════════════════════════════════════════════
# SEARCH-TIME / INFERENCE-TIME crawlers — ALLOW
# (they read the site when a user issues a real-time query and do
#  NOT store it in model weights — useful AI visibility)
# ═══════════════════════════════════════════════════════════════════

# ─── Classic search engines ─────────────────────────────────────────
# One shared group: consecutive User-agent lines all receive the rules below.
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-News
User-agent: Bingbot
User-agent: DuckDuckBot
User-agent: Yandex
User-agent: Baiduspider
User-agent: Applebot
Allow: /
Disallow: /data/
Disallow: /includes/
Disallow: /vendor/
Disallow: /cerca?

# ─── AI search-time / on-demand fetchers ────────────────────────────
# Allowed: they read the site ONLY when a user asks a real-time question
# in ChatGPT Search, Perplexity, Claude Web, Gemini, etc.
User-agent: OAI-SearchBot
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: Google-CloudVertexBot
User-agent: DuckAssistBot
User-agent: KagiBot
User-agent: MistralAI-User
User-agent: YouBot
User-agent: Brave-Leo
User-agent: Phind
User-agent: Meta-ExternalFetcher
User-agent: cohere-search-data-crawler
Allow: /
Disallow: /data/
Disallow: /includes/
Disallow: /vendor/
Disallow: /cerca?

# ═══════════════════════════════════════════════════════════════════
# TRAINING crawlers — DISALLOW
# (they absorb content into permanent LLM training datasets;
#  opt-out under art. 4 §3 of EU Directive 2019/790)
# ═══════════════════════════════════════════════════════════════════
User-agent: GPTBot
User-agent: ClaudeBot
User-agent: Google-Extended
User-agent: Applebot-Extended
User-agent: Meta-ExternalAgent
User-agent: FacebookBot
User-agent: Amazonbot
User-agent: CCBot
User-agent: cohere-training-data-crawler
User-agent: AI2Bot
User-agent: Diffbot
User-agent: ImagesiftBot
User-agent: PetalBot
User-agent: Bytespider
Disallow: /

# ─── Sitemaps ───────────────────────────────────────────────────────
Sitemap: https://www.studiolegalemilitare.it/sitemap-index.xml
Sitemap: https://www.studiolegalemilitare.it/sitemap.xml
Sitemap: https://www.studiolegalemilitare.it/sitemap-news.xml
Sitemap: https://www.studiolegalemilitare.it/sitemap-images.xml
Sitemap: https://www.studiolegalemilitare.it/sitemap-faq.xml

# ─── AI/GEO resources ───────────────────────────────────────────────
# Informational only: these are comments, not robots.txt directives.
# llms.txt:           https://www.studiolegalemilitare.it/llms.txt
# llms-full.txt:      https://www.studiolegalemilitare.it/llms-full.txt
# ai.txt:             https://www.studiolegalemilitare.it/ai.txt
# citations.json:     https://www.studiolegalemilitare.it/citations.json
# agents.json:        https://www.studiolegalemilitare.it/agents.json
# carbon.txt:         https://www.studiolegalemilitare.it/carbon.txt
# security.txt:       https://www.studiolegalemilitare.it/.well-known/security.txt
# ai-preferences.txt: https://www.studiolegalemilitare.it/.well-known/ai-preferences.txt
# mcp.json:           https://www.studiolegalemilitare.it/.well-known/mcp.json
# traffic-advice:     https://www.studiolegalemilitare.it/.well-known/traffic-advice
# nodeinfo:           https://www.studiolegalemilitare.it/.well-known/nodeinfo