#
# robots.txt
#
# This file is to prevent the crawling and indexing of certain parts
# of your site by web crawlers and spiders run by sites like Yahoo!
# and Google. By telling these "robots" where not to go on your site,
# you save bandwidth and server resources.
#
# This file will be ignored unless it is at the root of your host:
# Used:    http://example.com/robots.txt
# Ignored: http://example.com/site/robots.txt
#
# For more information about the robots.txt standard, see:
# http://www.robotstxt.org/robotstxt.html

User-agent: *
Crawl-delay: 1760
# Directories
Disallow: /chairmansblog3/wp-login.php*
Disallow: /chairmansblog3/*.php*
Disallow: /sites/default/files/*
Disallow: /includes/
Disallow: /misc/
Disallow: /modules/
Disallow: /profiles/
Disallow: /scripts/
Disallow: /themes/
#Disallow: /mass-listings/
#Disallow: /mass-listings
#Disallow: /mass-listings*
Disallow: /mass-listings/*
Disallow: /mass-listings-notanymore
Disallow: /mass-listings-notanymore*
Disallow: /mass-listings-notanymore/*
# Files
#added by gjcopp 21-07-21 remove when document is published
Disallow: /sites/default/files/resource_documents/Trad_Cust_canonical_notes.pdf
Disallow: /CHANGELOG.txt
Disallow: /cron.php
Disallow: /INSTALL.mysql.txt
Disallow: /INSTALL.pgsql.txt
Disallow: /INSTALL.sqlite.txt
Disallow: /install.php
Disallow: /INSTALL.txt
Disallow: /LICENSE.txt
Disallow: /MAINTAINERS.txt
Disallow: /update.php
Disallow: /UPGRADE.txt
Disallow: /xmlrpc.php
# Paths (clean URLs)
#added by gjcopp 21-07-21 remove when document is published
Disallow: /tradcust/canonical_notes
Disallow: /admin/
Disallow: /comment/reply/
Disallow: /filter/tips/
Disallow: /node/add/
Disallow: /search/
Disallow: /user/register/
Disallow: /user/password/
Disallow: /user/login/
Disallow: /user/logout/
# Paths (no clean URLs)
Disallow: /?q=admin/
Disallow: /?q=comment/reply/
Disallow: /?q=filter/tips/
Disallow: /?q=node/add/
Disallow: /?q=search/
Disallow: /?q=user/password/
Disallow: /?q=user/register/
Disallow: /?q=user/login/
Disallow: /?q=user/logout/
Disallow: /?q=mass-listings*
Disallow: /mass-listings?*
#
# spiderslap
Disallow: /now/bugoff
Disallow: /photo-files/*
Disallow: /find-a-mass/*
Disallow: /news-and-events/*
Disallow: /resources/shop/*
Disallow: /autodiscover/*

User-agent: Baiduspider
Disallow: /

User-agent: Curl
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: AhrefsBot
Disallow: /

User-agent: oBot
Disallow: /

User-agent: SemrushBot
Disallow: /

# NOTE(review): the empty-Disallow groups below name the same bots as the
# block-all groups above but permit everything; most parsers honor only one
# group per user-agent, so one set of rules is being ignored — verify intent.
User-agent: Baiduspider
Disallow:

User-agent: Curl
Disallow:

User-agent: MJ12bot
Disallow:

User-agent: AhrefsBot
Disallow:

User-agent: oBot
Disallow:

User-agent: SemrushBot
Disallow:

User-agent: YandexBot
Disallow:

User-Agent: The Knowledge AI
Disallow: /

# NOTE(review): this is a second "User-agent: *" group; per RFC 9309 rules for
# the same agent should live in one group, and some parsers will ignore this
# one in favor of the first — consider merging with the "*" group above.
User-agent: *
Crawl-delay: 1760
Disallow: /cgi-bin/
Disallow: /private/
Disallow: /tmp/
Disallow: /feed
Disallow: /rss
Disallow: /feeds/
Disallow: /photo-files/
Disallow: /cache/
Disallow: /taxonomy/term/*/all/*
Disallow: /church/*?page=*
Allow: /product/daily-missal-1962
Allow: /product/ordinary-prayers-traditional-latin-mass
Allow: /catalog/missals
Allow: /mp3-chant-downloads
Allow: /about

User-agent: AI2Bot
User-agent: Ai2Bot-Dolma
User-agent: aiHitBot
User-agent: Amazonbot
User-agent: Andibot
User-agent: anthropic-ai
User-agent: Applebot
User-agent: Applebot-Extended
User-agent: bedrockbot
User-agent: Brightbot 1.0
User-agent: Bytespider
User-agent: CCBot
User-agent: ChatGPT-User
User-agent: Claude-SearchBot
User-agent: Claude-User
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
User-agent: Cotoyogi
User-agent: Crawlspace
User-agent: Diffbot
User-agent: DuckAssistBot
User-agent: EchoboxBot
User-agent: FacebookBot
User-agent: facebookexternalhit
User-agent: Factset_spyderbot
User-agent: FirecrawlAgent
User-agent: FriendlyCrawler
User-agent: Google-CloudVertexBot
User-agent: Google-Extended
User-agent: GoogleOther
User-agent: GoogleOther-Image
User-agent: GoogleOther-Video
User-agent: GPTBot
User-agent: iaskspider/2.0
User-agent: ICC-Crawler
User-agent: ImagesiftBot
User-agent: img2dataset
User-agent: ISSCyberRiskCrawler
User-agent: Kangaroo Bot
User-agent: meta-externalagent
User-agent: Meta-ExternalAgent
User-agent: meta-externalfetcher
User-agent: Meta-ExternalFetcher
User-agent: MistralAI-User/1.0
User-agent: MyCentralAIScraperBot
User-agent: NovaAct
User-agent: OAI-SearchBot
User-agent: omgili
User-agent: omgilibot
User-agent: Operator
User-agent: PanguBot
User-agent: Panscient
User-agent: panscient.com
User-agent: Perplexity-User
User-agent: PerplexityBot
User-agent: PetalBot
User-agent: PhindBot
User-agent: Poseidon Research Crawler
User-agent: QualifiedBot
User-agent: QuillBot
User-agent: quillbot.com
User-agent: SBIntuitionsBot
User-agent: Scrapy
User-agent: SemrushBot
User-agent: SemrushBot-BA
User-agent: SemrushBot-CT
User-agent: SemrushBot-OCOB
User-agent: SemrushBot-SI
User-agent: SemrushBot-SWA
User-agent: Sidetrade indexer bot
User-agent: TikTokSpider
User-agent: Timpibot
User-agent: VelenPublicWebCrawler
User-agent: Webzio-Extended
User-agent: wpbot
User-agent: YandexAdditional
User-agent: YandexAdditionalBot
User-agent: YouBot
Disallow: /