###################################
# www.TechnicalTalk.net Smart Robots v3.05
###################################
# This is a smart robots.txt which logs the IP and user agent of every visitor.
# Bots differ in whether they support wildcards (*), multiple user-agents per
# group and end-anchors ($), so some bots are given their own dedicated group.
#
# Detected Spider/Bot: None
#
# Headers Sent:
#   Content-Type: text/plain
#   Expires: Sun, 31 Aug 2008 05:20:25 GMT (12 hour validity)
#

# My Sitemap - I don't provide it just for the fun of it
Sitemap: http://www.technicaltalk.net/index.php?action=sitemap;xml

# Google - Most Important bot
# Unfortunately a robots.txt only stops it crawling certain URLs; it does NOT
# stop it adding URLs it comes across to its index. So we rely on a meta
# noindex tag for that.
User-agent: Googlebot
# Don't index mobile versions
Disallow: /index.php?*;wap
Disallow: /index.php?*;wap2
Disallow: /index.php?*;imode

# Yahoo - Too aggressive
# So limit it as much as possible.
User-agent: Slurp
# Disallow Everything
Disallow: /
# Now allow bits and then disallow bits
# Allow: /sitemap.xml$
Allow: /robots.txt$
Allow: /index.php$
Allow: /index.php?topic=*.0$
Allow: /index.php?topic=*.*0$
Allow: /index.php?topic=*.*5$
Allow: /index.php?board=*.0$
Allow: /index.php?board=*.*0$
Allow: /index.php?board=*.*5$
# But don't allow these
Disallow: /index.php?*.msg
Disallow: /index.php?topic=*.msg*0$
Disallow: /index.php?topic=*.msg*5$
Disallow: /index.php?*.new
# Anything with a ; disallow
Disallow: /index.php?*;*
# Arcade Related
# NOTE(review): the last Allow below contains ';' and therefore also matches
# the "Disallow: /index.php?*;*" rule above; which rule wins depends on the
# crawler's precedence logic - confirm the intended result for Slurp.
Allow: /index.php?action=arcade$
Allow: /index.php?action=stats$
Allow: /index.php?action=arcade;sa=play;game=

# Bad bot - Often ignores robots.txt - Waste of bandwidth
# Despite claiming on their website to be a search engine in development,
# I'm suspicious as to whether they are a harvester pretending to be a SE.
User-agent: Twiceler
Disallow: /

# Blacklist: every bot below is shut out of the whole site.
# One group per user-agent (no multi-agent groups), each separated by a blank
# line, so that simple parsers can read them.

User-Agent: W3C-checklink
Disallow: /

User-agent: NPBot
Disallow: /

User-Agent: Googlebot-Image
Disallow: /

User-agent: psbot
Disallow: /

User-agent: OmniExplorer_Bot
Disallow: /

User-agent: FreeFind
Disallow: /

User-agent: BecomeBot
Disallow: /

User-agent: Nutch
Disallow: /

User-agent: nicebot
Disallow: /

User-agent: Jetbot/1.0
Disallow: /

User-agent: Jetbot
Disallow: /

User-agent: WebVac
Disallow: /

User-agent: Stanford
Disallow: /

User-agent: scooter
Disallow: /

User-agent: naver
Disallow: /

User-agent: dumbot
Disallow: /

User-agent: Hatena Antenna
Disallow: /

User-agent: grub-client
Disallow: /

User-agent: grub
Disallow: /

User-agent: WebZip
Disallow: /

User-agent: larbin
Disallow: /

User-agent: b2w/0.1
Disallow: /

User-agent: Copernic
Disallow: /

User-agent: Python-urllib
Disallow: /

User-agent: NetMechanic
Disallow: /

User-agent: URL_Spider_Pro
Disallow: /

User-agent: CherryPicker
Disallow: /

User-agent: EmailCollector
Disallow: /

User-agent: EmailSiphon
Disallow: /

User-agent: WebBandit
Disallow: /

User-agent: EmailWolf
Disallow: /

User-agent: ExtractorPro
Disallow: /

User-agent: CopyRightCheck
Disallow: /

User-agent: Crescent
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: ProWebWalker
Disallow: /

User-agent: CheeseBot
Disallow: /

User-agent: LNSpiderguy
Disallow: /

User-agent: Mozilla
Disallow: /

User-agent: mozilla
Disallow: /

User-agent: mozilla/3
Disallow: /

User-agent: mozilla/4
Disallow: /

User-agent: mozilla/5
Disallow: /

User-agent: Mozilla/4.0 (compatible; MSIE 4.0; Windows NT)
Disallow: /

User-agent: Mozilla/4.0 (compatible; MSIE 4.0; Windows 95)
Disallow: /

User-agent: Mozilla/4.0 (compatible; MSIE 4.0; Windows 98)
Disallow: /

User-agent: Mozilla/4.0 (compatible; MSIE 4.0; Windows XP)
Disallow: /

User-agent: Mozilla/4.0 (compatible; MSIE 4.0; Windows 2000)
Disallow: /

User-agent: Alexibot
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: Stanford Comp Sci
Disallow: /

User-agent: MIIxpc
Disallow: /

User-agent: Telesoft
Disallow: /

User-agent: Website Quester
Disallow: /

User-agent: moget/2.1
Disallow: /

User-agent: WebZip/4.0
Disallow: /

User-agent: WebStripper
Disallow: /

User-agent: WebSauger
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: NetAnts
Disallow: /

User-agent: Mister PiX
Disallow: /

User-agent: WebAuto
Disallow: /

User-agent: TheNomad
Disallow: /

User-agent: WWW-Collector-E
Disallow: /

User-agent: RMA
Disallow: /

User-agent: libWeb/clsHTTP
Disallow: /

User-agent: asterias
Disallow: /

User-agent: httplib
Disallow: /

User-agent: turingos
Disallow: /

User-agent: spanner
Disallow: /

User-agent: InfoNaviRobot
Disallow: /

User-agent: Harvest/1.5
Disallow: /

User-agent: Bullseye/1.0
Disallow: /

User-agent: Mozilla/4.0 (compatible; BullsEye; Windows 95)
Disallow: /

User-agent: Crescent Internet ToolPak HTTP OLE Control v.1.0
Disallow: /

User-agent: CherryPickerSE/1.0
Disallow: /

User-agent: CherryPickerElite/1.0
Disallow: /

User-agent: WebBandit/3.50
Disallow: /

User-agent: NICErsPRO
Disallow: /

User-agent: Microsoft URL Control - 5.01.4511
Disallow: /

User-agent: DittoSpyder
Disallow: /

User-agent: Foobot
Disallow: /

User-agent: WebmasterWorldForumBot
Disallow: /

User-agent: SpankBot
Disallow: /

User-agent: BotALot
Disallow: /

User-agent: lwp-trivial/1.34
Disallow: /

User-agent: lwp-trivial
Disallow: /

User-agent: http://www.WebmasterWorld.com bot
Disallow: /

User-agent: BunnySlippers
Disallow: /

User-agent: Microsoft URL Control - 6.00.8169
Disallow: /

User-agent: URLy Warning
Disallow: /

User-agent: Wget/1.6
Disallow: /

User-agent: Wget/1.5.3
Disallow: /

User-agent: Wget
Disallow: /

User-agent: LinkWalker
Disallow: /

User-agent: cosmos
Disallow: /

User-agent: moget
Disallow: /

User-agent: hloader
Disallow: /

User-agent: humanlinks
Disallow: /

User-agent: LinkextractorPro
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: Mata Hari
Disallow: /

User-agent: LexiBot
Disallow: /

User-agent: Web Image Collector
Disallow: /

User-agent: The Intraformant
Disallow: /

User-agent: True_Robot/1.0
Disallow: /

User-agent: True_Robot
Disallow: /

User-agent: BlowFish/1.0
Disallow: /

User-agent: http://www.SearchEngineWorld.com bot
Disallow: /

User-agent: JennyBot
Disallow: /

User-agent: MIIxpc/4.2
Disallow: /

User-agent: BuiltBotTough
Disallow: /

User-agent: ProPowerBot/2.14
Disallow: /

User-agent: BackDoorBot/1.0
Disallow: /

User-agent: toCrawl/UrlDispatcher
Disallow: /

User-agent: WebEnhancer
Disallow: /

User-agent: suzuran
Disallow: /

User-agent: VCI WebViewer VCI WebViewer Win32
Disallow: /

User-agent: VCI
Disallow: /

User-agent: Szukacz/1.4
Disallow: /

User-agent: QueryN Metasearch
Disallow: /

User-agent: Openfind data gathere
Disallow: /

User-agent: Openfind
Disallow: /

User-agent: Xenu's Link Sleuth 1.1c
Disallow: /

User-agent: Xenu's
Disallow: /

User-agent: Zeus
Disallow: /

User-agent: RepoMonkey Bait & Tackle/v1.01
Disallow: /

User-agent: RepoMonkey
Disallow: /

User-agent: Microsoft URL Control
Disallow: /

# Blacklist (continued): each bot below is denied the entire site.

User-agent: Openbot
Disallow: /

User-agent: URL Control
Disallow: /

User-agent: Zeus Link Scout
Disallow: /

User-agent: Zeus 32297 Webster Pro V2.9 Win32
Disallow: /

User-agent: Webster Pro
Disallow: /

User-agent: EroCrawler
Disallow: /

User-agent: LinkScan/8.1a Unix
Disallow: /

User-agent: Keyword Density/0.9
Disallow: /

User-agent: Kenjin Spider
Disallow: /

User-agent: Iron33/1.0.2
Disallow: /

User-agent: Bookmark search tool
Disallow: /

User-agent: GetRight/4.2
Disallow: /

User-agent: FairAd Client
Disallow: /

User-agent: Gaisbot
Disallow: /

User-agent: Aqua_Products
Disallow: /

User-agent: Radiation Retriever 1.1
Disallow: /

User-agent: WebmasterWorld Extractor
Disallow: /

User-agent: Flaming AttackBot
Disallow: /

User-agent: Oracle Ultra Search
Disallow: /

User-agent: MSIECrawler
Disallow: /

User-agent: PerMan
Disallow: /

User-agent: searchpreview
Disallow: /

User-agent: sootle
Disallow: /

User-agent: es
Disallow: /

User-agent: Enterprise_Search/1.0
Disallow: /

User-agent: Enterprise_Search
Disallow: /

# Stop following PHPSESSID's
User-Agent: MJ12bot
Disallow: /index.php?PHPSESSID

# Catch all (remainder)
# Followed by any bot that is not identified by a group above.
# Uses BASIC robots.txt directives only - no wildcards, end-anchors etc. -
# so spiders should understand these (including MSNBOT).
# The group deliberately contains no blank lines; comment lines are used as
# separators instead.
User-agent: *
# Default SMF Folders
Disallow: /attachments/
Disallow: /Packages/
Disallow: /Smileys/
Disallow: /Sources/
Disallow: /Themes/
Disallow: /avatars/
Disallow: /uploads/
# Default SMF Actions
Disallow: /index.php?action=activate
Disallow: /index.php?action=news
Disallow: /index.php?action=packages
Disallow: /index.php?action=packageget
Disallow: /index.php?action=featuresettings
Disallow: /index.php?action=serversettings
Disallow: /index.php?action=manageboards
Disallow: /index.php?action=postsettings
Disallow: /index.php?action=managecalendar
Disallow: /index.php?action=managesearch
Disallow: /index.php?action=smileys
Disallow: /index.php?action=manageattachments
Disallow: /index.php?action=viewmembers
Disallow: /index.php?action=membergroups
Disallow: /index.php?action=permissions
Disallow: /index.php?action=regcenter
Disallow: /index.php?action=ban
Disallow: /index.php?action=maintain
Disallow: /index.php?action=reports
Disallow: /index.php?action=viewErrorLog
Disallow: /index.php?action=admin
Disallow: /index.php?action=calendar
Disallow: /index.php?action=emailuser
Disallow: /index.php?action=findmember
Disallow: /index.php?action=help
Disallow: /index.php?action=helpadmin
Disallow: /index.php?action=login
Disallow: /index.php?action=logout
Disallow: /index.php?action=mlist
Disallow: /index.php?action=modifykarma
Disallow: /index.php?action=pm
Disallow: /index.php?action=post
Disallow: /index.php?action=printpage
Disallow: /index.php?action=profile
Disallow: /index.php?action=recent
Disallow: /index.php?action=register
Disallow: /index.php?action=reminder
Disallow: /index.php?action=search
Disallow: /index.php?action=theme
Disallow: /index.php?action=unread
Disallow: /index.php?action=unreadreplies
Disallow: /index.php?action=verificationcode
Disallow: /index.php?action=who
Disallow: /index.php?theme
# SMF Mod Related
Disallow: /archive.php
Disallow: /index.php?action=blog
Disallow: /index.php?action=viewblog
Disallow: /index.php?action=chess
Disallow: /index.php?action=comment
Disallow: /index.php?action=downloads
Disallow: /index.php?action=links
Disallow: /index.php?action=reporttm
Disallow: /index.php?action=recenttopics
Disallow: /index.php?action=mm
# Disallow: /index.php?action=sitemap
Disallow: /index.php?action=staff
Disallow: /index.php?action=tags
Disallow: /index.php?action=thankyou
Disallow: /index.php?action=viewkarma
Disallow: /index.php?action=viewers
Disallow: /index.php?f=
Disallow: /index.php?filter
Disallow: /index.php?referredby
Disallow: /Games/
Disallow: /Downloads/
Disallow: /index.php?action=arcade;favorites
Disallow: /index.php?action=arcade;sa=highscore
Disallow: /index.php?action=arcade;sa=play;random
Disallow: /index.php?action=arcade;category
Disallow: /index.php?action=arcade;sort
Disallow: /index.php?action=arcade;stats
Disallow: /index.php?action=stats;expand
Disallow: /index.php?action=stats;collapse
Disallow: /index.php?action=tagged
Disallow: /index.php?action=admod