robot-id: abcdatos
robot-name: ABCdatos BotLink
robot-cover-url: http://www.abcdatos.com/
robot-details-url: http://www.abcdatos.com/botlink/
robot-owner-name: ABCdatos
robot-owner-url: http://www.abcdatos.com/
robot-owner-email: botlink@abcdatos.com
robot-status: active
robot-purpose: maintenance
robot-type: standalone
robot-platform: windows
robot-availability: none
robot-exclusion: no
robot-exclusion-useragent: BotLink
robot-noindex: no
robot-host: 217.126.39.167
robot-from: no
robot-useragent: ABCdatos BotLink/1.0.2 (test links)
robot-language: basic
robot-description: This robot is used to verify availability of the ABCdatos
                   directory entries (http://www.abcdatos.com), checking
                   HTTP HEAD. Robot runs twice a week. Under HTTP 5xx
                   error responses or unable to connect, it repeats
                   verification some hours later, verifying if that was a
                   temporary situation.
robot-history: This robot was developed by ABCdatos team to help
               working in the directory maintenance.
robot-environment: commercial
modified-date: Thu, 29 May 2003 01:00:00 GMT
modified-by: ABCdatos

robot-id:                       acme-spider
robot-name:                     Acme.Spider
robot-cover-url:                http://www.acme.com/java/software/Acme.Spider.html
robot-details-url:              http://www.acme.com/java/software/Acme.Spider.html
robot-owner-name:               Jef Poskanzer - ACME Laboratories
robot-owner-url:                http://www.acme.com/
robot-owner-email:              jef@acme.com
robot-status:                   active
robot-purpose:                  indexing maintenance statistics
robot-type:                     standalone
robot-platform:                 java
robot-availability:             source
robot-exclusion:                yes
robot-exclusion-useragent:      Due to a deficiency in Java it's not currently possible to set the User-Agent.
robot-noindex:                  no
robot-host:                     *
robot-from:                     no
robot-useragent:                Due to a deficiency in Java it's not currently possible to set the User-Agent.
robot-language:                 java
robot-description:              A Java utility class for writing your own robots.
robot-history:                  
robot-environment:              
modified-date:                  Wed, 04 Dec 1996 21:30:11 GMT
modified-by:                    Jef Poskanzer

robot-id:           ahoythehomepagefinder
robot-name:         Ahoy! The Homepage Finder
robot-cover-url:    http://www.cs.washington.edu/research/ahoy/
robot-details-url:  http://www.cs.washington.edu/research/ahoy/doc/home.html
robot-owner-name:   Marc Langheinrich
robot-owner-url:    http://www.cs.washington.edu/homes/marclang
robot-owner-email:  marclang@cs.washington.edu
robot-status:       active
robot-purpose:      maintenance
robot-type:         standalone
robot-platform:     UNIX
robot-availability: none
robot-exclusion:    yes
robot-exclusion-useragent: ahoy
robot-noindex:      no
robot-host:         cs.washington.edu
robot-from:         no
robot-useragent:    'Ahoy! The Homepage Finder'
robot-language:     Perl 5 
robot-description:  Ahoy! is an ongoing research project at the
                    University of Washington for finding personal Homepages.
robot-history:      Research project at the University of Washington in 
                    1995/1996 
robot-environment:  research
modified-date:      Fri June 28 14:00:00 1996
modified-by:        Marc Langheinrich

robot-id: Alkaline
robot-name: Alkaline
robot-cover-url: http://www.vestris.com/alkaline
robot-details-url: http://www.vestris.com/alkaline
robot-owner-name: Daniel Doubrovkine
robot-owner-url: http://cuiwww.unige.ch/~doubrov5 
robot-owner-email: dblock@vestris.com
robot-status: development active
robot-purpose: indexing
robot-type: standalone     
robot-platform: unix windows95 windowsNT
robot-availability: binary      
robot-exclusion: yes
robot-exclusion-useragent: AlkalineBOT 
robot-noindex: yes
robot-host: *
robot-from: no
robot-useragent: AlkalineBOT
robot-language: c++
robot-description: Unix/NT internet/intranet search engine
robot-history: Vestris Inc. search engine designed at the University of
 Geneva 
robot-environment: commercial research 
modified-date: Thu Dec 10 14:01:13 MET 1998
modified-by: Daniel Doubrovkine <dblock@vestris.com>

robot-id:anthill
robot-name:Anthill
robot-cover-url:http://www.anthill.org/index.html
robot-details-url:http://www.anthill.org/index.html
robot-owner-name:Torsten Kaubisch
robot-owner-url:http://www.anthill.org/index.html
robot-owner-email:info@anthill.org
robot-status:development
robot-purpose:indexing
robot-type:standalone
robot-platform:independent
robot-availability:not yet
robot-exclusion:no (soon in V1.2)
robot-exclusion-useragent:anthill
robot-noindex:no
robot-host:anywhere
robot-from:no
robot-useragent:AnthillV1.1
robot-language:java
robot-description:Anthill is used to gather price information automatically from online stores. Support for international versions.
robot-history:This is a research project at the University of Mannheim in Germany, professorship Prof. Martin Schader, assistant Dr. Stefan Kuhlins
robot-environment:research
modified-date:Thu, 6 Dec 2001 01:55:00 GMT
modified-by:Torsten Kaubisch

robot-id: appie
robot-name: Walhello appie
robot-cover-url: www.walhello.com
robot-details-url: www.walhello.com/aboutgl.html
robot-owner-name: Aimo Pieterse
robot-owner-url: www.walhello.com
robot-owner-email: aimo@walhello.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: windows98
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: appie
robot-noindex: yes
robot-host: 213.10.10.116, 213.10.10.117, 213.10.10.118
robot-from: yes
robot-useragent: appie/1.1
robot-language: Visual C++
robot-description: The appie-spider is used to collect and index web pages for
 the Walhello search engine
robot-history: The spider was built in march/april 2000
robot-environment: commercial
modified-date: Thu, 20 Jul 2000 22:38:00 GMT
modified-by: Aimo Pieterse

robot-id:           arachnophilia
robot-name:         Arachnophilia
robot-cover-url:    
robot-details-url:
robot-owner-name:   Vince Taluskie
robot-owner-url:    http://www.ph.utexas.edu/people/vince.html
robot-owner-email:  taluskie@utpapa.ph.utexas.edu
robot-status:       
robot-purpose:      
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         halsoft.com
robot-from:         
robot-useragent:    Arachnophilia
robot-language:     
robot-description:  The purpose (undertaken by HaL Software) of this run was to
	collect approximately 10k html documents for testing
	automatic abstract generation
robot-history:      
robot-environment:
modified-date:      
modified-by:

robot-id: arale
robot-name: Arale
robot-cover-url: http://web.tiscali.it/_flat
robot-details-url: http://web.tiscali.it/_flat
robot-owner-name: Flavio Tordini
robot-owner-url: http://web.tiscali.it/_flat
robot-owner-email: flaviotordini@tiscali.it
robot-status: active
robot-purpose: maintenance
robot-type: standalone
robot-platform: unix, windows, windows95, windowsNT, os2, mac, linux
robot-availability: source, binary
robot-exclusion: no
robot-exclusion-useragent: arale
robot-noindex: no
robot-host: *
robot-from: no
robot-useragent: no
robot-language: java
robot-description: A java multithreaded web spider. Download entire web sites or specific resources from the web. Render dynamic sites to static pages.
robot-history: This is brand new.
robot-environment: hobby
modified-date: Thu, 09 Jan 2001 17:28:52 GMT
modified-by: Flavio Tordini

robot-id:           araneo
robot-name:         Araneo
robot-cover-url:    http://esperantisto.net
robot-details-url:  http://esperantisto.net/araneo/
robot-owner-name:   Arto Sarle
robot-owner-url:    http://esperantisto.net
robot-owner-email:  araneo@esperantisto.net
robot-status:       development
robot-purpose:      indexing, statistics
robot-type:         standalone
robot-platform:     Linux
robot-availability: none
robot-exclusion:    yes
robot-exclusion-useragent: araneo
robot-noindex:      yes
robot-nofollow:     yes
robot-host:         *.esperantisto.net
robot-from:         yes
robot-useragent:    Araneo/0.7 (araneo@esperantisto.net; http://esperantisto.net)
robot-language:     Python, Java
robot-description:  Araneo is a web robot developed for crawling and indexing web pages written in the international language Esperanto.  The database will be used to build a web search engine and auxiliary services to be published at esperantisto.net.
robot-history:      (The name Araneo means "spider" in Esperanto.)
robot-environment:  hobby, research
modified-date:      Fri, 16 Nov 2001 08:30:00 GMT
modified-by:        Arto Sarle

robot-id: araybot
robot-name: AraybOt
robot-cover-url: http://www.araykoo.com/
robot-details-url: http://www.araykoo.com/araybot.html
robot-owner-name: Guti
robot-owner-url: http://www.araykoo.com/
robot-owner-email: robot@araykoo.com
robot-status: active
robot-purpose: indexing maintenance
robot-type: standalone
robot-platform: Linux
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: AraybOt
robot-noindex: yes
robot-host: *
robot-from: no
robot-useragent: AraybOt/1.0 (+http://www.araykoo.com/araybot.html)
robot-language: perl5
robot-description: AraybOt is the agent software of AraykOO! which crawls
 web sites listed in http://dmoz.org/Adult/, in order to build an adult search
 engine.
robot-history: 
robot-environment: service
modified-date: Sat, 19 Jun 2004 20:25:00 GMT+1
modified-by: Guti

robot-id:           architext
robot-name:         ArchitextSpider
robot-cover-url:    http://www.excite.com/
robot-details-url:
robot-owner-name:   Architext Software
robot-owner-url:    http://www.atext.com/spider.html
robot-owner-email:  spider@atext.com
robot-status:       
robot-purpose:      indexing, statistics
robot-type:         standalone
robot-platform:     
robot-availability: 
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         *.atext.com
robot-from:         yes
robot-useragent:    ArchitextSpider
robot-language:     perl 5 and c
robot-description:  Its purpose is to generate a Resource Discovery database,
	and to generate statistics. The ArchitextSpider collects
	information for the Excite and WebCrawler search engines.
robot-history:      
robot-environment:
modified-date:      Tue Oct  3 01:10:26 1995
modified-by:

robot-id:           aretha
robot-name:         Aretha
robot-cover-url:    
robot-details-url:
robot-owner-name:   Dave Weiner
robot-owner-url:    http://www.hotwired.com/Staff/userland/ 
robot-owner-email:  davew@well.com
robot-status:       
robot-purpose:      
robot-type:         
robot-platform:     Macintosh
robot-availability: 
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      
robot-host:         
robot-from:         
robot-useragent:    
robot-language:     
robot-description:  A crude robot built on top of Netscape and Userland
	Frontier, a scripting system for Macs
robot-history:      
robot-environment:
modified-date:      
modified-by:

robot-id: ariadne
robot-name: ARIADNE
robot-cover-url: (forthcoming)
robot-details-url: (forthcoming)
robot-owner-name: Mr. Matthias H. Gross
robot-owner-url: http://www.lrz-muenchen.de/~gross/
robot-owner-email: Gross@dbs.informatik.uni-muenchen.de
robot-status: development
robot-purpose: statistics, development of focused crawling strategies
robot-type: standalone
robot-platform: java
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: ariadne
robot-noindex: no
robot-host: dbs.informatik.uni-muenchen.de
robot-from: no
robot-useragent: Due to a deficiency in Java it's not currently possible
 to set the User-Agent.
robot-language: java
robot-description: The ARIADNE robot is a prototype of a environment for
 testing focused crawling strategies.
robot-history: This robot is part of a research project at the
 University of Munich (LMU), started in 2000.
robot-environment: research
modified-date: Mo, 13 Mar 2000 14:00:00 GMT
modified-by: Mr. Matthias H. Gross

robot-id:arks
robot-name:arks
robot-cover-url:http://www.dpsindia.com
robot-details-url:http://www.dpsindia.com
robot-owner-name:Aniruddha Choudhury
robot-owner-url:
robot-owner-email:aniruddha.c@usa.net
robot-status:development
robot-purpose:indexing
robot-type:standalone
robot-platform:PLATFORM INDEPENDENT
robot-availability:data
robot-exclusion:yes
robot-exclusion-useragent:arks
robot-noindex:no
robot-host:dpsindia.com
robot-from:no
robot-useragent:arks/1.0
robot-language:Java 1.2
robot-description:The Arks robot is used to build the database
           for the dpsindia/lawvistas.com search service .
           The robot runs weekly, and visits sites in a random order
robot-history:finds its root from s/w development project for a portal
robot-environment:commercial
modified-date:6 th November 2000
modified-by:Aniruddha Choudhury

robot-id:           aspider
robot-name:         ASpider (Associative Spider)
robot-cover-url:    
robot-details-url:
robot-owner-name:   Fred Johansen
robot-owner-url:    http://www.pvv.ntnu.no/~fredj/
robot-owner-email:  fredj@pvv.ntnu.no
robot-status:       retired
robot-purpose:      indexing
robot-type:         
robot-platform:     unix
robot-availability: 
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         nova.pvv.unit.no
robot-from:         yes
robot-useragent:    ASpider/0.09
robot-language:     perl4
robot-description:  ASpider is a CGI script that searches the web for keywords given by the user through a form.
robot-history:      
robot-environment:  hobby
modified-date:      
modified-by:

robot-id: atn
robot-name: ATN Worldwide
robot-details-url:
robot-cover-url:
robot-owner-name: All That Net
robot-owner-url: http://www.allthatnet.com
robot-owner-email: info@allthatnet.com
robot-status: active
robot-purpose: indexing
robot-type:
robot-platform:
robot-availability:
robot-exclusion: yes
robot-exclusion-useragent: ATN_Worldwide
robot-noindex:
robot-nofollow:
robot-host: www.allthatnet.com
robot-from:
robot-useragent: ATN_Worldwide
robot-language:
robot-description: The ATN robot is used to build the database for the
 AllThatNet search service operated by All That Net.  The robot runs weekly,
 and visits sites in a random order.
robot-history:
robot-environment:
modified-date: July 09, 2000 17:43 GMT

robot-id: atomz
robot-name: Atomz.com Search Robot
robot-cover-url: http://www.atomz.com/help/
robot-details-url: http://www.atomz.com/
robot-owner-name: Mike Thompson
robot-owner-url: http://www.atomz.com/
robot-owner-email: mike@atomz.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: service
robot-exclusion: yes
robot-exclusion-useragent: Atomz
robot-noindex: yes
robot-host: www.atomz.com
robot-from: no
robot-useragent: Atomz/1.0
robot-language: c
robot-description: Robot used for web site search service.
robot-history: Developed for Atomz.com, launched in 1999.
robot-environment: service
modified-date: Tue Jul 13 03:50:06 GMT 1999
modified-by: Mike Thompson

robot-id: auresys
robot-name: AURESYS
robot-cover-url: http://crrm.univ-mrs.fr
robot-details-url: http://crrm.univ-mrs.fr      
robot-owner-name: Mannina Bruno 
robot-owner-url: ftp://crrm.univ-mrs.fr/pub/CVetud/Etudiants/Mannina/CVbruno.htm        
robot-owner-email: mannina@crrm.univ-mrs.fr     
robot-status: robot actively in use
robot-purpose: indexing,statistics
robot-type: Standalone
robot-platform: Aix, Unix
robot-availability: Protected by Password
robot-exclusion: Yes
robot-exclusion-useragent:  
robot-noindex: no
robot-host: crrm.univ-mrs.fr, 192.134.99.192
robot-from: Yes
robot-useragent: AURESYS/1.0
robot-language: Perl 5.001m
robot-description: The AURESYS is used to build a personal database for
 somebody who searches for information. The database is structured to be
 analysed. AURESYS can find new servers by incrementing IP addresses. It
 generates statistics...
robot-history: This robot finds its roots in a research project at the 
 University of Marseille in 1995-1996
robot-environment: used for Research
modified-date: Mon, 1 Jul 1996 14:30:00 GMT 
modified-by: Mannina Bruno

robot-id:           backrub
robot-name:         BackRub
robot-cover-url:
robot-details-url:
robot-owner-name:   Larry Page
robot-owner-url:    http://backrub.stanford.edu/
robot-owner-email:  page@leland.stanford.edu
robot-status:
robot-purpose:      indexing, statistics
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:         *.stanford.edu
robot-from:         yes
robot-useragent:    BackRub/*.*
robot-language:     Java.
robot-description:
robot-history:
robot-environment:
modified-date:      Wed Feb 21 02:57:42 1996.
modified-by:

robot-id: bayspider
robot-name: BaySpider
robot-cover-url: http://www.baytsp.com/
robot-details-url: http://www.baytsp.com/
robot-owner-name: BayTSP.com,Inc
robot-owner-url:
robot-owner-email: marki@baytsp.com
robot-status: Active
robot-purpose: Copyright Infringement Tracking
robot-type: Stand Alone
robot-platform: NT
robot-availability: 24/7
robot-exclusion:
robot-exclusion-useragent:
robot-noindex:
robot-host:
robot-from:
robot-useragent: BaySpider
robot-language: English
robot-description:
robot-history:
robot-environment:
modified-date: 1/15/2001
modified-by: Marki@baytsp.com

robot-id:                       bbot
robot-name:                     BBot
robot-cover-url:                http://www.otthon.net/search
robot-details-url:              http://www.otthon.net/search/bbot
robot-owner-name:               Istvan Fulop
robot-owner-url:                http://www.otthon.net
robot-owner-email:              poluf1 at yahoo dot co dot uk
robot-status:                   development
robot-purpose:                  indexing, maintenance
robot-type:                     standalone
robot-platform:                 windows
robot-availability:             none
robot-exclusion:                yes
robot-exclusion-useragent:      bbot
robot-noindex:                  yes
robot-nofollow:			yes
robot-host:                     *.netcologne.de
robot-from:                     yes
robot-useragent:                bbot/0.100
robot-language:                 perl
robot-description:              Mainly intended for site level search, sometimes set loose.
robot-history:                  Started project in 11/2000. Called BBot since 24/04/2003.
robot-environment:              hobby
modified-date:                  Sun, 04 May 2003 10:15:00 GMT
modified-by:                    Istvan Fulop

robot-id: bigbrother
robot-name: Big Brother
robot-cover-url: http://pauillac.inria.fr/~fpottier/mac-soft.html.en
robot-details-url:
robot-owner-name: Francois Pottier
robot-owner-url: http://pauillac.inria.fr/~fpottier/
robot-owner-email: Francois.Pottier@inria.fr
robot-status: active
robot-purpose: maintenance
robot-type: standalone
robot-platform: mac
robot-availability: binary
robot-exclusion: no
robot-exclusion-useragent:
robot-noindex: no
robot-host: *
robot-from: not as of 1.0
robot-useragent: Big Brother
robot-language: c++
robot-description: Macintosh-hosted link validation tool.
robot-history:
robot-environment: shareware
modified-date: Thu Sep 19 18:01:46 MET DST 1996
modified-by: Francois Pottier

robot-id: bjaaland
robot-name: Bjaaland
robot-cover-url: http://www.textuality.com
robot-details-url: http://www.textuality.com
robot-owner-name: Tim Bray
robot-owner-url: http://www.textuality.com
robot-owner-email: tbray@textuality.com
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: Bjaaland
robot-noindex: no
robot-host: barry.bitmovers.net
robot-from: no
robot-useragent: Bjaaland/0.5
robot-language: perl5
robot-description: Crawls sites listed in the ODP (see http://dmoz.org)
robot-history: None, yet
robot-environment: service
modified-date: Monday, 19 July 1999, 13:46:00 PDT
modified-by: tbray@textuality.com

robot-id:           blackwidow
robot-name:         BlackWidow
robot-cover-url:    http://140.190.65.12/~khooghee/index.html
robot-details-url:
robot-owner-name:   Kevin Hoogheem
robot-owner-url:
robot-owner-email:  khooghee@marys.smumn.edu
robot-status:
robot-purpose:      indexing, statistics
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:
robot-host:         140.190.65.*
robot-from:         yes
robot-useragent:    BlackWidow
robot-language:     C, C++.
robot-description:  Started as a research project and now is used to find links
	for a random link generator.  Also is used to research the
	growth of specific sites.
robot-history:
robot-environment:
modified-date:      Fri Feb  9 00:11:22 1996.
modified-by:

robot-id: blindekuh
robot-name: Die Blinde Kuh
robot-cover-url: http://www.blinde-kuh.de/
robot-details-url: http://www.blinde-kuh.de/robot.html (german language)
robot-owner-name: Stefan R. Mueller
robot-owner-url: http://www.rrz.uni-hamburg.de/philsem/stefan_mueller/
robot-owner-email:maschinist@blinde-kuh.de
robot-status: development
robot-purpose: indexing
robot-type: browser
robot-platform: unix
robot-availability: none
robot-exclusion: no
robot-exclusion-useragent:
robot-noindex: no
robot-host: minerva.sozialwiss.uni-hamburg.de
robot-from: yes
robot-useragent: Die Blinde Kuh
robot-language: perl5
robot-description: The robot is used for indexing and proofing the
 registered urls in the german language search-engine for kids.
 It's a non-commercial one-woman-project of Birgit Bachmann
 living in Hamburg, Germany.
robot-history: The robot was developed by Stefan R. Mueller
 to help by the manual proof of registered Links.
robot-environment: hobby
modified-date: Mon Jul 22 1998
modified-by: Stefan R. Mueller

robot-id:Bloodhound
robot-name:Bloodhound
robot-cover-url:http://web.ukonline.co.uk/genius/bloodhound.htm
robot-details-url:http://web.ukonline.co.uk/genius/bloodhound.htm
robot-owner-name:Dean Smart
robot-owner-url:http://web.ukonline.co.uk/genius/bloodhound.htm
robot-owner-email:genius@ukonline.co.uk
robot-status:active
robot-purpose:Web Site Download
robot-type:standalone
robot-platform:Windows95, WindowsNT, Windows98, Windows2000
robot-availability:Executable
robot-exclusion:No
robot-exclusion-useragent:Ukonline
robot-noindex:No
robot-host:*
robot-from:No
robot-useragent:None
robot-language:Perl5
robot-description:Bloodhound will download a whole web site depending on the
 number of links to follow specified by the user.
robot-history:First version was released on the 1 july 2000
robot-environment:Commercial
modified-date:1 july 2000
modified-by:Dean Smart

robot-id: borg-bot
robot-name: Borg-Bot
robot-cover-url: 
robot-details-url: http://www.skunkfarm.com/borgbot.htm
robot-owner-name: James Bragg
robot-owner-url: http://www.skunkfarm.com
robot-owner-email: botdev@skunkfarm.com
robot-status: development
robot-purpose: indexing statistics
robot-type: standalone
robot-platform: Linux Windows2000
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: borg-bot/0.9
robot-noindex: yes
robot-host: 24.11.13.173
robot-from: yes
robot-useragent: borg-bot/0.9
robot-language: python
robot-description: Developmental crawler to feed a search engine
robot-history:  
robot-environment: research service 
modified-date: Sat, 20 Oct 2001 04:00:00 GMT
modified-by: Sat, 20 Oct 2001 04:00:00 GMT

robot-id: boxseabot
robot-name: BoxSeaBot
robot-cover-url: http://www.boxsea.com/crawler
robot-details-url: http://www.boxsea.com/crawler
robot-owner-name: BoxSea Search Engine
robot-owner-url: http://www.boxsea.com
robot-owner-email: boxseasearch@yahoo.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: linux
robot-availability:
robot-exclusion: yes
robot-exclusion-useragent: boxseabot
robot-noindex: 
robot-host: 
robot-from:
robot-useragent: BoxSeaBot/0.5 (http://boxsea.com/crawler)
robot-language: java
robot-description: This robot is used to find pages
 for building the BoxSea search engine indices.
robot-history: The robot code uses Nutch.  Earlier
 experimental crawls were done under various user agent
 names such as NutchCVS(boxsea)
robot-environment:
modified-date: Fri, 23 Jul 2004 11:58:00 PST
modified-by: BoxSeaBot

robot-id: brightnet
robot-name: bright.net caching robot
robot-cover-url:
robot-details-url:
robot-owner-name:
robot-owner-url:
robot-owner-email:
robot-status: active 
robot-purpose: caching 
robot-type:
robot-platform: 
robot-availability: none
robot-exclusion: no
robot-noindex:
robot-host: 209.143.1.46
robot-from: no
robot-useragent: Mozilla/3.01 (compatible;)
robot-language:
robot-description:
robot-history:
robot-environment:
modified-date: Fri Nov 13 14:08:01 EST 1998
modified-by: brian d foy <comdog@computerdog.com>

robot-id: bspider
robot-name: BSpider
robot-cover-url: not yet
robot-details-url: not yet
robot-owner-name: Yo Okumura
robot-owner-url: not yet
robot-owner-email: okumura@rsl.crl.fujixerox.co.jp
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: Unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: bspider
robot-noindex: yes
robot-host: 210.159.73.34, 210.159.73.35
robot-from: yes
robot-useragent: BSpider/1.0 libwww-perl/0.40
robot-language: perl
robot-description: BSpider is crawling inside of Japanese domain for indexing.
robot-history: Starts Apr 1997 in a research project at Fuji Xerox Corp.
 Research Lab.
robot-environment: research
modified-date: Mon, 21 Apr 1997 18:00:00 JST
modified-by: Yo Okumura

robot-id:           cactvschemistryspider
robot-name:         CACTVS Chemistry Spider
robot-cover-url:    http://schiele.organik.uni-erlangen.de/cactvs/spider.html
robot-details-url:
robot-owner-name:   W. D. Ihlenfeldt
robot-owner-url:    http://schiele.organik.uni-erlangen.de/cactvs/
robot-owner-email:  wdi@eros.ccc.uni-erlangen.de
robot-status:
robot-purpose:      indexing.
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:         utamaro.organik.uni-erlangen.de
robot-from:         no
robot-useragent:    CACTVS Chemistry Spider
robot-language:     TCL, C
robot-description:  Locates chemical structures in Chemical MIME formats on WWW
	and FTP servers and downloads them into database searchable
	with structure queries (substructure, fullstructure,
	formula, properties etc.)
robot-history:
robot-environment:
modified-date:      Sat Mar 30 00:55:40 1996.
modified-by:

robot-id: calif
robot-name: Calif
robot-details-url: http://www.tnps.dp.ua/calif/details.html
robot-cover-url: http://www.tnps.dp.ua/calif/
robot-owner-name: Alexander Kosarev
robot-owner-url: http://www.tnps.dp.ua/~dark/
robot-owner-email: kosarev@tnps.net
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: calif
robot-noindex: yes
robot-host: cobra.tnps.dp.ua
robot-from: yes
robot-useragent: Calif/0.6 (kosarev@tnps.net; http://www.tnps.dp.ua)
robot-language: c++
robot-description: Used to build searchable index
robot-history: In development stage
robot-environment: research
modified-date: Sun, 6 Jun 1999 13:25:33 GMT

robot-id: cassandra
robot-name: Cassandra
robot-cover-url: http://post.mipt.rssi.ru/~billy/search/
robot-details-url: http://post.mipt.rssi.ru/~billy/search/
robot-owner-name: Mr. Oleg Bilibin
robot-owner-url:        http://post.mipt.rssi.ru/~billy/
robot-owner-email: billy168@aha.ru
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: crossplatform
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent:
robot-noindex: no
robot-host: www.aha.ru
robot-from: no
robot-useragent:
robot-language: java
robot-description: Cassandra search robot is used to create and maintain indexed database for widespread Information Retrieval System
robot-history: Master of Science degree project at Moscow Institute of Physics and Technology
robot-environment: research
modified-date: Wed, 3 Jun 1998 12:00:00 GMT

robot-id: cgireader
robot-name: Digimarc Marcspider/CGI
robot-cover-url: http://www.digimarc.com/prod_fam.html
robot-details-url: http://www.digimarc.com/prod_fam.html
robot-owner-name: Digimarc Corporation
robot-owner-url: http://www.digimarc.com
robot-owner-email: wmreader@digimarc.com
robot-status: active
robot-purpose: maintenance
robot-type: standalone
robot-platform: windowsNT
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent:
robot-noindex:
robot-host: 206.102.3.*
robot-from:
robot-useragent: Digimarc CGIReader/1.0
robot-language: c++
robot-description: Similar to Digimarc Marcspider, Marcspider/CGI examines
    image files for watermarks but more focused on CGI Urls.
    In order to not waste internet bandwidth with yet another crawler,
    we have contracted with one of the major crawlers/search engines
    to provide us with a list of specific CGI URLs of interest to us.
    If an URL is to a page of interest (via CGI), then we access the
    page to get the image URLs from it, but we do not crawl to
    any other pages.
robot-history: First operation in December 1997
robot-environment: service
modified-date: Fri, 5 Dec 1997 12:00:00 GMT
modified-by: Dan Ramos

robot-id:           checkbot
robot-name:         Checkbot
robot-cover-url:    http://www.xs4all.nl/~graaff/checkbot/
robot-details-url:
robot-owner-name:   Hans de Graaff
robot-owner-url:    http://www.xs4all.nl/~graaff/checkbot/
robot-owner-email:  graaff@xs4all.nl
robot-status:       active
robot-purpose:      maintenance
robot-type:         standalone
robot-platform:     unix,WindowsNT
robot-availability: source
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         *
robot-from:         no
robot-useragent:    Checkbot/x.xx LWP/5.x
robot-language:     perl 5
robot-description:  Checkbot checks links in a
	given set of pages on one or more servers. It reports links
	which returned an error code
robot-history:      
robot-environment:  hobby
modified-date:      Tue Jun 25 07:44:00 1996
modified-by:        Hans de Graaff

robot-id: christcrawler
robot-name: ChristCrawler.com
robot-cover-url: http://www.christcrawler.com/search.cfm
robot-details-url: http://www.christcrawler.com/index.cfm
robot-owner-name: Jeremy DeYoung
robot-owner-url: http://www.christcentral.com/aboutus/index.cfm
robot-owner-email: jeremy.deyoung@christcentral.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: Windows NT 4.0 SP5
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: christcrawler
robot-noindex: yes
robot-host: 64.51.218.*, 64.51.219.*, 12.107.236.*, 12.107.237.*
robot-from: yes
robot-useragent: Mozilla/4.0 (compatible; ChristCrawler.com, ChristCrawler@ChristCENTRAL.com)
robot-language: Cold Fusion 4.5
robot-description: A Christian internet spider that searches web sites to find Christian Related material
robot-history: Developed because of the growing need for a more God influence on the Internet.
robot-environment: service
modified-date: Fri, 27 Jun 2001 00:53:12 CST
modified-by: Jeremy DeYoung

robot-id:           churl
robot-name:         churl
robot-cover-url:    http://www-personal.engin.umich.edu/~yunke/scripts/churl/
robot-details-url:
robot-owner-name:   Justin Yunke
robot-owner-url:    http://www-personal.engin.umich.edu/~yunke/
robot-owner-email:  yunke@umich.edu
robot-status:       
robot-purpose:      maintenance
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         
robot-from:         
robot-useragent:    
robot-language:     
robot-description:  A URL checking robot, which stays within one step of the
	local server
robot-history:      
robot-environment:
modified-date:      
modified-by:

robot-id: cienciaficcion
robot-name: cIeNcIaFiCcIoN.nEt
robot-cover-url: http://www.cienciaficcion.net/
robot-details-url: http://www.cienciaficcion.net/
robot-owner-name: David Fernández
robot-owner-url: http://www.cyberdark.net/
robot-owner-email: root@cyberdark.net
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: linux
robot-availability: none
robot-exclusion: no
robot-exclusion-useragent:
robot-noindex: yes
robot-host: epervier.cqhost.net
robot-from: no
robot-useragent: cIeNcIaFiCcIoN.nEt Spider (http://www.cienciaficcion.net)
robot-language: php,perl
robot-description: Robot encargado de la indexación de las páginas para www.cienciaficcion.net
robot-history: Alcorcón (Madrid) - Europa 2000/2001
robot-environment: hobby
modified-date: Sat, 18 Aug 2001 00:38:52 GMT
modified-by: David Fernández

robot-id: cmc
robot-name: CMC/0.01
robot-details-url: http://www2.next.ne.jp/cgi-bin/music/help.cgi?phase=robot
robot-cover-url: http://www2.next.ne.jp/music/
robot-owner-name: Shinobu Kubota.
robot-owner-url: http://www2.next.ne.jp/cgi-bin/music/help.cgi?phase=profile
robot-owner-email: shinobu@po.next.ne.jp
robot-status: active
robot-purpose: maintenance
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: CMC/0.01
robot-noindex: no
robot-host: haruna.next.ne.jp, 203.183.218.4
robot-from: yes
robot-useragent: CMC/0.01
robot-language: perl5
robot-description: This CMC/0.01 robot collects the information
                   of the page that was registered to the music
                   specialty searching service.
robot-history: This CMC/0.01 robot was made for the computer
               music center on November 4, 1997.
robot-environment: hobby
modified-date: Sat, 23 May 1998 17:22:00 GMT

robot-id:Collective
robot-name:Collective
robot-cover-url:http://web.ukonline.co.uk/genius/collective.htm
robot-details-url:http://web.ukonline.co.uk/genius/collective.htm
robot-owner-name:Dean Smart
robot-owner-url:http://web.ukonline.co.uk/genius/collective.htm
robot-owner-email:genius@ukonline.co.uk
robot-status:development
robot-purpose:Collective is a highly configurable program designed to interrogate
 online search engines and online databases. It will ignore web pages
 that lie about their content, and dead URLs. It can be super strict, and it searches each web page
 it finds for your search terms to ensure those terms are present; any positive URLs are added to
 an HTML file for you to view at any time, even before the program has finished.
 Collective can wander the web for days if required.
robot-type:standalone
robot-platform:Windows95, WindowsNT, Windows98, Windows2000
robot-availability:Executable
robot-exclusion:No
robot-exclusion-useragent:
robot-noindex:No
robot-host:*
robot-from:No
robot-useragent:LWP
robot-language:Perl5 (With Visual Basic front-end)
robot-description:Collective is the cleverest Internet search engine,
 with all found URLs guaranteed to have your search terms.
robot-history:Development started on August 03, 2000
robot-environment:Commercial
modified-date:August, 03, 2000
modified-by:Dean Smart

robot-id: combine
robot-name: Combine System
robot-cover-url: http://www.ub2.lu.se/~tsao/combine.ps
robot-details-url: http://www.ub2.lu.se/~tsao/combine.ps
robot-owner-name: Yong Cao
robot-owner-url: http://www.ub2.lu.se/
robot-owner-email: tsao@munin.ub2.lu.se
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: source
robot-exclusion: yes
robot-exclusion-useragent: combine
robot-noindex: no
robot-host: *.ub2.lu.se
robot-from: yes
robot-useragent: combine/0.0
robot-language: c, perl5
robot-description: An open, distributed, and efficient harvester.
robot-history: A complete re-design of the NWI robot (w3index) for DESIRE project. 
robot-environment: research
modified-date: Tue, 04 Mar 1997 16:11:40 GMT
modified-by: Yong Cao

robot-id: confuzzledbot
robot-name: ConfuzzledBot
robot-cover-url: http://www.blue.lu/
robot-details-url: http://bot.confuzzled.lu/
robot-owner-name: Britz Thibaut
robot-owner-url: http://www.confuzzled.lu/
robot-owner-email: bot@confuzzled.lu
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: Linux,Freebsd
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: confuzzledbot
robot-noindex: yes
robot-nofollow: yes
robot-host: *.ion.lu
robot-from: no
robot-useragent: Confuzzledbot/X.X (+http://www.confuzzled.lu/bot/)
robot-language: perl5
robot-description: The robot is used to build a searchable database
 for luxembourgish sites. It only indexes .lu domains and luxembourgish
 sites added to the directory.
robot-history: Developed 2000-2002. Only minor changes recently 
robot-environment: hobby
modified-date: Tue, 11 May 2004 17:45:00 CET
modified-by: Britz Thibaut

robot-id: coolbot
robot-name: CoolBot
robot-cover-url: www.suchmaschine21.de
robot-details-url: www.suchmaschine21.de
robot-owner-name: Stefan Fischerlaender
robot-owner-url: www.suchmaschine21.de
robot-owner-email: info@suchmaschine21.de
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: CoolBot
robot-noindex: yes
robot-host: www.suchmaschine21.de
robot-from: no
robot-useragent: CoolBot
robot-language: perl5
robot-description: The CoolBot robot is used to build and maintain the
 directory of the german search engine Suchmaschine21.
robot-history: none so far
robot-environment: service
modified-date: Wed, 21 Jan 2001 12:16:00 GMT
modified-by: Stefan Fischerlaender

robot-id:           core
robot-name:         Web Core / Roots
robot-cover-url:    http://www.di.uminho.pt/wc
robot-details-url:
robot-owner-name:   Jorge Portugal Andrade
robot-owner-url:    http://www.di.uminho.pt/~cbm
robot-owner-email:  wc@di.uminho.pt
robot-status:
robot-purpose:      indexing, maintenance
robot-type:
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:         shiva.di.uminho.pt, from www.di.uminho.pt
robot-from:         no
robot-useragent:    root/0.1
robot-language:     perl
robot-description:  Parallel robot developed in Minho University in Portugal to
	catalog relations among URLs and to support a special
	navigation aid.
robot-history:      First versions since October 1995.
robot-environment:
modified-date:      Wed Jan 10 23:19:08 1996.
modified-by:

robot-id: cosmos
robot-name: XYLEME Robot
robot-cover-url: http://xyleme.com/
robot-details-url:
robot-owner-name: Mihai Preda
robot-owner-url: http://www.mihaipreda.com/
robot-owner-email: preda@xyleme.com
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: data
robot-exclusion: yes
robot-exclusion-useragent: cosmos
robot-noindex: no
robot-nofollow: no
robot-host:
robot-from: yes
robot-useragent: cosmos/0.3
robot-language: c++
robot-description: index XML, follow HTML
robot-history:
robot-environment: service
modified-date: Fri, 24 Nov 2000 00:00:00 GMT
modified-by: Mihai Preda

robot-id: cruiser
robot-name: Internet Cruiser Robot
robot-cover-url: http://www.krstarica.com/
robot-details-url: http://www.krstarica.com/eng/url/
robot-owner-name: Internet Cruiser
robot-owner-url: http://www.krstarica.com/
robot-owner-email: robot@krstarica.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: Internet Cruiser Robot
robot-noindex: yes
robot-host: *.krstarica.com
robot-from: no
robot-useragent: Internet Cruiser Robot/2.1
robot-language: c++
robot-description: Internet Cruiser Robot is Internet Cruiser's prime index
 agent.
robot-history:
robot-environment: service
modified-date: Fri, 17 Jan 2001 12:00:00 GMT
modified-by: tech@krstarica.com

robot-id: cusco
robot-name: Cusco
robot-cover-url: http://www.cusco.pt/
robot-details-url: http://www.cusco.pt/
robot-owner-name: Filipe Costa Clerigo
robot-owner-url: http://www.viatecla.pt/
robot-owner-email: clerigo@viatecla.pt
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: any
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: cusco
robot-noindex: yes
robot-host: *.cusco.pt, *.viatecla.pt
robot-from: yes
robot-useragent: Cusco/3.2
robot-language: Java
robot-description: The Cusco robot is part of the CUCE indexing sistem. It
 gathers information from several sources: HTTP, Databases or filesystem. At
 this moment, its universe is the .pt domain and the information it gathers
 is available at the Portuguese search engine Cusco http://www.cusco.pt/.
robot-history: The Cusco search engine started in the company ViaTecla as a
 project to demonstrate our development capabilities and to fill the need of
 a portuguese-specific search engine. Now, we are developing new
 functionalities that cannot be found in any other on-line search engines.
robot-environment:service, research
modified-date: Mon, 21 Jun 1999 14:00:00 GMT
modified-by: Filipe Costa Clerigo

robot-id: cyberspyder
robot-name: CyberSpyder Link Test
robot-cover-url: http://www.cyberspyder.com/cslnkts1.html
robot-details-url: http://www.cyberspyder.com/cslnkts1.html
robot-owner-name: Tom Aman
robot-owner-url: http://www.cyberspyder.com/
robot-owner-email: amant@cyberspyder.com
robot-status: active
robot-purpose: link validation, some html validation
robot-type: standalone
robot-platform: windows 3.1x, windows95, windowsNT
robot-availability: binary
robot-exclusion: user configurable
robot-exclusion-useragent: cyberspyder
robot-noindex: no
robot-host: *
robot-from: no
robot-useragent: CyberSpyder/2.1
robot-language: Microsoft Visual Basic 4.0
robot-description: CyberSpyder Link Test is intended to be used as a site
 management tool to validate that HTTP links on a page are functional and to
 produce various analysis reports to assist in managing a site.
robot-history: The original robot was created to fill a widely seen need
 for an easy-to-use link-checking program.
robot-environment: commercial
modified-date: Tue, 31 Mar 1998 01:02:00 GMT
modified-by: Tom Aman

robot-id: cydralspider
robot-name: CydralSpider
robot-cover-url: http://www.cydral.com/
robot-details-url: http://en.cydral.com/help.html
robot-owner-name: Cydral
robot-owner-url: http://www.cydral.com/
robot-owner-email: cydral@cydral.com
robot-status: active
robot-purpose: gather Web content for image search engine service
robot-type: standalone
robot-platform: unix; windows
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: cydralspider
robot-noindex: yes
robot-host: *.cydral.com
robot-from: yes
robot-useragent: CydralSpider/X.X (Cydral Web Image Search;
 http://www.cydral.com/)
robot-language: c++
robot-description: Advanced image spider for www.cydral.com
robot-history: Developed in 2003, the robot uses new methods to discover Web
 sites and index images
robot-environment: commercial
modified-date: Tue, 17 Jun 2004, 11:50:30 GMT
modified-by: cydral@cydral.com

robot-id: desertrealm
robot-name: Desert Realm Spider
robot-cover-url: http://www.desertrealm.com
robot-details-url: http://spider.desertrealm.com
robot-owner-name: Brian B.
robot-owner-url: http://www.desertrealm.com
robot-owner-email: spider@desertrealm.com
robot-status: robot actively in use
robot-purpose: indexing
robot-type: standalone
robot-platform: cross platform
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: desertrealm, desert realm
robot-noindex: yes
robot-nofollow: yes
robot-host: *
robot-from: no
robot-useragent: DesertRealm.com; 0.2; [J];
robot-language: java 1.3, java 1.4
robot-description: The spider indexes fantasy and science fiction sites by
 using a customizable keyword algorithm. Only home pages are indexed, but all
 pages are looked at for links. Pages are visited randomly to limit impact on
 any one webserver.
robot-history: The spider originally was created to learn more about how
 search engines work.
robot-environment: hobby
modified-date: Fri, 19 Sep 2003 08:57:52 GMT
modified-by: Brian B.

robot-id:           deweb
robot-name:         DeWeb(c) Katalog/Index
robot-cover-url:    http://deweb.orbit.de/
robot-details-url:
robot-owner-name:   Marc Mielke
robot-owner-url:    http://www.orbit.de/
robot-owner-email:  dewebmaster@orbit.de
robot-status:       
robot-purpose:      indexing, mirroring, statistics
robot-type:         standalone
robot-platform:     
robot-availability: 
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         deweb.orbit.de
robot-from:         yes
robot-useragent:    Deweb/1.01
robot-language:     perl 4
robot-description:  Its purpose is to generate a Resource Discovery database,
	perform mirroring, and generate statistics. Uses combination
	of Informix(tm) Database and WN 1.11 serversoftware for
	indexing/ressource discovery, fulltext search, text
	excerpts.
robot-history:      
robot-environment:
modified-date:      Wed Jan 10 08:23:00 1996
modified-by:

robot-id: dienstspider
robot-name: DienstSpider
robot-cover-url: http://sappho.csi.forth.gr:22000/
robot-details-url:
robot-owner-name: Antonis Sidiropoulos 
robot-owner-url: http://www.csi.forth.gr/~asidirop
robot-owner-email: asidirop@csi.forth.gr
robot-status: development
robot-purpose: indexing
robot-type: standalone 
robot-platform: unix
robot-availability: none
robot-exclusion:
robot-exclusion-useragent:
robot-noindex:
robot-host: sappho.csi.forth.gr 
robot-from:
robot-useragent: dienstspider/1.0  
robot-language: C
robot-description: Indexing and searching the NCSTRL(Networked Computer Science Technical Report Library) and ERCIM Collection
robot-history: The version 1.0 was the developer's master thesis project
robot-environment: research
modified-date: Fri, 4 Dec 1998 0:0:0 GMT
modified-by: asidirop@csi.forth.gr

robot-id: digger
robot-name: Digger
robot-cover-url: http://www.diggit.com/
robot-details-url:
robot-owner-name: Benjamin Lipchak
robot-owner-url:
robot-owner-email: admin@bulldozersoftware.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix, windows
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: digger
robot-noindex: yes
robot-host:
robot-from: yes
robot-useragent: Digger/1.0 JDK/1.3.0
robot-language: java
robot-description: indexing web sites for the Diggit! search engine
robot-history:
robot-environment: service
modified-date:
modified-by:

robot-id: diibot
robot-name: Digital Integrity Robot
robot-cover-url: http://www.digital-integrity.com/robotinfo.html
robot-details-url: http://www.digital-integrity.com/robotinfo.html
robot-owner-name: Digital Integrity, Inc.
robot-owner-url: 
robot-owner-email: robot@digital-integrity.com
robot-status: Production
robot-purpose: WWW Indexing
robot-type:
robot-platform: unix
robot-availability: none
robot-exclusion: Conforms to robots.txt convention
robot-exclusion-useragent: DIIbot
robot-noindex: Yes
robot-host: digital-integrity.com
robot-from:
robot-useragent: DIIbot
robot-language: Java/C
robot-description:
robot-history: 
robot-environment:
modified-date:
modified-by:

robot-id: directhit
robot-name: Direct Hit Grabber
robot-cover-url: www.directhit.com
robot-details-url: http://www.directhit.com/about/company/spider.html
robot-status: active
robot-description: Direct Hit Grabber indexes documents and
 collects Web statistics for the Direct Hit Search Engine (available at
 www.directhit.com and our partners' sites)
robot-purpose: Indexing and statistics
robot-type: standalone
robot-platform: unix
robot-language: C++
robot-owner-name: Direct Hit Technologies, Inc.
robot-owner-url: www.directhit.com
robot-owner-email: DirectHitGrabber@directhit.com
robot-exclusion: yes
robot-exclusion-useragent: grabber
robot-noindex: yes
robot-host: *.directhit.com
robot-from: yes
robot-useragent: grabber
robot-environment: service
modified-by: grabber@directhit.com

robot-id: dnabot
robot-name: DNAbot
robot-cover-url: http://xx.dnainc.co.jp/dnabot/
robot-details-url: http://xx.dnainc.co.jp/dnabot/
robot-owner-name: Tom Tanaka
robot-owner-url: http://xx.dnainc.co.jp
robot-owner-email: tomatell@xx.dnainc.co.jp
robot-status: development       
robot-purpose: indexing 
robot-type: standalone          
robot-platform: unix, windows, windows95, windowsNT, mac
robot-availability: data
robot-exclusion: yes
robot-exclusion-useragent:
robot-noindex: no
robot-host: xx.dnainc.co.jp
robot-from: yes 
robot-useragent: DNAbot/1.0
robot-language: java 
robot-description: A search robot written in 100% Java, with its own built-in
 database engine and web server. Currently in Japanese.
robot-history: Developed by DNA, Inc.(Niigata City, Japan) in 1998.
robot-environment: commercial
modified-date: Mon, 4 Jan 1999 14:30:00 GMT
modified-by: Tom Tanaka

robot-id: download_express
robot-name: DownLoad Express
robot-cover-url: http://www.jacksonville.net/~dlxpress
robot-details-url: http://www.jacksonville.net/~dlxpress
robot-owner-name: DownLoad Express Inc
robot-owner-url: http://www.jacksonville.net/~dlxpress
robot-owner-email: dlxpress@mediaone.net
robot-status: active
robot-purpose: graphic download
robot-type: standalone
robot-platform: win95/98/NT
robot-availability: binary
robot-exclusion: yes
robot-exclusion-useragent: downloadexpress
robot-noindex: no
robot-host: *
robot-from: no
robot-useragent:
robot-language: visual basic
robot-description: automatically downloads graphics from the web
robot-history:
robot-environment: commercial
modified-date: Wed, 05 May 1998
modified-by: DownLoad Express Inc

robot-id: dragonbot
robot-name: DragonBot
robot-cover-url: http://www.paczone.com/
robot-details-url:
robot-owner-name: Paul Law
robot-owner-url:
robot-owner-email: admin@paczone.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: windowsNT
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: DragonBot
robot-noindex: no
robot-host: *.paczone.com
robot-from: no
robot-useragent: DragonBot/1.0 libwww/5.0
robot-language: C++
robot-description: Collects web pages related to East Asia
robot-history:
robot-environment: service
modified-date: Mon, 11 Aug 1997 00:00:00 GMT
modified-by:

robot-id: dwcp
robot-name: DWCP (Dridus' Web Cataloging Project)
robot-cover-url: http://www.dridus.com/~rmm/dwcp.php3
robot-details-url: http://www.dridus.com/~rmm/dwcp.php3
robot-owner-name: Ross Mellgren (Dridus Norwind)
robot-owner-url: http://www.dridus.com/~rmm
robot-owner-email: rmm@dridus.com
robot-status: development
robot-purpose: indexing, statistics
robot-type: standalone
robot-platform: java
robot-availability: source, binary, data
robot-exclusion: yes
robot-exclusion-useragent: dwcp
robot-noindex: no
robot-host: *.dridus.com
robot-from: dridus@dridus.com
robot-useragent: DWCP/2.0
robot-language: java
robot-description: The DWCP robot is used to gather information for
 Dridus' Web Cataloging Project, which is intended to catalog domains and
 urls (no content).
robot-history: Developed from scratch by Dridus Norwind.
robot-environment: hobby
modified-date: Sat, 10 Jul 1999 00:05:40 GMT
modified-by: Ross Mellgren

robot-id: e-collector
robot-name: e-collector
robot-cover-url: http://www.thatrobotsite.com/agents/ecollector.htm
robot-details-url: http://www.thatrobotsite.com/agents/ecollector.htm
robot-owner-name: Dean Smart
robot-owner-url: http://www.thatrobotsite.com
robot-owner-email: smarty@thatrobotsite.com
robot-status: Active
robot-purpose: email collector
robot-type: Collector of email addresses
robot-platform: Windows 9*/NT/2000
robot-availability: Binary
robot-exclusion: No
robot-exclusion-useragent: ecollector
robot-noindex: No
robot-host: *
robot-from: No
robot-useragent: LWP::
robot-language: Perl5
robot-description: e-collector, in the simplest terms, is an e-mail address
 collector, thus the name e-collector.
 So what?
 Have you ever wanted to have the email addresses of as many companies as
 possible that sell or supply, for example, "dried fruit"? I personally
 don't, but this is just an example.
 Those of you who may use this type of robot will know exactly what you can
 do with the information. First, don't spam with it. For those still not sure
 what this type of robot will do for you, take this for example:
 You're an international distributor of "dried fruit" and your boss has told
 you that if you raise sales by 10% he will buy you a new car (wish I had a
 boss like that). Well, anyway, there are thousands of shops, distributors
 etc. that you could be doing business with, but you don't know who they are,
 because they're in other countries or the nearest town and you have never
 heard of them before. Has the penny dropped yet? No? Well, now you have the
 opportunity to find out who they are, with an internet address and a person
 to contact in that company, just by downloading and running e-collector.
 Plus it's free; you don't have to do any leg work, just run the program and
 sit back and watch your potential customers arriving.
robot-history: -
robot-environment: Service
modified-date: Weekly
modified-by: Dean Smart

robot-id:ebiness
robot-name:EbiNess
robot-cover-url:http://sourceforge.net/projects/ebiness
robot-details-url:http://ebiness.sourceforge.net/
robot-owner-name:Mike Davis
robot-owner-url:http://www.carisbrook.co.uk/mike
robot-owner-email:mdavis@kieser.net
robot-status:Pre-Alpha
robot-purpose:statistics
robot-type:standalone
robot-platform:unix(Linux)
robot-availability:Open Source
robot-exclusion:yes
robot-exclusion-useragent:ebiness
robot-noindex:no
robot-host:
robot-from:no
robot-useragent:EbiNess/0.01a
robot-language:c++
robot-description:Used to build a url relationship database, to be viewed in 3D
robot-history:Dreamed it up over some beers
robot-environment:hobby
modified-date:Mon, 27 Nov 2000 12:26:00 GMT
modified-by:Mike Davis

robot-id:           eit
robot-name:         EIT Link Verifier Robot
robot-cover-url:    http://wsk.eit.com/wsk/dist/doc/admin/webtest/verify_links.html
robot-details-url:
robot-owner-name:   Jim McGuire
robot-owner-url:    http://www.eit.com/people/mcguire.html
robot-owner-email:  mcguire@eit.COM
robot-status:       
robot-purpose:      maintenance
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         *
robot-from:         
robot-useragent:    EIT-Link-Verifier-Robot/0.2
robot-language:     
robot-description:  Combination of an HTML form and a CGI script that verifies
	links from a given starting point (with some controls to
	prevent it going off-site or limitless)
robot-history:      Announced on 12 July 1994
robot-environment:
modified-date:      
modified-by:

robot-id: elfinbot
robot-name:ELFINBOT
robot-cover-url:http://letsfinditnow.com
robot-details-url:http://letsfinditnow.com/elfinbot.html
robot-owner-name:Lets Find It Now Ltd
robot-owner-url:http://letsfinditnow.com
robot-owner-email:admin@letsfinditnow.com
robot-status:Active
robot-purpose:Indexing for the Lets Find It Now search Engine
robot-type:Standalone
robot-platform:Unix
robot-availability:None
robot-exclusion: yes
robot-exclusion-useragent:elfinbot
robot-noindex:yes
robot-host:*.letsfinditnow.com
robot-from:no
robot-useragent:elfinbot
robot-language:Perl5
robot-description:ELFIN is used to index and add data to the "Lets Find It Now
 Search Engine" (http://letsfinditnow.com). The robot runs every 30 days.
robot-history:
robot-environment:
modified-date:
modified-by:

robot-id:           emacs
robot-name:         Emacs-w3 Search Engine
robot-cover-url:    http://www.cs.indiana.edu/elisp/w3/docs.html
robot-details-url:
robot-owner-name:   William M. Perry
robot-owner-url:    http://www.cs.indiana.edu/hyplan/wmperry.html
robot-owner-email:  wmperry@spry.com
robot-status:       retired
robot-purpose:      indexing
robot-type:         browser
robot-platform:     
robot-availability: 
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         *
robot-from:         yes
robot-useragent:    Emacs-w3/v[0-9\.]+
robot-language:     lisp
robot-description:  Its purpose is to generate a Resource Discovery database
	This code has not been looked at in a while, but will be
	spruced up for the Emacs-w3 2.2.0 release sometime this
	month. It will honor the /robots.txt file at that
	time.
robot-history:      
robot-environment:
modified-date:      Fri May 5 16:09:18 1995
modified-by:

robot-id:           emcspider
robot-name:         ananzi
robot-cover-url:    http://www.empirical.com/
robot-details-url:
robot-owner-name:   Hunter Payne
robot-owner-url:    http://www.psc.edu/~hpayne/
robot-owner-email:  hpayne@u-media.com
robot-status:
robot-purpose:      indexing
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:         bilbo.internal.empirical.com
robot-from:         yes
robot-useragent:    EMC Spider
robot-language:     java This spider is still in the development stages but, it
	will be hitting sites while I finish debugging it.
robot-description:
robot-history:
robot-environment:
modified-date:      Wed May 29 14:47:01 1996.
modified-by:

robot-id: esculapio
robot-name: esculapio
robot-cover-url: http://esculapio.cype.com
robot-details-url: http://esculapio.cype.com/details.htm
robot-owner-name: CYPE Ingenieros
robot-owner-url: http://www.cype.com
robot-owner-email: imasd@cype.com
robot-status: active
robot-purpose: link validation
robot-type: standalone
robot-platform: linux
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: esculapio
robot-noindex: yes
robot-host: 80.34.92.45
robot-from: yes
robot-useragent: esculapio/1.1
robot-language: C++
robot-description: Checks the integrity of the links between several
 domains.
robot-history: First, a research project. Now, an internal tool. Next, ???.
robot-environment: research, service
modified-date: Mon, 6 Jun 2004 08:25 +1 GMT
modified-by:

robot-id: esther
robot-name: Esther
robot-details-url: http://search.falconsoft.com/
robot-cover-url: http://search.falconsoft.com/
robot-owner-name: Tim Gustafson
robot-owner-url: http://www.falconsoft.com/
robot-owner-email:      tim@falconsoft.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix (FreeBSD 2.2.8)
robot-availability: data
robot-exclusion: yes
robot-exclusion-useragent: esther
robot-noindex: no
robot-host: *.falconsoft.com
robot-from: yes
robot-useragent: esther
robot-language: perl5
robot-description: This crawler is used to build the search database at
 http://search.falconsoft.com/
robot-history: Developed by FalconSoft.
robot-environment: service
modified-date: Tue, 22 Dec 1998 00:22:00 PST

robot-id: evliyacelebi
robot-name: Evliya Celebi
robot-cover-url: http://ilker.ulak.net.tr/EvliyaCelebi
robot-details-url: http://ilker.ulak.net.tr/EvliyaCelebi
robot-owner-name: Ilker TEMIR
robot-owner-url: http://ilker.ulak.net.tr
robot-owner-email: ilker@ulak.net.tr
robot-status: development
robot-purpose: indexing turkish content
robot-type: standalone
robot-platform: unix
robot-availability: source
robot-exclusion: yes
robot-exclusion-useragent: N/A
robot-noindex: no
robot-nofollow: no
robot-host: 193.140.83.*
robot-from: ilker@ulak.net.tr
robot-useragent: Evliya Celebi v0.151 - http://ilker.ulak.net.tr
robot-language: perl5
robot-history:
robot-description: crawles pages under ".tr" domain or having turkish character
 encoding (iso-8859-9 or windows-1254)
robot-environment: hobby
modified-date: Fri Mar 31 15:03:12 GMT 2000

robot-id:           nzexplorer
robot-name:         nzexplorer
robot-cover-url:    http://nzexplorer.co.nz/
robot-details-url:
robot-owner-name:   Paul Bourke
robot-owner-url:    http://bourke.gen.nz/paul.html
robot-owner-email:  paul@bourke.gen.nz
robot-status:       active
robot-purpose:      indexing, statistics
robot-type:         standalone
robot-platform:     UNIX
robot-availability: source (commercial)
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         bitz.co.nz
robot-from:         no
robot-useragent:    explorersearch
robot-language:     c++
robot-history:      Started in 1995 to provide a comprehensive index
                    to WWW pages within New Zealand. Now also used in
                    Malaysia and other countries.
robot-environment:  service
modified-date:      Tues, 25 Jun 1996
modified-by:        Paul Bourke

robot-id: fastcrawler
robot-name: FastCrawler
robot-cover-url: http://www.1klik.dk/omos/
robot-details-url: http://www.1klik.dk/omos/
robot-owner-name: 1klik.dk A/S
robot-owner-url: http://www.1klik.dk
robot-owner-email: crawler@1klik.dk
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: Windows 2000 Adv. Server
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: fastcrawler
robot-noindex: yes
robot-host: 1klik.dk
robot-from: yes
robot-useragent: FastCrawler 3.0.X (crawler@1klik.dk) - http://www.1klik.dk
robot-language: C++
robot-description: FastCrawler is used to build the databases for search engines used by 1klik.dk and its partners
robot-history: Robot started in April 1999
robot-environment: commercial
modified-date: 05-08-2001
modified-by: Kim Gam-Jensen

robot-id:fdse
robot-name:Fluid Dynamics Search Engine robot
robot-cover-url:http://www.xav.com/scripts/search/
robot-details-url:http://www.xav.com/scripts/search/
robot-owner-name:Zoltan Milosevic
robot-owner-url:http://www.xav.com/
robot-owner-email:zoltanm@nickname.net
robot-status:active
robot-purpose:indexing
robot-type:standalone
robot-platform:unix;windows
robot-availability:source;data
robot-exclusion:yes
robot-exclusion-useragent:FDSE
robot-noindex:yes
robot-host:yes
robot-from:*
robot-useragent:Mozilla/4.0 (compatible: FDSE robot)
robot-language:perl5
robot-description:Crawls remote sites as part of a shareware search engine
 program
robot-history:Developed in late 1998 over three pots of coffee
robot-environment:commercial
modified-date:Fri, 21 Jan 2000 10:15:49 GMT
modified-by:Zoltan Milosevic

robot-id:	felix
robot-name:	Felix IDE
robot-cover-url:	http://www.pentone.com
robot-details-url:	http://www.pentone.com
robot-owner-name:	The Pentone Group, Inc.
robot-owner-url:	http://www.pentone.com
robot-owner-email:	felix@pentone.com
robot-status:	active
robot-purpose:	indexing, statistics
robot-type:	standalone
robot-platform:	windows95, windowsNT
robot-availability:	binary
robot-exclusion:	yes
robot-exclusion-useragent:	FELIX IDE
robot-noindex:	yes
robot-host:	*
robot-from:	yes
robot-useragent:	FelixIDE/1.0
robot-language:	visual basic
robot-description:	Felix IDE is a retail personal search spider sold by
  The Pentone Group, Inc.
  It supports the proprietary exclusion "Frequency: ??????????" in the
  robots.txt file. Question marks represent an integer
  indicating number of milliseconds to delay between document requests. This
  is called VDRF(tm) or Variable Document Retrieval Frequency. Note that
  users can re-define the useragent name.
robot-history:	This robot began as an in-house tool for the lucrative Felix
  IDS (Information Discovery Service) and has gone retail.
robot-environment:	service, commercial, research
modified-date:	Fri, 11 Apr 1997 19:08:02 GMT
modified-by:	Kerry B. Rogers

robot-id:           ferret
robot-name:         Wild Ferret Web Hopper #1, #2, #3
robot-cover-url:    http://www.greenearth.com/
robot-details-url:
robot-owner-name:   Greg Boswell
robot-owner-url:    http://www.greenearth.com/
robot-owner-email:  ghbos@postoffice.worldnet.att.net
robot-status:
robot-purpose:      indexing maintenance statistics
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:
robot-host:
robot-from:         yes
robot-useragent:    Hazel's Ferret Web hopper, 
robot-language:     C++, Visual Basic, Java
robot-description:  The wild ferret web hopper's are designed as specific agents
	to retrieve data from all available sources on the internet.
	They work in an onion format hopping from spot to spot one
	level at a time over the internet. The information is
	gathered into different relational databases, known as
	"Hazel's Horde". The information is publicly available and
	will be free for the browsing at www.greenearth.com.
	Effective date of the data posting is to be
	announced.
robot-history:
robot-environment:
modified-date:      Mon Feb 19 00:28:37 1996.
modified-by:

robot-id: fetchrover
robot-name: FetchRover
robot-cover-url: http://www.engsoftware.com/fetch.htm
robot-details-url: http://www.engsoftware.com/spiders/
robot-owner-name: Dr. Kenneth R. Wadland
robot-owner-url: http://www.engsoftware.com/
robot-owner-email: ken@engsoftware.com
robot-status: active
robot-purpose: maintenance, statistics
robot-type: standalone
robot-platform: Windows/NT, Windows/95, Solaris SPARC
robot-availability: binary, source
robot-exclusion: yes
robot-exclusion-useragent: ESI
robot-noindex: N/A
robot-host: *
robot-from: yes
robot-useragent: ESIRover v1.0
robot-language: C++
robot-description: FetchRover fetches Web Pages.  
   It is an automated page-fetching engine. FetchRover can be
   used stand-alone or as the front-end to a full-featured Spider.
   Its database can use any ODBC compliant database server, including
   Microsoft Access, Oracle, Sybase SQL Server, FoxPro, etc.
robot-history:  Used as the front-end to SmartSpider (another Spider 
   product sold by Engineering Software, Inc.)
robot-environment: commercial, service
modified-date: Thu, 03 Apr 1997 21:49:50 EST
modified-by: Ken Wadland

robot-id: fido
robot-name: fido
robot-cover-url: http://www.planetsearch.com/
robot-details-url: http://www.planetsearch.com/info/fido.html
robot-owner-name: Steve DeJarnett
robot-owner-url: http://www.planetsearch.com/staff/steved.html
robot-owner-email: fido@planetsearch.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: Unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: fido
robot-noindex: no
robot-host: fido.planetsearch.com, *.planetsearch.com, 206.64.113.*
robot-from: yes
robot-useragent: fido/0.9 Harvest/1.4.pl2
robot-language: c, perl5
robot-description: fido is used to gather documents for the search engine 
                   provided in the PlanetSearch service, which is operated by
                   the Philips Multimedia Center.  The robot runs on an
                   ongoing basis.
robot-history: fido was originally based on the Harvest Gatherer, but has since
               evolved into a new creature.  It still uses some support code
               from Harvest.
robot-environment: service
modified-date: Sat, 2 Nov 1996 00:08:18 GMT
modified-by: Steve DeJarnett

robot-id:           finnish
robot-name:         Hämähäkki
robot-cover-url:    http://www.fi/search.html
robot-details-url:  http://www.fi/www/spider.html
robot-owner-name:   Timo Metsälä
robot-owner-url:    http://www.fi/~timo/
robot-owner-email:  Timo.Metsala@www.fi
robot-status:       active
robot-purpose:      indexing
robot-type:         standalone
robot-platform:     UNIX
robot-availability: no
robot-exclusion:    yes
robot-exclusion-useragent:  Hämähäkki
robot-noindex:      no
robot-host:         *.www.fi
robot-from:         yes
robot-useragent:    Hämähäkki/0.2
robot-language:     C
robot-description:  Its purpose is to generate a Resource Discovery
	database from the Finnish (top-level domain .fi) www servers.
	The resulting database is used by the search engine 
	at http://www.fi/search.html.
robot-history:      (The name Hämähäkki is just Finnish for spider.)
robot-environment:
modified-date:      1996-06-25   
modified-by:        Jaakko.Hyvatti@www.fi

robot-id: fireball
robot-name: KIT-Fireball
robot-cover-url: http://www.fireball.de
robot-details-url: http://www.fireball.de/technik.html (in German)
robot-owner-name: Gruner + Jahr Electronic Media Service GmbH
robot-owner-url: http://www.ems.guj.de
robot-owner-email:info@fireball.de
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: KIT-Fireball
robot-noindex: yes
robot-host: *.fireball.de
robot-from: yes
robot-useragent: KIT-Fireball/2.0 libwww/5.0a
robot-language: c
robot-description: The Fireball robots gather web documents in German
 language for the database of the Fireball search service.
robot-history: The robot was developed by Benhui Chen in a research
 project at the Technical University of Berlin in 1996 and was
 re-implemented by its developer in 1997 for the present owner.
robot-environment: service 
modified-date: Mon Feb 23 11:26:08 1998
modified-by: Detlev Kalb

robot-id:           fish
robot-name:         Fish search
robot-cover-url:    http://www.win.tue.nl/bin/fish-search
robot-details-url:
robot-owner-name:   Paul De Bra
robot-owner-url:    http://www.win.tue.nl/win/cs/is/debra/
robot-owner-email:  debra@win.tue.nl
robot-status:       
robot-purpose:      indexing
robot-type:         standalone
robot-platform:     
robot-availability: binary
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         www.win.tue.nl
robot-from:         no
robot-useragent:    Fish-Search-Robot
robot-language:     c
robot-description:  Its purpose is to discover resources on the fly; a version
	exists that is integrated into the T&uuml;bingen Mosaic
	2.4.2 browser (also written in C)
robot-history:      Originated as an addition to Mosaic for X
robot-environment:
modified-date:      Mon May 8 09:31:19 1995
modified-by:

robot-id: fouineur
robot-name: Fouineur
robot-cover-url: http://fouineur.9bit.qc.ca/
robot-details-url: http://fouineur.9bit.qc.ca/informations.html
robot-owner-name: Joel Vandal
robot-owner-url: http://www.9bit.qc.ca/~jvandal/
robot-owner-email: jvandal@9bit.qc.ca
robot-status: development
robot-purpose: indexing, statistics
robot-type: standalone
robot-platform: unix, windows 
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: fouineur
robot-noindex: no
robot-host: *
robot-from: yes
robot-useragent: Mozilla/2.0 (compatible fouineur v2.0; fouineur.9bit.qc.ca)
robot-language: perl5
robot-description: This robot automatically builds a database that is used
                   by our own search engine. This robot auto-detect the
                   language (french, english & spanish) used in the HTML
                   page. Each database record generated by this robot
                   include: date, url, title, total words, title, size
                   and de-htmlized text. Also support server-side and
                   client-side IMAGEMAP.
robot-history: No existing robot does everything that we need for our usage.
robot-environment: service
modified-date: Thu, 9 Jan 1997 22:57:28 EST
modified-by: jvandal@9bit.qc.ca

robot-id:           francoroute
robot-name:         Robot Francoroute
robot-cover-url:
robot-details-url:
robot-owner-name:   Marc-Antoine Parent
robot-owner-url:    http://www.crim.ca/~maparent
robot-owner-email:  maparent@crim.ca
robot-status:
robot-purpose:      indexing, mirroring, statistics
robot-type:         browser
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:         zorro.crim.ca
robot-from:         yes
robot-useragent:    Robot du CRIM 1.0a
robot-language:     perl5, sqlplus
robot-description:  Part of the RISQ's Francoroute project for researching
	francophone. Uses the Accept-Language tag and reduces demand
	accordingly
robot-history:
robot-environment:
modified-date:      Wed Jan 10 23:56:22 1996.
modified-by:

robot-id: freecrawl
robot-name: Freecrawl
robot-cover-url: http://euroseek.net/
robot-owner-name: Jesper Ekhall
robot-owner-email: ekhall@freeside.net
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: Freecrawl
robot-noindex: no
robot-host: *.freeside.net
robot-from: yes
robot-useragent: Freecrawl
robot-language: c
robot-description: The Freecrawl robot is used to build a database for the
  EuroSeek service.
robot-environment: service

robot-id:           funnelweb
robot-name:         FunnelWeb
robot-cover-url:    http://funnelweb.net.au
robot-details-url:
robot-owner-name:   David Eagles
robot-owner-url:    http://www.pc.com.au
robot-owner-email:  eaglesd@pc.com.au
robot-status:       
robot-purpose:      indexing, statistics
robot-type:         standalone
robot-platform:     
robot-availability: 
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         earth.planets.com.au
robot-from:         yes
robot-useragent:    FunnelWeb-1.0
robot-language:     c and c++
robot-description:  Its purpose is to generate a Resource Discovery database,
	and generate statistics. Localised South Pacific Discovery
	and Search Engine, plus distributed operation under
	development.
robot-history:      
robot-environment:
modified-date:      Mon Nov 27 21:30:11 1995
modified-by:

robot-id:	      gama
robot-name: gammaSpider, FocusedCrawler
robot-details-url: http://www.gammasite.com, http://www.gammasite.com/gammaSpider.html
robot-cover-url: http://www.gammasite.com
robot-owner-name: gammasite
robot-owner-url: http://www.gammasite.com
robot-owner-email:	support@gammasite.com
robot-status: active
robot-purpose: indexing, maintenance
robot-type: standalone
robot-platform: unix, windows, windows95, windowsNT, linux
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: gammaSpider
robot-noindex:	no
robot-nofollow: no
robot-host: *
robot-from: no
robot-useragent: gammaSpider xxxxxxx ()/
robot-language: c++
robot-description:
  Information gathering.
  Focused crawling on specific topic.
  Uses gammaFetcherServer
  Product for selling.
  RobotUserAgent may be changed by the user.
  More features are being added.
  The product is constantly under development.
  AKA FocusedCrawler
robot-history: AKA FocusedCrawler
robot-environment: service, commercial, research
modified-date: Sun, 25 Mar 2001 18:49:52 GMT

robot-id: gazz
robot-name: gazz
robot-cover-url: http://gazz.nttrd.com/
robot-details-url: http://gazz.nttrd.com/
robot-owner-name: NTT Cyberspace Laboratories
robot-owner-url: http://gazz.nttrd.com/
robot-owner-email: gazz@nttrd.com
robot-status: development
robot-purpose: statistics
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: gazz
robot-noindex: yes
robot-host: *.nttrd.com, *.infobee.ne.jp
robot-from: yes
robot-useragent: gazz/1.0
robot-language: c
robot-description: This robot is used for research purposes.
robot-history: Its root is TITAN project in NTT.
robot-environment: research
modified-date: Wed, 09 Jun 1999 10:43:18 GMT
modified-by: noto@isl.ntt.co.jp

robot-id: gcreep
robot-name: GCreep
robot-cover-url: http://www.instrumentpolen.se/gcreep/index.html
robot-details-url: http://www.instrumentpolen.se/gcreep/index.html
robot-owner-name: Instrumentpolen AB
robot-owner-url: http://www.instrumentpolen.se/ip-kontor/eng/index.html
robot-owner-email: anders@instrumentpolen.se
robot-status: development
robot-purpose: indexing
robot-type: browser+standalone
robot-platform: linux+mysql
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: gcreep
robot-noindex: yes
robot-host: mbx.instrumentpolen.se
robot-from: yes
robot-useragent: gcreep/1.0
robot-language: c
robot-description: Indexing robot to learn SQL
robot-history: Spare time project begun late '96, maybe early '97
robot-environment: hobby
modified-date: Fri, 23 Jan 1998 16:09:00 MET
modified-by: Anders Hedstrom

robot-id:           getbot
robot-name:         GetBot
robot-cover-url:    http://www.blacktop.com.zav/bots 
robot-details-url:
robot-owner-name:   Alex Zavatone
robot-owner-url:    http://www.blacktop.com/zav
robot-owner-email:  zav@macromedia.com
robot-status:
robot-purpose:      maintenance
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:
robot-host:
robot-from:         no
robot-useragent:    ???
robot-language:     Shockwave/Director.
robot-description:  GetBot's purpose is to index all the sites it can find that
	contain Shockwave movies.  It is the first bot or spider
	written in Shockwave.  The bot was originally written at
	Macromedia on a hungover Sunday as a proof of concept. -
	Alex Zavatone 3/29/96
robot-history:
robot-environment:
modified-date:      Fri Mar 29 20:06:12 1996.
modified-by:

robot-id:           geturl
robot-name:         GetURL
robot-cover-url:    http://Snark.apana.org.au/James/GetURL/
robot-details-url:
robot-owner-name:   James Burton
robot-owner-url:    http://Snark.apana.org.au/James/
robot-owner-email:  James@Snark.apana.org.au
robot-status:       
robot-purpose:      maintenance, mirroring
robot-type:         standalone
robot-platform:     
robot-availability: 
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         *
robot-from:         no
robot-useragent:    GetURL.rexx v1.05
robot-language:     ARexx (Amiga REXX)
robot-description:  Its purpose is to validate links, perform mirroring, and
	copy document trees. Designed as a tool for retrieving web
	pages in batch mode without the encumbrance of a browser.
	Can be used to describe a set of pages to fetch, and to
	maintain an archive or mirror. Is not run by a central site
	and accessed by clients - is run by the end user or archive
	maintainer
robot-history:      
robot-environment:
modified-date:      Tue May 9 15:13:12 1995		
modified-by:

robot-id: golem
robot-name: Golem
robot-cover-url: http://www.quibble.com/golem/
robot-details-url: http://www.quibble.com/golem/
robot-owner-name: Geoff Duncan
robot-owner-url: http://www.quibble.com/geoff/
robot-owner-email: geoff@quibble.com
robot-status: active
robot-purpose: maintenance
robot-type: standalone
robot-platform: mac
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: golem
robot-noindex: no
robot-host: *.quibble.com
robot-from: yes
robot-useragent: Golem/1.1
robot-language: HyperTalk/AppleScript/C++
robot-description: Golem generates status reports on collections of URLs
  supplied by clients. Designed to assist with editorial updates of
  Web-related sites or products.
robot-history: Personal project turned into a contract service for private
  clients.
robot-environment: service,research
modified-date: Wed, 16 Apr 1997 20:50:00 GMT
modified-by: Geoff Duncan

robot-id: googlebot
robot-name: Googlebot
robot-cover-url: http://www.googlebot.com/ 
robot-details-url: http://www.googlebot.com/bot.html
robot-owner-name: Google Inc.
robot-owner-url: http://www.google.com/
robot-owner-email: googlebot@google.com 
robot-status: active
robot-purpose: indexing
robot-type: standalone 
robot-platform: Linux
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: googlebot
robot-noindex: yes
robot-host: googlebot.com
robot-from: yes 
robot-useragent: Googlebot/2.X (+http://www.googlebot.com/bot.html)
robot-language: c++
robot-description: Google's crawler
robot-history: Developed by Google Inc
robot-environment: commercial
modified-date: Thu Mar 29 21:00:07 PST 2001
modified-by: googlebot@google.com

robot-id: grapnel
robot-name: Grapnel/0.01 Experiment
robot-cover-url: varies
robot-details-url: mailto:v93_kat@ce.kth.se
robot-owner-name: Philip Kallerman
robot-owner-url: v93_kat@ce.kth.se
robot-owner-email: v93_kat@ce.kth.se
robot-status: Experimental
robot-purpose: Indexing
robot-type:
robot-platform: WinNT
robot-availability: None, yet
robot-exclusion: Yes
robot-exclusion-useragent: No
robot-noindex: No
robot-host: varies
robot-from: Varies
robot-useragent:
robot-language: Perl
robot-description: Resource Discovery Experimentation
robot-history: None, hoping to make some
robot-environment:
modified-date:
modified-by: 7 Feb 1997

robot-id:griffon
robot-name:Griffon                                                               
robot-cover-url:http://navi.ocn.ne.jp/                                           
robot-details-url:http://navi.ocn.ne.jp/griffon/                                 
robot-owner-name:NTT Communications Corporate Users Business Division            
robot-owner-url:http://navi.ocn.ne.jp/                                           
robot-owner-email:griffon@super.navi.ocn.ne.jp                                   
robot-status:active                                                              
robot-purpose:indexing                                                           
robot-type:standalone                                                            
robot-platform:unix                                                              
robot-availability:none                                                          
robot-exclusion:yes                                                              
robot-exclusion-useragent:griffon                                                
robot-noindex:yes                                                                
robot-nofollow:yes                                                              
robot-host:*.navi.ocn.ne.jp                                                      
robot-from:yes                                                                   
robot-useragent:griffon/1.0                                                      
robot-language:c                                                                 
robot-description:The Griffon robot is used to build database for the OCN navi   
       search service operated by NTT Communications Corporation.
       It mainly gathers pages written in Japanese.            
robot-history:Its root is TITAN project in NTT.                                  
robot-environment:service                                                        
modified-date:Mon,25 Jan 2000 15:25:30 GMT                                       
modified-by:toka@navi.ocn.ne.jp

robot-id: gromit
robot-name: Gromit
robot-cover-url: http://www.austlii.edu.au/
robot-details-url: http://www2.austlii.edu.au/~dan/gromit/
robot-owner-name: Daniel Austin
robot-owner-url: http://www2.austlii.edu.au/~dan/
robot-owner-email: dan@austlii.edu.au
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: Gromit
robot-noindex: no
robot-host: *.austlii.edu.au
robot-from: yes
robot-useragent: Gromit/1.0
robot-language: perl5
robot-description: Gromit is a Targetted Web Spider that indexes legal
 sites contained in the AustLII legal links database.
robot-history: This robot is based on the Perl5 LWP::RobotUA module.
robot-environment: research
modified-date: Wed, 11 Jun 1997 03:58:40 GMT
modified-by: Daniel Austin

robot-id: gulliver
robot-name: Northern Light Gulliver
robot-cover-url:
robot-details-url:
robot-owner-name: Mike Mulligan
robot-owner-url:
robot-owner-email: crawler@northernlight.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: gulliver
robot-noindex: yes
robot-host: scooby.northernlight.com, taz.northernlight.com,
  gulliver.northernlight.com
robot-from: yes
robot-useragent: Gulliver/1.1
robot-language: c
robot-description: Gulliver is a robot to be used to collect
  web pages for indexing and subsequent searching of the index.
robot-history: Oct 1996: development; Dec 1996-Jan 1997: crawl & debug;
  Mar 1997: crawl again;
robot-environment: service
modified-date: Wed, 21 Apr 1999 16:00:00 GMT
modified-by: Mike Mulligan

robot-id: gulperbot
robot-name: Gulper Bot
robot-cover-url: http://yuntis.ecsl.cs.sunysb.edu/
robot-details-url: http://yuntis.ecsl.cs.sunysb.edu/help/robot/
robot-owner-name: Maxim Lifantsev
robot-owner-url: http://www.cs.sunysb.edu/~maxim/
robot-owner-email: gulperbot@ecsl.cs.sunysb.edu
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: Linux
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: gulper
robot-noindex: yes
robot-nofollow: yes
robot-host: yuntis*.ecsl.cs.sunysb.edu
robot-from: no
robot-useragent: Gulper Web Bot 0.2.4 (www.ecsl.cs.sunysb.edu/~maxim/cgi-bin/Link/GulperBot)
robot-language: c++
robot-description: The Gulper Bot is used to collect data for the Yuntis research search engine project.
robot-history: Developed in a research project at SUNY Stony Brook.
robot-environment: research
modified-date: Tue, 28 Aug 2001 21:40:47 GMT
modified-by: maxim@cs.sunysb.edu

robot-id: hambot
robot-name: HamBot
robot-cover-url: http://www.hamrad.com/search.html
robot-details-url: http://www.hamrad.com/
robot-owner-name: John Dykstra
robot-owner-url:
robot-owner-email: john@futureone.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix, Windows95
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: hambot
robot-noindex: yes
robot-host: *.hamrad.com
robot-from:
robot-useragent:
robot-language: perl5, C++
robot-description: Two HamBot robots are used (stand alone & browser based)
 to aid in building the database for HamRad Search - The Search Engine for
 Search Engines.  The robots are run intermittently and perform nearly
 identical functions.
robot-history: A non commercial (hobby?) project to aid in building and
 maintaining the database for the HamRad search engine.
robot-environment: service
modified-date: Fri, 17 Apr 1998 21:44:00 GMT
modified-by: JD

robot-id:           harvest
robot-name:         Harvest
robot-cover-url:    http://harvest.cs.colorado.edu
robot-details-url:
robot-owner-name:   
robot-owner-url:    
robot-owner-email:  
robot-status:       
robot-purpose:      indexing
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      
robot-host:         bruno.cs.colorado.edu
robot-from:         yes
robot-useragent:    yes
robot-language:     
robot-description:  Harvest's motivation is to index community- or topic-
	specific collections, rather than to locate and index all
	HTML objects that can be found.  Also, Harvest allows users
	to control the enumeration several ways, including stop
	lists and depth and count limits.  Therefore, Harvest
	provides a much more controlled way of indexing the Web than
	is typical of robots. Pauses 1 second between requests (by
	default).
robot-history:      
robot-environment:
modified-date:      
modified-by:

robot-id: havindex
robot-name: havIndex
robot-cover-url: http://www.hav.com/
robot-details-url: http://www.hav.com/
robot-owner-name: hav.Software and Horace A. (Kicker) Vallas
robot-owner-url: http://www.hav.com/
robot-owner-email: havIndex@hav.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: Java VM 1.1
robot-availability: binary
robot-exclusion: yes
robot-exclusion-useragent: havIndex
robot-noindex: yes
robot-host: *
robot-from: no
robot-useragent: havIndex/X.xx[bxx]
robot-language: Java
robot-description: havIndex allows individuals to build searchable word
 index of (user specified) lists of URLs.  havIndex does not crawl -
 rather it requires  one or more user supplied lists of URLs to be
 indexed.  havIndex does (optionally) save urls parsed from indexed
  pages.
robot-history: Developed to answer client requests for URL specific
 index capabilities.
robot-environment: commercial, service
modified-date: 6-27-98
modified-by: Horace A. (Kicker) Vallas

robot-id:           hi
robot-name:         HI (HTML Index) Search
robot-cover-url:    http://cs6.cs.ait.ac.th:21870/pa.html
robot-details-url:
robot-owner-name:   Razzakul Haider Chowdhury
robot-owner-url:    http://cs6.cs.ait.ac.th:21870/index.html
robot-owner-email:  a94385@cs.ait.ac.th
robot-status:       
robot-purpose:      indexing
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         
robot-from:         yes
robot-useragent:    AITCSRobot/1.1
robot-language:     perl 5
robot-description:  Its purpose is to generate a Resource Discovery database.
	This Robot traverses the net and creates a searchable
	database of Web pages. It stores the title string of the
	HTML document and the absolute url. A search engine provides
	the boolean AND & OR query models with or without filtering
	the stop list of words. Feature is kept for the Web page
	owners to add the url to the searchable database.
robot-history:      
robot-environment:
modified-date:      Wed Oct  4 06:54:31 1995
modified-by:

robot-id: hometown
robot-name: Hometown Spider Pro
robot-cover-url: http://www.hometownsingles.com
robot-details-url: http://www.hometownsingles.com
robot-owner-name: Bob Brown
robot-owner-url: http://www.hometownsingles.com
robot-owner-email: admin@hometownsingles.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: windowsNT
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: *
robot-noindex: yes
robot-host: 63.195.193.17
robot-from: no
robot-useragent: Hometown Spider Pro
robot-language: delphi
robot-description: The Hometown Spider Pro is used to maintain the indexes
 for Hometown Singles.
robot-history: Innerprise URL Spider Pro
robot-environment: commercial
modified-date: Tue, 28 Mar 2000 16:00:00 GMT
modified-by: Hometown Singles

robot-id: wired-digital
robot-name: Wired Digital
robot-cover-url:
robot-details-url:
robot-owner-name: Bowen Dwelle
robot-owner-url:
robot-owner-email: bowen@hotwired.com
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: hotwired
robot-noindex: no
robot-host: gossip.hotwired.com
robot-from: yes
robot-useragent: wired-digital-newsbot/1.5
robot-language: perl-5.004
robot-description: this is a test
robot-history:
robot-environment: research
modified-date: Thu, 30 Oct 1997
modified-by: bowen@hotwired.com

robot-id:           htdig
robot-name:         ht://Dig
robot-cover-url:    http://www.htdig.org/
robot-details-url:  http://www.htdig.org/howitworks.html
robot-owner-name:   Andrew Scherpbier
robot-owner-url:    http://www.htdig.org/author.html
robot-owner-email:  andrew@contigo.com
robot-owner-name2:  Geoff Hutchison 
robot-owner-url2:   http://wso.williams.edu/~ghutchis/
robot-owner-email2: ghutchis@wso.williams.edu
robot-status:
robot-purpose:      indexing
robot-type:         standalone
robot-platform:     unix
robot-availability: source
robot-exclusion:    yes
robot-exclusion-useragent: htdig
robot-noindex:      yes
robot-host:         *
robot-from:         no
robot-useragent:    htdig/3.1.0b2
robot-language:     C,C++.
robot-history:This robot was originally developed for use at San Diego
 State University.
robot-environment:
modified-date:Tue, 3 Nov 1998 10:09:02 EST 
modified-by: Geoff Hutchison <Geoffrey.R.Hutchison@williams.edu>

robot-id:           htmlgobble
robot-name:         HTMLgobble
robot-cover-url:    
robot-details-url:
robot-owner-name:   Andreas Ley
robot-owner-url:    
robot-owner-email:  ley@rz.uni-karlsruhe.de
robot-status:       
robot-purpose:      mirror
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         tp70.rz.uni-karlsruhe.de
robot-from:         yes
robot-useragent:    HTMLgobble v2.2
robot-language:     
robot-description:  A mirroring robot. Configured to stay within a directory,
	sleeps between requests, and the next version will use HEAD
	to check if the entire document needs to be
	retrieved
robot-history:      
robot-environment:
modified-date:      
modified-by:

robot-id:           hyperdecontextualizer
robot-name:         Hyper-Decontextualizer
robot-cover-url:    http://www.tricon.net/Comm/synapse/spider/
robot-details-url:
robot-owner-name:   Cliff Hall
robot-owner-url:    http://kpt1.tricon.net/cgi-bin/cliff.cgi
robot-owner-email:  cliff@tricon.net
robot-status:
robot-purpose:      indexing
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:
robot-host:
robot-from:         no
robot-useragent:    no
robot-language:     Perl 5 Takes an input sentence and marks up each word with
	an appropriate hyper-text link.
robot-description:
robot-history:
robot-environment:
modified-date:      Mon May  6 17:41:29 1996.
modified-by:

robot-id: iajabot
robot-name: iajaBot
robot-cover-url:
robot-details-url: http://www.scs.carleton.ca/~morin/iajabot.html
robot-owner-name: Pat Morin
robot-owner-url: http://www.scs.carleton.ca/~morin/
robot-owner-email: morin@scs.carleton.ca
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: unix, windows
robot-availability: none
robot-exclusion: no
robot-exclusion-useragent: iajabot
robot-noindex: no
robot-host: *.scs.carleton.ca
robot-from: no
robot-useragent: iajaBot/0.1
robot-language: c
robot-description: Finds adult content
robot-history: None, brand new.
robot-environment: research
modified-date: Tue, 27 Jun 2000, 11:17:50 EDT
modified-by: Pat Morin

robot-id:           ibm
robot-name:         IBM_Planetwide
robot-cover-url:    http://www.ibm.com/%7ewebmaster/
robot-details-url:
robot-owner-name:   Ed Costello
robot-owner-url:    http://www.ibm.com/%7ewebmaster/
robot-owner-email:  epc@www.ibm.com
robot-status:
robot-purpose:      indexing, maintenance, mirroring
robot-type:         standalone and
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:         www.ibm.com www2.ibm.com
robot-from:         yes
robot-useragent:    IBM_Planetwide, 
robot-language:     Perl5
robot-description:  Restricted to IBM owned or related domains.
robot-history:
robot-environment:
modified-date:      Mon Jan 22 22:09:19 1996.
modified-by:

robot-id: iconoclast
robot-name: Popular Iconoclast
robot-cover-url: http://gestalt.sewanee.edu/ic/
robot-details-url: http://gestalt.sewanee.edu/ic/info.html
robot-owner-name: Chris Cappuccio
robot-owner-url: http://sefl.satelnet.org/~ccappuc/
robot-owner-email: chris@gestalt.sewanee.edu
robot-status: development
robot-purpose: statistics 
robot-type: standalone
robot-platform: unix (OpenBSD)
robot-availability: source
robot-exclusion: no 
robot-exclusion-useragent:
robot-noindex: no
robot-host: gestalt.sewanee.edu
robot-from: yes 
robot-useragent: gestaltIconoclast/1.0 libwww-FM/2.17
robot-language: c,perl5
robot-description: This guy likes statistics
robot-history: This robot has a history in mathematics and english
robot-environment: research
modified-date: Wed, 5 Mar 1997 17:35:16 CST
modified-by: chris@gestalt.sewanee.edu

robot-id: Ilse
robot-name: Ingrid
robot-cover-url:
robot-details-url:
robot-owner-name: Ilse c.v.
robot-owner-url: http://www.ilse.nl/
robot-owner-email: ilse@ilse.nl
robot-status: Running
robot-purpose: Indexing
robot-type: Web Indexer
robot-platform: UNIX
robot-availability: Commercial as part of search engine package
robot-exclusion: Yes
robot-exclusion-useragent: INGRID/0.1
robot-noindex: Yes
robot-host: bart.ilse.nl
robot-from: Yes
robot-useragent: INGRID/0.1
robot-language: C
robot-description:  
robot-history:
robot-environment:
modified-date: 06/13/1997
modified-by: Ilse

robot-id: imagelock
robot-name: Imagelock 
robot-cover-url:
robot-details-url:
robot-owner-name: Ken Belanger  
robot-owner-url:
robot-owner-email: belanger@imagelock.com
robot-status: development
robot-purpose: maintenance      
robot-type:
robot-platform: windows95
robot-availability: none
robot-exclusion: no
robot-exclusion-useragent:
robot-noindex: no
robot-host: 209.111.133.*
robot-from: no
robot-useragent: Mozilla 3.01 PBWF (Win95)
robot-language:
robot-description: searches for image links
robot-history:
robot-environment: service
modified-date: Tue, 11 Aug 1998 17:28:52 GMT
modified-by: brian@smithrenaud.com

robot-id:           incywincy
robot-name:         IncyWincy
robot-cover-url:    http://osiris.sunderland.ac.uk/sst-scripts/simon.html
robot-details-url:
robot-owner-name:   Simon Stobart
robot-owner-url:    http://osiris.sunderland.ac.uk/sst-scripts/simon.html
robot-owner-email:  simon.stobart@sunderland.ac.uk
robot-status:
robot-purpose:
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:         osiris.sunderland.ac.uk
robot-from:         yes
robot-useragent:    IncyWincy/1.0b1
robot-language:     C++
robot-description:  Various Research projects at the University of
	Sunderland
robot-history:
robot-environment:
modified-date:      Fri Jan 19 21:50:32 1996.
modified-by:

robot-id: informant
robot-name: Informant
robot-cover-url: http://informant.dartmouth.edu/
robot-details-url: http://informant.dartmouth.edu/about.html
robot-owner-name: Bob Gray
robot-owner-name2: Aditya Bhasin
robot-owner-name3: Katsuhiro Moizumi
robot-owner-name4: Dr. George V. Cybenko
robot-owner-url: http://informant.dartmouth.edu/
robot-owner-email: info_adm@cosmo.dartmouth.edu
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: no
robot-exclusion-useragent: Informant
robot-noindex: no
robot-host: informant.dartmouth.edu
robot-from: yes
robot-useragent: Informant
robot-language: c, c++
robot-description: The Informant robot continually checks the Web pages
 that are relevant to user queries.  Users are notified of any new or
 updated pages.  The robot runs daily, but the number of hits per site
 per day should be quite small, and these hits should be randomly
 distributed over several hours.  Since the robot does not actually 
 follow links (aside from those returned from the major search engines 
 such as Lycos), it does not fall victim to the common looping problems.
 The robot will support the Robot Exclusion Standard by early December, 1996.
robot-history: The robot is part of a research project at Dartmouth College.  
 The robot may become part of a commercial service (at which time it may be 
 subsumed by some other, existing robot).
robot-environment: research, service
modified-date: Sun, 3 Nov 1996 11:55:00 GMT
modified-by: Bob Gray

robot-id:           infoseek
robot-name:         InfoSeek Robot 1.0
robot-cover-url:    http://www.infoseek.com
robot-details-url:
robot-owner-name:   Steve Kirsch
robot-owner-url:    http://www.infoseek.com
robot-owner-email:  stk@infoseek.com
robot-status:       
robot-purpose:      indexing
robot-type:         standalone
robot-platform:     
robot-availability: 
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         corp-gw.infoseek.com
robot-from:         yes
robot-useragent:    InfoSeek Robot 1.0
robot-language:     python
robot-description:  Its purpose is to generate a Resource Discovery database.
	Collects WWW pages for both InfoSeek's free WWW search and
	commercial search. Uses a unique proprietary algorithm to
	identify the most popular and interesting WWW pages. Very
	fast, but never has more than one request per site
	outstanding at any given time. Has been refined for more
	than a year.
robot-history:      
robot-environment:
modified-date:      Sun May 28 01:35:48 1995
modified-by:

robot-id:           infoseeksidewinder
robot-name:         Infoseek Sidewinder
robot-cover-url:    http://www.infoseek.com/
robot-details-url:
robot-owner-name:   Mike Agostino
robot-owner-url:    http://www.infoseek.com/
robot-owner-email:  mna@infoseek.com
robot-status:
robot-purpose:      indexing
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:
robot-from:         yes
robot-useragent:    Infoseek Sidewinder
robot-language:     C
robot-description:  Collects WWW pages for both InfoSeek's free WWW search
	services. Uses a unique, incremental, very fast proprietary
	algorithm to find WWW pages.
robot-history:
robot-environment:
modified-date:      Sat Apr 27 01:20:15 1996.
modified-by:

robot-id: infospider
robot-name: InfoSpiders
robot-cover-url: http://www-cse.ucsd.edu/users/fil/agents/agents.html
robot-owner-name: Filippo Menczer
robot-owner-url: http://www-cse.ucsd.edu/users/fil/
robot-owner-email: fil@cs.ucsd.edu
robot-status: development
robot-purpose: search
robot-type: standalone
robot-platform: unix, mac
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: InfoSpiders
robot-noindex: no
robot-host: *.ucsd.edu
robot-from: yes
robot-useragent: InfoSpiders/0.1
robot-language: c, perl5
robot-description: application of artificial life algorithm to adaptive
 distributed information retrieval
robot-history: UC San Diego, Computer Science Dept. PhD research project
 (1995-97) under supervision of Prof. Rik Belew
robot-environment: research
modified-date: Mon, 16 Sep 1996 14:08:00 PDT

robot-id:  inspectorwww
robot-name:  Inspector Web
robot-cover-url:  http://www.greenpac.com/inspector/
robot-details-url:  http://www.greenpac.com/inspector/ourrobot.html
robot-owner-name:  Doug Green
robot-owner-url:  http://www.greenpac.com
robot-owner-email:  doug@greenpac.com
robot-status:  active:  robot significantly developed, but still undergoing fixes
robot-purpose:  maintenance:  link validation, html validation, image size
 validation, etc
robot-type:  standalone
robot-platform: unix
robot-availability:  free service and more extensive commercial service
robot-exclusion:  yes
robot-exclusion-useragent:  inspectorwww
robot-noindex:  no
robot-host:  www.corpsite.com, www.greenpac.com, 38.234.171.*
robot-from:  yes
robot-useragent:  inspectorwww/1.0 http://www.greenpac.com/inspectorwww.html
robot-language:  c
robot-description:  Provide inspection reports which give advice to WWW
 site owners on missing links, image resize problems, syntax errors, etc.
robot-history:  development started in Mar 1997
robot-environment:  commercial
modified-date:  Tue Jun 17 09:24:58 EST 1997
modified-by:  Doug Green

robot-id:           intelliagent
robot-name:         IntelliAgent
robot-cover-url:    http://www.geocities.com/SiliconValley/3086/iagent.html
robot-details-url:
robot-owner-name:   David Reilly
robot-owner-url:    http://www.geocities.com/SiliconValley/3086/index.html
robot-owner-email:  s1523@sand.it.bond.edu.au
robot-status:       development
robot-purpose:      indexing
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:
robot-host:         sand.it.bond.edu.au
robot-from:         no
robot-useragent:    'IAGENT/1.0'
robot-language:     C
robot-description:  IntelliAgent is still in development. Indeed, it is very far
	from completion. I'm planning to limit the depth at which it
	will probe, so hopefully IAgent won't cause anyone much of a
	problem. At the end of its completion, I hope to publish
	both the raw data and original source code.
robot-history:
robot-environment:
modified-date:      Fri May 31 02:10:39 1996.
modified-by:

robot-id: irobot
robot-name: I, Robot
robot-cover-url: http://irobot.mame.dk/
robot-details-url: http://irobot.mame.dk/about.phtml
robot-owner-name: [mame.dk]
robot-owner-url: http://www.mame.dk/
robot-owner-email: irobot@chaos.dk
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: irobot
robot-noindex: yes
robot-host: *.mame.dk, 206.161.121.*
robot-from: no
robot-useragent: I Robot 0.4 (irobot@chaos.dk)
robot-language: c
robot-description: I Robot is used to build a fresh database for the
 emulation community. Primary focus is information on emulation and
 especially old arcade machines. Primarily english sites will be indexed and
 only if they have their own domain. Sites are added manually based on
 submissions after they have been evaluated.
robot-history: The robot was started in june 2000
robot-environment1: service
robot-environment2: hobby
modified-date: Fri, 27 Oct 2000 09:08:06 GMT
modified-by: BombJack mameadm@chaos.dk

robot-id:iron33
robot-name:Iron33
robot-cover-url:http://verno.ueda.info.waseda.ac.jp/iron33/
robot-details-url:http://verno.ueda.info.waseda.ac.jp/iron33/history.html
robot-owner-name:Takashi Watanabe
robot-owner-url:http://www.ueda.info.waseda.ac.jp/~watanabe/
robot-owner-email:watanabe@ueda.info.waseda.ac.jp
robot-status:active
robot-purpose:indexing, statistics
robot-type:standalone
robot-platform:unix
robot-availability:source
robot-exclusion:yes
robot-exclusion-useragent:Iron33
robot-noindex:no
robot-host:*.folon.ueda.info.waseda.ac.jp, 133.9.215.*
robot-from:yes
robot-useragent:Iron33/0.0
robot-language:c
robot-description:The robot "Iron33" is used to build the
                  database for the WWW search engine "Verno".
robot-history:
robot-environment:research
modified-date:Fri, 20 Mar 1998 18:34 JST
modified-by:Watanabe Takashi

robot-id:           israelisearch
robot-name:         Israeli-search
robot-cover-url:    http://www.idc.ac.il/Sandbag/
robot-details-url:
robot-owner-name:   Etamar Laron
robot-owner-url:    http://www.xpert.com/~etamar/
robot-owner-email:  etamar@xpert.com
robot-status:
robot-purpose:      indexing.
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:         dylan.ius.cs.cmu.edu
robot-from:         no
robot-useragent:    IsraeliSearch/1.0
robot-language:     C
robot-description:  A complete software package designed to collect information
	in a distributed workload and supports context queries. Intended
	to be a complete updated resource for Israeli sites and
	information related to Israel or Israeli
	Society.
robot-history:
robot-environment:
modified-date:      Tue Apr 23 19:23:55 1996.
modified-by:

robot-id: javabee
robot-name: JavaBee
robot-cover-url: http://www.javabee.com
robot-details-url:
robot-owner-name:ObjectBox
robot-owner-url:http://www.objectbox.com/
robot-owner-email:info@objectbox.com
robot-status:Active
robot-purpose:Stealing Java Code
robot-type:standalone
robot-platform:Java
robot-availability:binary
robot-exclusion:no
robot-exclusion-useragent:
robot-noindex:no
robot-host:*
robot-from:no
robot-useragent:JavaBee
robot-language:Java
robot-description:This robot is used to grab java applets and run them
 locally overriding the security implemented
robot-history:
robot-environment:commercial
modified-date:
modified-by:

robot-id: JBot
robot-name: JBot Java Web Robot
robot-cover-url: http://www.matuschek.net/software/jbot
robot-details-url: http://www.matuschek.net/software/jbot
robot-owner-name: Daniel Matuschek
robot-owner-url: http://www.matuschek.net
robot-owner-email: daniel@matuschek.net
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: Java
robot-availability: source
robot-exclusion: yes
robot-exclusion-useragent: JBot
robot-noindex: no
robot-host: *
robot-from: -
robot-useragent: JBot (but can be changed by the user)
robot-language: Java
robot-description: Java web crawler to download web sites
robot-history: -
robot-environment: hobby
modified-date: Thu, 03 Jan 2000 16:00:00 GMT
modified-by: Daniel Matuschek <daniel@matuschek.net>

robot-id: jcrawler
robot-name: JCrawler
robot-cover-url: http://www.nihongo.org/jcrawler/
robot-details-url:
robot-owner-name: Benjamin Franz
robot-owner-url: http://www.nihongo.org/snowhare/
robot-owner-email: snowhare@netimages.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: jcrawler
robot-noindex: yes
robot-host: db.netimages.com
robot-from: yes
robot-useragent: JCrawler/0.2
robot-language: perl5
robot-description: JCrawler is currently used to build the Vietnam topic
                   specific WWW index for VietGATE
                   <URL:http://www.vietgate.net/>. It schedules visits
                   randomly, but will not visit a site more than once
                   every two minutes. It uses a subject matter relevance
                   pruning algorithm to determine what pages to crawl
                   and index and will not generally index pages with
                   no Vietnam related content. Uses Unicode internally,
                   and detects and converts several different Vietnamese
                   character encodings.
robot-history:
robot-environment: service
modified-date: Wed, 08 Oct 1997 00:09:52 GMT
modified-by: Benjamin Franz

robot-id: askjeeves
robot-name: AskJeeves
robot-cover-url: http://www.ask.com
robot-details-url: 
robot-owner-name: Ask Jeeves, Inc.
robot-owner-url: http://www.ask.com
robot-owner-email: postmaster@ask.com
robot-status: active
robot-purpose: indexing, maintenance
robot-type: standalone
robot-platform: linux
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: "Teoma" or "Ask Jeeves" or "Jeeves"
robot-noindex: Yes
robot-host: ez*.directhit.com
robot-from: No
robot-useragent: Mozilla/2.0 (compatible; Ask Jeeves/Teoma) 
robot-language: c++
robot-description: Ask Jeeves / Teoma spider
robot-history: Developed by Direct Hit Technologies which was aquired by
 Ask Jeeves in 2000.
robot-environment: service
modified-date: Fri Jan 17 15:20:08 EST 2003
modified-by: brucep@ask.com

robot-id: jobo
robot-name: JoBo Java Web Robot
robot-cover-url: http://www.matuschek.net/software/jobo/
robot-details-url: http://www.matuschek.net/software/jobo/
robot-owner-name: Daniel Matuschek
robot-owner-url: http://www.matuschek.net
robot-owner-email: daniel@matuschek.net
robot-status: active
robot-purpose: downloading, mirroring, indexing
robot-type: standalone
robot-platform: unix, windows, os/2, mac
robot-availability: source
robot-exclusion: yes
robot-exclusion-useragent: jobo
robot-noindex: no
robot-host: *
robot-from: yes
robot-useragent: JoBo (can be modified by the user)
robot-language: java
robot-description: JoBo is a web site download tool. The core web spider can be used for any purpose.
robot-history: JoBo was developed as a simple download tool and became a full featured web spider during development
robot-environment: hobby
modified-date: Fri, 20 Apr 2001 17:00:00 GMT
modified-by: Daniel Matuschek <daniel@matuschek.net>

robot-id:           jobot
robot-name:         Jobot
robot-cover-url:    http://www.micrognosis.com/~ajack/jobot/jobot.html
robot-details-url:
robot-owner-name:   Adam Jack
robot-owner-url:    http://www.micrognosis.com/~ajack/index.html
robot-owner-email:  ajack@corp.micrognosis.com
robot-status:       inactive
robot-purpose:      
robot-type:         standalone
robot-platform:     
robot-availability: 
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         supernova.micrognosis.com
robot-from:         yes
robot-useragent:    Jobot/0.1alpha libwww-perl/4.0
robot-language:     perl 4
robot-description:  Its purpose is to generate a Resource Discovery database.
	Intended to seek out sites of potential "career interest".
	Hence - Job Robot.
robot-history:      
robot-environment:
modified-date:      Tue Jan  9 18:55:55 1996
modified-by:

robot-id:           joebot
robot-name:         JoeBot
robot-cover-url:
robot-details-url:
robot-owner-name:   Ray Waldin
robot-owner-url:    http://www.primenet.com/~rwaldin
robot-owner-email:  rwaldin@primenet.com
robot-status:
robot-purpose:
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:
robot-from:         yes
robot-useragent:    JoeBot/x.x, 
robot-language:     java
robot-description:  JoeBot is a generic web crawler implemented as a
	collection of Java classes which can be used in a variety of
	applications, including resource discovery, link validation,
	mirroring, etc.  It currently limits itself to one visit per
	host per minute.
robot-history:
robot-environment:
modified-date:      Sun May 19 08:13:06 1996.
modified-by:

robot-id:           jubii
robot-name:         The Jubii Indexing Robot
robot-cover-url:    http://www.jubii.dk/robot/default.htm
robot-details-url:
robot-owner-name:   Jakob Faarvang
robot-owner-url:    http://www.cybernet.dk/staff/jakob/
robot-owner-email:  jakob@jubii.dk
robot-status:       
robot-purpose:      indexing, maintenance
robot-type:         standalone
robot-platform:     
robot-availability: 
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         any host in the cybernet.dk domain
robot-from:         yes
robot-useragent:    JubiiRobot/version#
robot-language:     visual basic 4.0
robot-description:  Its purpose is to generate a Resource Discovery database,
	and validate links. Used for indexing the .dk top-level
	domain as well as other Danish sites for a Danish web
	database, as well as link validation.
robot-history:      Will be in constant operation from Spring
	1996
robot-environment:
modified-date:      Sat Jan  6 20:58:44 1996
modified-by:

robot-id:           jumpstation
robot-name:         JumpStation
robot-cover-url:    http://js.stir.ac.uk/jsbin/jsii
robot-details-url:
robot-owner-name:   Jonathon Fletcher
robot-owner-url:    http://www.stir.ac.uk/~jf1
robot-owner-email:  j.fletcher@stirling.ac.uk 
robot-status:       retired
robot-purpose:      indexing
robot-type:
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:         *.stir.ac.uk
robot-from:         yes
robot-useragent:    jumpstation
robot-language:     perl, C, c++
robot-description:
robot-history:      Originated as a weekend project in 1993.
robot-environment:
modified-date:      Tue May 16 00:57:42 1995.
modified-by:

robot-id: kapsi
robot-name: image.kapsi.net
robot-cover-url: http://image.kapsi.net/
robot-details-url: http://image.kapsi.net/index.php?page=robot
robot-owner-name: Jaakko Heusala
robot-owner-url: http://huoh.kapsi.net/
robot-owner-email: Jaakko.Heusala@kapsi.net
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: data
robot-exclusion: yes
robot-exclusion-useragent: image.kapsi.net
robot-noindex: no
robot-host: addr-212-50-142-138.suomi.net
robot-from: yes
robot-useragent: image.kapsi.net/1.0
robot-language: perl
robot-description: The image.kapsi.net robot is used to build the database for the image.kapsi.net search service. The robot currently runs at random times.
robot-history: The robot was built for image.kapsi.net's database in year 2001.
robot-environment: hobby, research
modified-date: Thu, 13 Dec 2001 23:28:23 EET
modified-by:

robot-id:           katipo
robot-name:         Katipo
robot-cover-url:    http://www.vuw.ac.nz/~newbery/Katipo.html
robot-details-url:  http://www.vuw.ac.nz/~newbery/Katipo/Katipo-doc.html
robot-owner-name:   Michael Newbery
robot-owner-url:    http://www.vuw.ac.nz/~newbery
robot-owner-email:  Michael.Newbery@vuw.ac.nz
robot-status:       active
robot-purpose:      maintenance
robot-type:         standalone
robot-platform:     Macintosh
robot-availability: binary
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         *
robot-from:         yes
robot-useragent:    Katipo/1.0
robot-language:     c
robot-description:  Watches all the pages you have previously visited
	and tells you when they have changed.
robot-history:      
robot-environment:  commercial (free)
modified-date:      Tue, 25 Jun 96 11:40:07 +1200
modified-by:        Michael Newbery

robot-id:               kdd
robot-name:             KDD-Explorer
robot-cover-url:        http://mlc.kddvw.kcom.or.jp/CLINKS/html/clinks.html
robot-details-url:      not available
robot-owner-name:       Kazunori Matsumoto
robot-owner-url:        not available
robot-owner-email:      matsu@lab.kdd.co.jp
robot-status:           development (to be active in June 1997)
robot-purpose:          indexing
robot-type:             standalone
robot-platform:         unix
robot-availability:     none
robot-exclusion:        yes
robot-exclusion-useragent:KDD-Explorer
robot-noindex:          no
robot-host:             mlc.kddvw.kcom.or.jp
robot-from:             yes
robot-useragent:        KDD-Explorer/0.1
robot-language:         c
robot-description:      KDD-Explorer is used for indexing valuable documents
                which will be retrieved via an experimental cross-language
                search engine, CLINKS.
robot-history:          This robot was designed in the Knowledge-based
                        Information Processing Laboratory, KDD R&D Laboratories, 1996-1997
robot-environment:      research
modified-date:          Mon, 2 June 1997 18:00:00 JST
modified-by:            Kazunori Matsumoto

robot-id:kilroy
robot-name:Kilroy
robot-cover-url:http://purl.org/kilroy
robot-details-url:http://purl.org/kilroy
robot-owner-name:OCLC
robot-owner-url:http://www.oclc.org
robot-owner-email:kilroy@oclc.org
robot-status:active
robot-purpose:indexing,statistics
robot-type:standalone
robot-platform:unix,windowsNT
robot-availability:none
robot-exclusion:yes
robot-exclusion-useragent:*
robot-noindex:no
robot-host:*.oclc.org
robot-from:no
robot-useragent:yes
robot-language:java
robot-description:Used to collect data for several projects. 
 Runs constantly and visits site no faster than once every 90 seconds.
robot-history:none
robot-environment:research,service
modified-date:Thursday, 24 Apr 1997 20:00:00 GMT
modified-by:tkac

robot-id: ko_yappo_robot
robot-name: KO_Yappo_Robot
robot-cover-url: http://yappo.com/info/robot.html
robot-details-url: http://yappo.com/
robot-owner-name: Kazuhiro Osawa
robot-owner-url: http://yappo.com/
robot-owner-email: office_KO@yappo.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: ko_yappo_robot
robot-noindex: yes
robot-host: yappo.com,209.25.40.1
robot-from: yes
robot-useragent: KO_Yappo_Robot/1.0.4(http://yappo.com/info/robot.html)
robot-language: perl
robot-description: The KO_Yappo_Robot robot is used to build the database
           for the Yappo search service by k,osawa
           (part of AOL).
           The robot runs on a random day, and visits sites in a random order.
robot-history: The robot is hobby of k,osawa
           at the Tokyo in 1997
robot-environment: hobby
modified-date: Fri, 18 Jul 1996 12:34:21 GMT
modified-by: KO

robot-id: labelgrabber.txt
robot-name: LabelGrabber
robot-cover-url: http://www.w3.org/PICS/refcode/LabelGrabber/index.htm
robot-details-url: http://www.w3.org/PICS/refcode/LabelGrabber/index.htm
robot-owner-name: Kyle Jamieson
robot-owner-url: http://www.w3.org/PICS/refcode/LabelGrabber/index.htm
robot-owner-email: jamieson@mit.edu
robot-status: active
robot-purpose: Grabs PICS labels from web pages, submits them to a label bureau
robot-type: standalone
robot-platform: windows, windows95, windowsNT, unix
robot-availability: source
robot-exclusion: yes
robot-exclusion-useragent: label-grabber
robot-noindex: no
robot-host: head.w3.org
robot-from: no
robot-useragent: LabelGrab/1.1
robot-language: java
robot-description: The label grabber searches for PICS labels and submits
 them to a label bureau
robot-history: N/A
robot-environment: research
modified-date: Wed, 28 Jan 1998 17:32:52 GMT
modified-by: jamieson@mit.edu

robot-id: larbin
robot-name: larbin
robot-cover-url: http://para.inria.fr/~ailleret/larbin/index-eng.html
robot-owner-name: Sebastien Ailleret
robot-owner-url: http://para.inria.fr/~ailleret/
robot-owner-email: sebastien.ailleret@inria.fr
robot-status: active
robot-purpose: Your imagination is the only limit
robot-type: standalone
robot-platform: Linux
robot-availability: source (GPL), mail me for customization
robot-exclusion: yes
robot-exclusion-useragent: larbin
robot-noindex: no
robot-host: *
robot-from: no
robot-useragent: larbin (+mail)
robot-language: c++
robot-description: Parcourir le web, telle est ma passion
robot-history: french research group (INRIA Verso)
robot-environment: hobby
modified-date: 2000-3-28
modified-by: Sebastien Ailleret

robot-id: legs
robot-name: legs
robot-cover-url: http://www.MagPortal.com/
robot-details-url:
robot-owner-name: Bill Dimm
robot-owner-url: http://www.HotNeuron.com/
robot-owner-email: admin@magportal.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: linux
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: legs
robot-noindex: no
robot-host:
robot-from: yes
robot-useragent: legs
robot-language: perl5
robot-description: The legs robot is used to build the magazine article
 database for MagPortal.com.
robot-history:
robot-environment: service
modified-date: Wed, 22 Mar 2000 14:10:49 GMT
modified-by: Bill Dimm

robot-id: linkidator
robot-name: Link Validator
robot-cover-url:
robot-details-url:
robot-owner-name: Thomas Gimon
robot-owner-url:
robot-owner-email: tgimon@mitre.org
robot-status: development
robot-purpose: maintenance
robot-type: standalone
robot-platform: unix, windows
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: Linkidator
robot-noindex: yes
robot-nofollow: yes
robot-host: *.mitre.org
robot-from: yes
robot-useragent: Linkidator/0.93
robot-language: perl5
robot-description: Recursively checks all links on a site, looking for
 broken or redirected links.  Checks all off-site links using HEAD
 requests and does not progress further.  Designed to behave well and to
 be very configurable.
robot-history: Built using WWW-Robot-0.022 perl module.  Currently in
 beta test.  Seeking approval for public release.
robot-environment: internal
modified-date: Fri, 20 Jan 2001 02:22:00 EST
modified-by: Thomas Gimon

robot-id:linkscan
robot-name:LinkScan
robot-cover-url:http://www.elsop.com/
robot-details-url:http://www.elsop.com/linkscan/overview.html
robot-owner-name:Electronic Software Publishing Corp. (Elsop)
robot-owner-url:http://www.elsop.com/
robot-owner-email:sales@elsop.com
robot-status:Robot actively in use
robot-purpose:Link checker, SiteMapper, and HTML Validator
robot-type:Standalone
robot-platform:Unix, Linux, Windows 98/NT
robot-availability:Program is shareware
robot-exclusion:No
robot-exclusion-useragent:
robot-noindex:Yes
robot-host:*
robot-from:
robot-useragent:LinkScan Server/5.5 | LinkScan Workstation/5.5
robot-language:perl5
robot-description:LinkScan checks links, validates HTML and creates site maps
robot-history: First developed by Elsop in January,1997
robot-environment:Commercial
modified-date:Fri, 3 September 1999 17:00:00 PDT
modified-by: Kenneth R. Churilla

robot-id: linkwalker
robot-name: LinkWalker
robot-cover-url: http://www.seventwentyfour.com
robot-details-url: http://www.seventwentyfour.com/tech.html
robot-owner-name: Roy Bryant
robot-owner-url: 
robot-owner-email: rbryant@seventwentyfour.com
robot-status: active
robot-purpose: maintenance, statistics
robot-type: standalone
robot-platform: windowsNT
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: linkwalker
robot-noindex: yes
robot-host: *.seventwentyfour.com
robot-from: yes
robot-useragent: LinkWalker
robot-language: c++
robot-description: LinkWalker generates a database of links.
 We send reports of bad ones to webmasters.
robot-history: Constructed late 1997 through April 1998.
 In full service April 1998.
robot-environment: service
modified-date: Wed, 22 Apr 1998
modified-by: Roy Bryant

robot-id:lockon
robot-name:Lockon
robot-cover-url:
robot-details-url:
robot-owner-name:Seiji Sasazuka & Takahiro Ohmori
robot-owner-url:
robot-owner-email:search@rsch.tuis.ac.jp
robot-status:active
robot-purpose:indexing
robot-type:standalone
robot-platform:UNIX
robot-availability:none
robot-exclusion:yes
robot-exclusion-useragent:Lockon
robot-noindex:yes
robot-host:*.hitech.tuis.ac.jp
robot-from:yes
robot-useragent:Lockon/xxxxx
robot-language:perl5 
robot-description:This robot gathers only HTML document.
robot-history:This robot was developed in the Tokyo university of information sciences in 1998.
robot-environment:research
modified-date:Tue, 10 Nov 1998 20:00:00 GMT
modified-by:Seiji Sasazuka & Takahiro Ohmori

robot-id:logo_gif
robot-name: logo.gif Crawler
robot-cover-url: http://www.inm.de/projects/logogif.html
robot-details-url:
robot-owner-name: Sevo Stille
robot-owner-url: http://www.inm.de/people/sevo
robot-owner-email: sevo@inm.de
robot-status: under development
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: logo_gif_crawler
robot-noindex: no
robot-host: *.inm.de
robot-from: yes
robot-useragent: logo.gif crawler
robot-language: perl
robot-description: meta-indexing engine for corporate logo graphics
 The robot runs at irregular intervals and will only pull a start page and
 its associated /.*logo\.gif/i (if any). It will be terminated once a
 statistically significant number of samples has been collected.
robot-history: logo.gif is part of the design diploma of Markus Weisbeck,
 and tries to analyze the abundance of the logo metaphor in WWW
 corporate design.
 The crawler and image database were written by Sevo Stille and Peter
 Frank of the Institut für Neue Medien, respectively.
robot-environment: research, statistics
modified-date: 25.5.97
modified-by: Sevo Stille

robot-id:           lycos
robot-name:         Lycos
robot-cover-url:    http://lycos.cs.cmu.edu/
robot-details-url:
robot-owner-name:   Dr. Michael L. Mauldin
robot-owner-url:    http://fuzine.mt.cs.cmu.edu/mlm/home.html
robot-owner-email:  fuzzy@cmu.edu
robot-status:       
robot-purpose:      indexing
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         fuzine.mt.cs.cmu.edu, lycos.com
robot-from:         
robot-useragent:    Lycos/x.x
robot-language:     
robot-description:  This is a research program in providing information
	retrieval and discovery in the WWW, using a finite memory
	model of the web to guide intelligent, directed searches for
	specific  information needs
robot-history:      
robot-environment:
modified-date:      
modified-by:

robot-id:           macworm
robot-name:         Mac WWWWorm
robot-cover-url:    
robot-details-url:
robot-owner-name:   Sebastien Lemieux
robot-owner-url:    
robot-owner-email:  lemieuse@ERE.UMontreal.CA
robot-status:       
robot-purpose:      indexing
robot-type:         
robot-platform:     Macintosh
robot-availability: none
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         
robot-from:         
robot-useragent:    
robot-language:     hypercard
robot-description:  a French Keyword-searching robot for the Mac The author has
	decided not to release this robot to the
	public
robot-history:      
robot-environment:
modified-date:      
modified-by:

robot-id: magpie
robot-name: Magpie
robot-cover-url:
robot-details-url:
robot-owner-name: Keith Jones
robot-owner-url: 
robot-owner-email: Keith.Jones@blueberry.co.uk
robot-status: development
robot-purpose: indexing, statistics
robot-type: standalone
robot-platform: unix
robot-availability:
robot-exclusion: no
robot-exclusion-useragent:
robot-noindex: no
robot-host: *.blueberry.co.uk, 194.70.52.*, 193.131.167.144
robot-from: no
robot-useragent: Magpie/1.0
robot-language: perl5
robot-description: Used to obtain information from a specified list of web pages for local indexing. Runs every two hours, and visits only a small number of sites.
robot-history: Part of a research project. Alpha testing from 10 July 1996, Beta testing from 10 September.
robot-environment: research
modified-date: Wed, 10 Oct 1996 13:15:00 GMT
modified-by: Keith Jones

robot-id: marvin
robot-name: marvin/infoseek
robot-details-url:
robot-cover-url: http://www.infoseek.de/
robot-owner-name: WSI Webseek Infoservice GmbH & Co KG.
robot-owner-url: http://www.infoseek.de/
robot-owner-email: marvin-team@webseek.de
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: marvin
robot-noindex: yes
robot-nofollow: yes
robot-host: arthur*.sda.t-online.de
robot-from: yes
robot-useragent: marvin/infoseek (marvin-team@webseek.de)
robot-language: java
robot-description: 
robot-history: day of birth: 4.2. 2001 - replaces Infoseek Sidewinder
robot-environment: commercial
modified-date: Fri, 11 May 2001 17:28:52 GMT

robot-id: mattie
robot-name: Mattie
robot-cover-url: http://www.mcw.aarkayn.org
robot-details-url: http://www.mcw.aarkayn.org/web/mattie.asp
robot-owner-name: Matt
robot-owner-url: http://www.mcw.aarkayn.org
robot-owner-email: matt@mcw.aarkayn.org
robot-status: Active
robot-purpose: Procurement Spider
robot-type: Standalone
robot-platform: UNIX
robot-availability: None
robot-exclusion: Yes
robot-exclusion-useragent: mattie
robot-noindex: N/A
robot-nofollow: Yes
robot-host: mattie.mcw.aarkayn.org
robot-from: Yes
robot-useragent: M/3.8
robot-language: C++
robot-description: Mattie is an all-source procurement spider.
robot-history: Created 2000 Mar. 03 Fri. 18:48:16 -0500 GMT (R) as an MP3
 spider, Mattie was reborn 2002 Jul. 07 Sun. 03:47:29 -0500 GMT (R) as an
 all-source procurement spider.
robot-environment: Hobby
modified-date: Fri, 13 Sep 2002 00:36:13 GMT
modified-by: Matt

robot-id: mediafox
robot-name: MediaFox
robot-cover-url: none
robot-details-url: none
robot-owner-name: Lars Eilebrecht   
robot-owner-url: http://www.home.unix-ag.org/sfx/
robot-owner-email: sfx@uni-media.de
robot-status: development
robot-purpose: indexing and maintenance
robot-type: standalone
robot-platform: (Java)
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: mediafox
robot-noindex: yes
robot-host: 141.99.*.*
robot-from: yes
robot-useragent: MediaFox/x.y
robot-language: Java
robot-description: The robot is used to index meta information of a
                   specified set of documents and update a database
                   accordingly.
robot-history: Project at the University of Siegen
robot-environment: research
modified-date: Fri Aug 14 03:37:56 CEST 1998
modified-by: Lars Eilebrecht

robot-id:merzscope
robot-name:MerzScope
robot-cover-url:http://www.merzcom.com
robot-details-url:http://www.merzcom.com
robot-owner-name:(Client based robot)
robot-owner-url:(Client based robot)
robot-owner-email:
robot-status:actively in use
robot-purpose:WebMapping
robot-type:standalone
robot-platform:	(Java Based) unix,windows95,windowsNT,os2,mac etc ..
robot-availability:binary
robot-exclusion: yes
robot-exclusion-useragent: MerzScope
robot-noindex: no
robot-host:(Client Based)
robot-from:
robot-useragent: MerzScope
robot-language:	java
robot-description: Robot is part of a Web-Mapping package called MerzScope,
	 to be used mainly by consultants, and web masters to create and
	 publish maps, on and of the World wide web.
robot-history: 
robot-environment:
modified-date: Fri, 13 March 1997 16:31:00
modified-by: Philip Lenir, MerzScope lead developer

robot-id:		meshexplorer
robot-name:		NEC-MeshExplorer
robot-cover-url:	http://netplaza.biglobe.or.jp/
robot-details-url:	http://netplaza.biglobe.or.jp/keyword.html
robot-owner-name:	web search service maintenance group
robot-owner-url:	http://netplaza.biglobe.or.jp/keyword.html
robot-owner-email:	web-dir@mxa.meshnet.or.jp
robot-status:		active
robot-purpose:		indexing
robot-type:		standalone
robot-platform:		unix
robot-availability:	none
robot-exclusion:	yes
robot-exclusion-useragent:	NEC-MeshExplorer
robot-noindex:		no
robot-host:		meshsv300.tk.mesh.ad.jp
robot-from:		yes
robot-useragent:	NEC-MeshExplorer
robot-language:		c
robot-description:	The NEC-MeshExplorer robot is used to build database for the NETPLAZA
 search service operated by NEC Corporation. The robot searches URLs
 around sites in Japan (JP domain).
 The robot runs every day, and visits sites in a random order.
robot-history: Prototype version of this robot was developed in C&C Research
 Laboratories, NEC Corporation. Current robot (Version 1.0) is based
 on the prototype and has more functions.
robot-environment:	research
modified-date:		Jan 1, 1997
modified-by:		Nobuya Kubo, Hajime Takano

robot-id: MindCrawler
robot-name: MindCrawler
robot-cover-url: http://www.mindpass.com/_technology_faq.htm
robot-details-url:
robot-owner-name: Mindpass
robot-owner-url: http://www.mindpass.com/
robot-owner-email: support@mindpass.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: linux
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: MindCrawler
robot-noindex: no
robot-host: *
robot-from: no
robot-useragent: MindCrawler
robot-language: c++
robot-description: 
robot-history:
robot-environment:
modified-date: Tue Mar 28 11:30:09 CEST 2000
modified-by:

robot-id: mnogosearch
robot-name: mnoGoSearch search engine software
robot-cover-url: http://www.mnogosearch.org
robot-details-url: http://www.mnogosearch.org/features.html
robot-owner-name: Lavtech.com corp.
robot-owner-url: http://www.mnogosearch.org
robot-owner-email: support@mnogosearch.org
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix, windows, mac
robot-availability: source
robot-exclusion: yes
robot-exclusion-useragent: udmsearch
robot-noindex: yes
robot-host: *
robot-from: no
robot-useragent: UdmSearch
robot-language: c
robot-description: mnoGoSearch search engine software (formerly known
 as UDMSearch) is an advanced search solution for large-scale websites
 and Intranet. It is based on SQL database and supports numerous
 features.
robot-history: Formerly known as UDMSearch was developed as the search
  engine for the Russian republic of Udmurtia.
robot-environment: commercial
modified-date: Wed, 12 Sept 2001
modified-by: Dmitry Tkatchenko

robot-id:moget
robot-name:moget
robot-cover-url:
robot-details-url:
robot-owner-name:NTT-ME Information Xing, Inc
robot-owner-url:http://www.nttx.co.jp
robot-owner-email:moget@goo.ne.jp
robot-status:active
robot-purpose:indexing,statistics
robot-type:standalone
robot-platform:unix
robot-availability:none
robot-exclusion:yes
robot-exclusion-useragent:moget
robot-noindex:yes
robot-host:*.goo.ne.jp
robot-from:yes
robot-useragent:moget/1.0
robot-language:c
robot-description: This robot is used to build the database for the search service operated by goo
robot-history:
robot-environment:service
modified-date:Thu, 30 Mar 2000 18:40:37 GMT
modified-by:moget@goo.ne.jp

robot-id:           momspider
robot-name:         MOMspider
robot-cover-url:    http://www.ics.uci.edu/WebSoft/MOMspider/
robot-details-url:
robot-owner-name:   Roy T. Fielding
robot-owner-url:    http://www.ics.uci.edu/dir/grad/Software/fielding
robot-owner-email:  fielding@ics.uci.edu
robot-status:       active
robot-purpose:      maintenance, statistics
robot-type:         standalone
robot-platform:     UNIX
robot-availability: source
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         *
robot-from:         yes
robot-useragent:    MOMspider/1.00 libwww-perl/0.40
robot-language:     perl 4
robot-description:  to validate links, and generate statistics. It's usually run
	from anywhere
robot-history:      Originated as a research project at the University of
	California, Irvine, in 1993. Presented at the First
	International WWW Conference in Geneva, 1994.
robot-environment:
modified-date:      Sat May 6 08:11:58 1995	
modified-by:        fielding@ics.uci.edu

robot-id:           monster
robot-name:         Monster
robot-cover-url:    http://www.neva.ru/monster.list/russian.www.html
robot-details-url:  
robot-owner-name:   Dmitry Dicky
robot-owner-url:    http://wild.stu.neva.ru/
robot-owner-email:  diwil@wild.stu.neva.ru
robot-status:       active
robot-purpose:      maintenance, mirroring
robot-type:         standalone
robot-platform:     UNIX (Linux)
robot-availability: binary
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         wild.stu.neva.ru
robot-from:         
robot-useragent:    Monster/vX.X.X -$TYPE ($OSTYPE)
robot-language:     C
robot-description:  The Monster has two parts - Web searcher and Web analyzer.
	Searcher is intended to produce the list of WWW sites of
	a desired domain (for example it can produce a list of all
	WWW sites of the mit.edu, com, org, etc... domain)
	In the User-agent field $TYPE is set to 'Mapper' for Web searcher
	and 'StAlone' for Web analyzer. 
robot-history:      Now the full (I suppose) list of ex-USSR sites is produced.
robot-environment:  
modified-date:      Tue Jun 25 10:03:36 1996
modified-by:

robot-id: motor
robot-name: Motor
robot-cover-url: http://www.cybercon.de/Motor/index.html
robot-details-url:
robot-owner-name: Mr. Oliver Runge, Mr. Michael Goeckel
robot-owner-url: http://www.cybercon.de/index.html
robot-owner-email: Motor@cybercon.technopark.gmd.de
robot-status: developement
robot-purpose: indexing
robot-type: standalone
robot-platform: mac
robot-availability: data
robot-exclusion: yes
robot-exclusion-useragent: Motor
robot-noindex: no
robot-host: Michael.cybercon.technopark.gmd.de
robot-from: yes
robot-useragent: Motor/0.2
robot-language: 4th dimension
robot-description: The Motor robot is used to build the database for the 
 www.webindex.de search service operated by CyberCon. The robot is under
 development - it runs at random intervals and visits sites in a priority
 driven order (.de/.ch/.at first, root and robots.txt first)
robot-history: 
robot-environment: service
modified-date: Wed, 3 Jul 1996 15:30:00 +0100
modified-by: Michael Goeckel (Michael@cybercon.technopark.gmd.de)

robot-id: msnbot
robot-name: MSNBot
robot-cover-url: http://search.msn.com
robot-details-url: http://search.msn.com/msnbot.htm
robot-owner-name: Microsoft Corp.
robot-owner-url: http://www.microsoft.com
robot-owner-email: msnbot@microsoft.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: Windows Server 2000, Windows Server 2003
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: msnbot
robot-noindex: yes
robot-host: <TBD>
robot-from: yes
robot-useragent: MSNBOT/0.1 (http://search.msn.com/msnbot.htm)
robot-language: C++
robot-description: MSN Search Crawler
robot-history: Developed by Microsoft Corp.
robot-environment: commercial
modified-date: June 23, 2003
modified-by: msnbot@microsoft.com

robot-id: muncher
robot-name: Muncher
robot-details-url: http://www.goodlookingcooking.co.uk/info.htm
robot-cover-url: http://www.goodlookingcooking.co.uk
robot-owner-name: Chris Ridings
robot-owner-url: http://www.goodlookingcooking.co.uk
robot-owner-email: muncher@ridings.org.uk
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: muncher
robot-noindex: yes
robot-nofollow: yes
robot-host: www.goodlookingcooking.co.uk
robot-from: no
robot-useragent: yes
robot-language: perl
robot-description: Used to build the index for www.goodlookingcooking.co.uk.
 Seeks out cooking and recipe pages.
robot-history: Private project september 2001
robot-environment: hobby
modified-date: Wed, 5 Sep 2001 19:21:00 GMT

robot-id: muninn
robot-name: Muninn
robot-cover-url: http://people.freenet.de/Muninn/eyrie.html
robot-details-url: http://people.freenet.de/Muninn/
robot-owner-name: Sandra Groth
robot-owner-url: http://santana.dynalias.net/
robot-owner-email: muninn_bot@gmx.net
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: source, data
robot-exclusion: yes
robot-exclusion-useragent: muninn
robot-noindex: yes
robot-nofollow: yes
robot-host: santana.dynalias.net, 80.185.*, *
robot-from: yes
robot-useragent: Muninn/0.1 libwww-perl-5.76
 (http://people.freenet.de/Muninn/)
robot-language: Perl5
robot-description: Muninn looks at museums within my reach and tells me about
 current exhibitions.
robot-history: It's hard to keep track of things. Automation helps.
robot-environment: hobby
modified-date: Thu Jun  3 16:36:47 CEST 2004
modified-by: Sandra Groth

robot-id: muscatferret
robot-name: Muscat Ferret
robot-cover-url: http://www.muscat.co.uk/euroferret/
robot-details-url:
robot-owner-name: Olly Betts
robot-owner-url: http://www.muscat.co.uk/~olly/
robot-owner-email: olly@muscat.co.uk
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: MuscatFerret
robot-noindex: yes
robot-host: 193.114.89.*, 194.168.54.11
robot-from: yes
robot-useragent: MuscatFerret/<version>
robot-language: c, perl5
robot-description: Used to build the database for the EuroFerret
 <URL:http://www.muscat.co.uk/euroferret/>
robot-history:
robot-environment: service
modified-date: Tue, 21 May 1997 17:11:00 GMT
modified-by: olly@muscat.co.uk

robot-id: mwdsearch
robot-name: Mwd.Search
robot-cover-url: (none)
robot-details-url: (none)
robot-owner-name: Antti Westerberg
robot-owner-url: (none)
robot-owner-email: Antti.Westerberg@mwd.sci.fi
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix (Linux)
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: MwdSearch
robot-noindex: yes
robot-host: *.fifi.net
robot-from: no
robot-useragent: MwdSearch/0.1
robot-language: perl5, c
robot-description: Robot for indexing finnish (toplevel domain .fi)
                   webpages for search engine called Fifi.
                   Visits sites in random order.
robot-history: (none)
robot-environment: service (+ commercial)
modified-date: Mon, 26 May 1997 15:55:02 EEST
modified-by: Antti.Westerberg@mwd.sci.fi

robot-id: myweb
robot-name: Internet Shinchakubin
robot-cover-url: http://naragw.sharp.co.jp/myweb/home/
robot-details-url:
robot-owner-name: SHARP Corp.
robot-owner-url: http://naragw.sharp.co.jp/myweb/home/
robot-owner-email: shinchakubin-request@isl.nara.sharp.co.jp
robot-status: active
robot-purpose: find new links and changed pages
robot-type: standalone
robot-platform: Windows98
robot-availability: binary as bundled software
robot-exclusion: yes
robot-exclusion-useragent: sharp-info-agent
robot-noindex: no
robot-host: *
robot-from: no
robot-useragent: User-Agent: Mozilla/4.0 (compatible; sharp-info-agent v1.0; )
robot-language: Java
robot-description: makes a list of new links and changed pages based
      on the user's frequently clicked pages in the past 31 days.
      The client may run this software once or a few times every day,
      manually or at a specified time.
robot-history: shipped for SHARP's PC users since Feb 2000
robot-environment: commercial
modified-date: Fri, 30 Jun 2000 19:02:52 JST
modified-by: Katsuo Doi <doi@isl.nara.sharp.co.jp>

robot-id: NDSpider
robot-name: NDSpider
robot-cover-url: http://www.NationalDirectory.com/addurl
robot-details-url: http://www.NationalDirectory.com/addurl
robot-owner-name: NationalDirectory.com
robot-owner-url: http://www.NationalDirectory.com
robot-owner-email: dns3@NationalDirectory.com
robot-status: Active
robot-purpose: Indexing
robot-type: Standalone
robot-platform: Unix platform
robot-availability: None
robot-exclusion: Yes
robot-exclusion-useragent:
robot-noindex:
robot-host: Blowfish.NationalDirectory.net
robot-from:
robot-useragent: NDSpider/1.5
robot-language: C
robot-description: It is designed to index the web.
robot-history: Development started on  05 December 1996
robot-environment: UNIX
modified-date: 14 March 2004
modified-by:

robot-id:           netcarta
robot-name:         NetCarta WebMap Engine
robot-cover-url:    http://www.netcarta.com/
robot-details-url:
robot-owner-name:   NetCarta WebMap Engine
robot-owner-url:    http://www.netcarta.com/
robot-owner-email:  info@netcarta.com
robot-status:
robot-purpose:      indexing, maintenance, mirroring, statistics
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:
robot-from:         yes
robot-useragent:    NetCarta CyberPilot Pro
robot-language:     C++.
robot-description:  The NetCarta WebMap Engine is a general purpose, commercial
	spider. Packaged with a full GUI in the CyberPilot Pro
	product, it acts as a personal spider to work with a browser
	to facilitate context-based navigation.  The WebMapper
	product uses the robot to manage a site (site copy, site
	diff, and extensive link management facilities).  All
	versions can create publishable NetCarta WebMaps, which
	capture the crawled information.  If the robot sees a
	published map, it will return the published map rather than
	continuing its crawl. Since this is a personal spider, it
	will be launched from multiple domains. This robot tends to
	focus on a particular site.  No instance of the robot should
	have more than one outstanding request out to any given site
	at a time. The User-agent field contains a coded ID
	identifying the instance of the spider; specific users can
	be blocked via robots.txt using this ID.
robot-history:
robot-environment:
modified-date:      Sun Feb 18 02:02:49 1996.
modified-by:

robot-id:  netmechanic
robot-name:  NetMechanic
robot-cover-url: http://www.netmechanic.com
robot-details-url: http://www.netmechanic.com/faq.html
robot-owner-name: Tom Dahm
robot-owner-url:  http://iquest.com/~tdahm
robot-owner-email: tdahm@iquest.com
robot-status: development
robot-purpose: Link and HTML validation
robot-type: standalone with web gateway
robot-platform: UNIX
robot-availability: via web page
robot-exclusion: Yes
robot-exclusion-useragent: WebMechanic
robot-noindex: no
robot-host: 206.26.168.18
robot-from: no
robot-useragent: NetMechanic
robot-language: C
robot-description:  NetMechanic is a link validation and
 HTML validation robot run using a web page interface.
robot-history:
robot-environment:
modified-date: Sat, 17 Aug 1996 12:00:00 GMT
modified-by:

robot-id: netscoop
robot-name: NetScoop
robot-cover-url: http://www-a2k.is.tokushima-u.ac.jp/search/index.html
robot-owner-name: Kenji Kita
robot-owner-url: http://www-a2k.is.tokushima-u.ac.jp/member/kita/index.html
robot-owner-email: kita@is.tokushima-u.ac.jp
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: UNIX
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: NetScoop
robot-host: alpha.is.tokushima-u.ac.jp, beta.is.tokushima-u.ac.jp
robot-useragent: NetScoop/1.0 libwww/5.0a
robot-language: C
robot-description: The NetScoop robot is used to build the database
                   for the NetScoop search engine.
robot-history: The robot has been used in the research project
               at the Faculty of Engineering, Tokushima University, Japan.,
               since Dec. 1996.
robot-environment: research
modified-date: Fri, 10 Jan 1997.
modified-by: Kenji Kita

robot-id: newscan-online
robot-name: newscan-online
robot-cover-url: http://www.newscan-online.de/
robot-details-url: http://www.newscan-online.de/info.html
robot-owner-name: Axel Mueller
robot-owner-url:
robot-owner-email: mueller@newscan-online.de
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: Linux
robot-availability: binary
robot-exclusion: yes
robot-exclusion-useragent: newscan-online
robot-noindex: no
robot-host: *newscan-online.de
robot-from: yes
robot-useragent: newscan-online/1.1
robot-language: perl
robot-description: The newscan-online robot is used to build a database for
 the newscan-online news search service operated by smart information
 services. The robot runs daily and visits predefined sites in a random order.
robot-history: This robot finds its roots in a prereleased software for
 news filtering for Lotus Notes in 1995.
robot-environment: service
modified-date: Fri, 9 Apr 1999 11:45:00 GMT
modified-by: Axel Mueller

robot-id:           nhse
robot-name:         NHSE Web Forager
robot-cover-url:    http://nhse.mcs.anl.gov/
robot-details-url:
robot-owner-name:   Robert Olson
robot-owner-url:    http://www.mcs.anl.gov/people/olson/
robot-owner-email:  olson@mcs.anl.gov
robot-status:       
robot-purpose:      indexing
robot-type:         standalone
robot-platform:     
robot-availability: 
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         *.mcs.anl.gov
robot-from:         yes
robot-useragent:    NHSEWalker/3.0
robot-language:     perl 5
robot-description:  to generate a Resource Discovery database
robot-history:      
robot-environment:
modified-date:      Fri May 5 15:47:55 1995
modified-by:

robot-id:           nomad
robot-name:         Nomad
robot-cover-url:    http://www.cs.colostate.edu/~sonnen/projects/nomad.html
robot-details-url:
robot-owner-name:   Richard Sonnen
robot-owner-url:    http://www.cs.colostate.edu/~sonnen/
robot-owner-email:  sonnen@cs.colostate.edu
robot-status:
robot-purpose:      indexing
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:
robot-host:         *.cs.colostate.edu
robot-from:         no
robot-useragent:    Nomad-V2.x
robot-language:     Perl 4
robot-description:
robot-history:      Developed in 1995 at Colorado State University.
robot-environment:
modified-date:      Sat Jan 27 21:02:20 1996.
modified-by:

robot-id:           northstar
robot-name:         The NorthStar Robot
robot-cover-url:    http://comics.scs.unr.edu:7000/top.html
robot-details-url:
robot-owner-name:   Fred Barrie
robot-owner-url:    
robot-owner-email:  barrie@unr.edu
robot-status:       
robot-purpose:      indexing
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      
robot-host:         frognot.utdallas.edu, utdallas.edu, cnidir.org
robot-from:         yes
robot-useragent:    NorthStar
robot-language:     
robot-description:  Recent runs (26 April 94) will concentrate on textual
	analysis of the Web versus GopherSpace (from the Veronica
	data) as well as indexing.
robot-history:      
robot-environment:
modified-date:      
modified-by:

robot-id: objectssearch
robot-name: ObjectsSearch
robot-cover-url: http://www.ObjectsSearch.com/
robot-details-url: 
robot-owner-name: Software Objects, Inc
robot-owner-url: http://www.thesoftwareobjects.com/
robot-owner-email: support@thesoftwareobjects.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix, windows
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: ObjectsSearch
robot-noindex: yes
robot-host:
robot-from: yes
robot-useragent: ObjectsSearch/0.01
robot-language: java
robot-description: Objects Search Spider
robot-history: Developed by Software Objects Inc.
robot-environment: commercial
modified-date: Friday March 05, 2004
modified-by: support@thesoftwareobjects.com

robot-id: occam
robot-name: Occam
robot-cover-url: http://www.cs.washington.edu/research/projects/ai/www/occam/
robot-details-url:
robot-owner-name: Marc Friedman
robot-owner-url: http://www.cs.washington.edu/homes/friedman/
robot-owner-email: friedman@cs.washington.edu
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: Occam
robot-noindex: no
robot-host: gentian.cs.washington.edu, sekiu.cs.washington.edu, saxifrage.cs.washington.edu
robot-from: yes
robot-useragent: Occam/1.0
robot-language: CommonLisp, perl4
robot-description: The robot takes high-level queries, breaks them down into
                multiple web requests, and answers them by combining disparate
                data gathered in one minute from numerous web sites, or from
                the robots cache.  Currently the only user is me.
robot-history: The robot is a descendant of Rodney,
               an earlier project at the University of Washington.
robot-environment: research
modified-date: Thu, 21 Nov 1996 20:30 GMT
modified-by: friedman@cs.washington.edu (Marc Friedman)

robot-id:           octopus
robot-name:         HKU WWW Octopus
robot-cover-url:    http://phoenix.cs.hku.hk:1234/~jax/w3rui.shtml
robot-details-url:
robot-owner-name:   Law Kwok Tung , Lee Tak Yeung , Lo Chun Wing
robot-owner-url:    http://phoenix.cs.hku.hk:1234/~jax
robot-owner-email:  jax@cs.hku.hk
robot-status:
robot-purpose:      indexing
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    no.
robot-exclusion-useragent:
robot-noindex:
robot-host:         phoenix.cs.hku.hk
robot-from:         yes
robot-useragent:    HKU WWW Robot, 
robot-language:     Perl 5, C, Java.
robot-description:  HKU Octopus is an ongoing project for resource discovery in
	the Hong Kong and China WWW domain . It is a research
	project conducted by three undergraduates at the University
	of Hong Kong
robot-history:
robot-environment:
modified-date:      Thu Mar  7 14:21:55 1996.
modified-by:

robot-id:OntoSpider
robot-name:OntoSpider
robot-cover-url:http://ontospider.i-n.info
robot-details-url:http://ontospider.i-n.info
robot-owner-name:C. Fenijn
robot-owner-url:http://ontospider.i-n.info
robot-owner-email:ontospider@int-org.com
robot-status:development
robot-purpose:statistics
robot-type:standalone
robot-platform:unix
robot-availability:none
robot-exclusion:yes
robot-exclusion-useragent:
robot-noindex:no
robot-host:ontospider.i-n.info
robot-from:no
robot-useragent:OntoSpider/1.0 libwww-perl/5.65
robot-language:perl5
robot-description:Focused crawler for research purposes
robot-history:Research
robot-environment:research
modified-date:Sun Mar 28 14:39:38
modified-by:C. Fenijn

robot-id:			openfind
robot-name:			Openfind data gatherer
robot-cover-url:		http://www.openfind.com.tw/
robot-details-url:		http://www.openfind.com.tw/robot.html
robot-owner-name:
robot-owner-url:
robot-owner-email:		robot-response@openfind.com.tw
robot-status:			active
robot-purpose:			indexing
robot-type:			standalone
robot-platform:
robot-availability:
robot-exclusion:		yes
robot-exclusion-useragent:
robot-noindex:
robot-host:			66.7.131.132
robot-from:
robot-useragent:		Openfind data gatherer, Openbot/3.0+(robot-response@openfind.com.tw;+http://www.openfind.com.tw/robot.html)
robot-language:
robot-description:
robot-history:
robot-environment:
modified-date:			Thu, 26 Apr 2001 02:55:21 GMT
modified-by:			stanislav shalunov <shalunov@internet2.edu>

robot-id: orb_search
robot-name: Orb Search
robot-cover-url: http://orbsearch.home.ml.org
robot-details-url: http://orbsearch.home.ml.org
robot-owner-name: Matt Weber
robot-owner-url: http://www.weberworld.com
robot-owner-email: webernet@geocities.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: data
robot-exclusion: yes
robot-exclusion-useragent: Orbsearch/1.0
robot-noindex: yes
robot-host: cow.dyn.ml.org, *.dyn.ml.org
robot-from: yes
robot-useragent: Orbsearch/1.0
robot-language: Perl5
robot-description: Orbsearch builds the database for Orb Search Engine.
  It runs when requested.
robot-history: This robot was started as a hobby.
robot-environment: hobby
modified-date: Sun, 31 Aug 1997 02:28:52 GMT
modified-by: Matt Weber

robot-id: packrat
robot-name: Pack Rat
robot-cover-url: http://web.cps.msu.edu/~dexterte/isl/packrat.html
robot-details-url: 
robot-owner-name: Terry Dexter
robot-owner-url: http://web.cps.msu.edu/~dexterte
robot-owner-email: dexterte@cps.msu.edu
robot-status: development
robot-purpose: both maintenance and mirroring
robot-type: standalone
robot-platform: unix
robot-availability:  at the moment, none...source when developed.
robot-exclusion: yes 
robot-exclusion-useragent: packrat or *
robot-noindex: no, not yet
robot-host: cps.msu.edu
robot-from: 
robot-useragent: PackRat/1.0
robot-language: perl with libwww-5.0
robot-description: Used for local maintenance and for gathering 
	web pages so
	that local statistical info can be used in artificial intelligence programs.
         Funded by NEMOnline.
robot-history: In the making...
robot-environment: research
modified-date: Tue, 20 Aug 1996 15:45:11
modified-by: Terry Dexter

robot-id:pageboy
robot-name:PageBoy
robot-cover-url:http://www.webdocs.org/
robot-details-url:http://www.webdocs.org/ 
robot-owner-name:Chihiro Kuroda 
robot-owner-url:http://www.webdocs.org/
robot-owner-email:pageboy@webdocs.org
robot-status:development
robot-purpose:indexing
robot-type:standalone
robot-platform:unix
robot-availability:none
robot-exclusion:yes
robot-exclusion-useragent:pageboy
robot-noindex:yes
robot-nofollow:yes
robot-host:*.webdocs.org
robot-from:yes
robot-useragent:PageBoy/1.0
robot-language:c
robot-description:The robot visits at regular intervals.
robot-history:none
robot-environment:service
modified-date:Fri, 21 Oct 1999 17:28:52 GMT
modified-by:webdocs

robot-id: parasite
robot-name: ParaSite
robot-cover-url: http://www.ianett.com/parasite/
robot-details-url: http://www.ianett.com/parasite/
robot-owner-name: iaNett.com
robot-owner-url: http://www.ianett.com/
robot-owner-email: parasite@ianett.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: windowsNT
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: ParaSite
robot-noindex: yes
robot-nofollow: yes
robot-host: *.ianett.com
robot-from: yes
robot-useragent: ParaSite/0.21 (http://www.ianett.com/parasite/)
robot-language: c++
robot-description: Builds index for ianett.com search database. Runs
 continuously.
robot-history: Second generation of ianett.com spidering technology,
 originally called Sven.
robot-environment: service
modified-date: July 28, 2000
modified-by: Marty Anstey

robot-id:               patric
robot-name:             Patric
robot-cover-url:        http://www.nwnet.net/technical/ITR/index.html
robot-details-url:      http://www.nwnet.net/technical/ITR/index.html
robot-owner-name:       toney@nwnet.net
robot-owner-url:        http://www.nwnet.net/company/staff/toney
robot-owner-email:      webmaster@nwnet.net
robot-status:           development
robot-purpose:          statistics
robot-type:             standalone
robot-platform:         unix
robot-availability:     data
robot-exclusion:        yes
robot-exclusion-useragent: patric       
robot-noindex:          yes     
robot-host:             *.nwnet.net     
robot-from:             no
robot-useragent:        Patric/0.01a            
robot-language:         perl
robot-description:      (contained at http://www.nwnet.net/technical/ITR/index.html )
robot-history:          (contained at http://www.nwnet.net/technical/ITR/index.html )
robot-environment:      service 
modified-date:          Thurs, 15 Aug 1996
modified-by:            toney@nwnet.net

robot-id: pegasus
robot-name: pegasus
robot-cover-url: http://opensource.or.id/projects.html
robot-details-url: http://pegasus.opensource.or.id
robot-owner-name: A.Y.Kiky Shannon
robot-owner-url: http://go.to/ayks
robot-owner-email: shannon@opensource.or.id
robot-status: inactive - open source
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: source, binary
robot-exclusion: yes
robot-exclusion-useragent: pegasus
robot-noindex: yes
robot-host: *
robot-from: yes
robot-useragent: web robot PEGASUS
robot-language: perl5
robot-description: pegasus gathers information from HTML pages (7 important
 tags). The indexing process can be started based on starting URL(s) or a range
 of IP addresses.
robot-history: This robot was created as an implementation of a final project on
 Informatics Engineering Department, Institute of Technology Bandung, Indonesia.
robot-environment: research
modified-date: Fri, 20 Oct 2000 14:58:40 GMT
modified-by: A.Y.Kiky Shannon

robot-id:           perignator
robot-name:         The Peregrinator
robot-cover-url:    http://www.maths.usyd.edu.au:8000/jimr/pe/Peregrinator.html
robot-details-url:
robot-owner-name:   Jim Richardson
robot-owner-url:    http://www.maths.usyd.edu.au:8000/jimr.html
robot-owner-email:  jimr@maths.su.oz.au
robot-status:       
robot-purpose:      
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         
robot-from:         yes
robot-useragent:    Peregrinator-Mathematics/0.7
robot-language:     perl 4
robot-description:  This robot is being used to generate an index of documents
	on Web sites connected with mathematics and statistics. It
	ignores off-site links, so does not stray from a list of
	servers specified initially.
robot-history:      commenced operation in August 1994
robot-environment:
modified-date:      
modified-by:

robot-id: perlcrawler
robot-name: PerlCrawler 1.0
robot-cover-url: http://perlsearch.hypermart.net/
robot-details-url: http://www.xav.com/scripts/xavatoria/index.html
robot-owner-name: Matt McKenzie 
robot-owner-url: http://perlsearch.hypermart.net/
robot-owner-email: webmaster@perlsearch.hypermart.net
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: source
robot-exclusion: yes
robot-exclusion-useragent: perlcrawler
robot-noindex: yes
robot-host: server5.hypermart.net
robot-from: yes
robot-useragent: PerlCrawler/1.0 Xavatoria/2.0
robot-language: perl5
robot-description: The PerlCrawler robot is designed to index and build
 a database of pages relating to the Perl programming language.
robot-history: Originated in modified form on 25 June 1998
robot-environment: hobby
modified-date: Fri, 18 Dec 1998 23:37:40 GMT
modified-by: Matt McKenzie

robot-id:           phantom
robot-name:         Phantom
robot-cover-url:    http://www.maxum.com/phantom/
robot-details-url:
robot-owner-name:   Larry Burke
robot-owner-url:    http://www.aktiv.com/
robot-owner-email:  lburke@aktiv.com
robot-status:
robot-purpose:      indexing
robot-type:         standalone
robot-platform:     Macintosh
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:
robot-from:         yes
robot-useragent:    Duppies
robot-language:
robot-description:  Designed to allow webmasters to provide a searchable index
	of their own site as well as to other sites, perhaps with
	similar content.
robot-history:
robot-environment:
modified-date:      Fri Jan 19 05:08:15 1996.
modified-by:

robot-id: phpdig
robot-name: PhpDig
robot-cover-url: http://phpdig.toiletoine.net/
robot-details-url: http://phpdig.toiletoine.net/
robot-owner-name: Antoine Bajolet
robot-owner-url: http://phpdig.toiletoine.net/
robot-owner-email: phpdig@toiletoine.net
robot-status: *
robot-purpose: indexing
robot-type: standalone
robot-platform: all supported by Apache/php/mysql
robot-availability: source
robot-exclusion: yes
robot-exclusion-useragent: phpdig
robot-noindex: yes
robot-host: yes
robot-from: no
robot-useragent: phpdig/x.x.x
robot-language: php 4.x
robot-description: Small robot and search engine written in php.
robot-history: written first 2001-03-30
robot-environment: hobby
modified-date: Sun, 21 Nov 2001 20:01:19 GMT
modified-by: Antoine Bajolet

robot-id: piltdownman
robot-name: PiltdownMan
robot-cover-url: http://profitnet.bizland.com/
robot-details-url: http://profitnet.bizland.com/piltdownman.html
robot-owner-name: Daniel Vil
robot-owner-url: http://profitnet.bizland.com/aboutus.html
robot-owner-email: profitnet@myezmail.com
robot-status: active
robot-purpose: statistics
robot-type: standalone
robot-platform: windows95, windows98, windowsNT
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: piltdownman
robot-noindex: no
robot-nofollow: no
robot-host: 62.36.128.*, 194.133.59.*, 212.106.215.*
robot-from: no
robot-useragent: PiltdownMan/1.0 profitnet@myezmail.com
robot-language: c++
robot-description: The PiltdownMan robot is used to get a
                   list of links from the search engines
                   in our database. These links are
                   followed, and the page that they refer
                   is downloaded to get some statistics
                   from them.
                   The robot runs once a month, more or
                   less, and visits the first 10 pages
                   listed in every search engine, for a
                   group of keywords.
robot-history: To maintain a database of search engines,
               we needed an automated tool. That's why
               we began the creation of this robot.
robot-environment: service
modified-date: Mon, 13 Dec 1999 21:50:32 GMT
modified-by: Daniel Vil

robot-id: pimptrain
robot-name: Pimptrain.com's robot
robot-cover-url: http://www.pimptrain.com/search.cgi
robot-details-url: http://www.pimptrain.com/search.cgi
robot-owner-name: Bryan Ankielewicz
robot-owner-url: http://www.pimptrain.com
robot-owner-email: webmaster@pimptrain.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: source;data
robot-exclusion: yes
robot-exclusion-useragent: Pimptrain
robot-noindex: yes
robot-host: pimptrain.com
robot-from: *
robot-useragent: Mozilla/4.0 (compatible: Pimptrain's robot)
robot-language: perl5
robot-description: Crawls remote sites as part of a search engine program
robot-history: Implemented in 2001
robot-environment: commercial
modified-date: May 11, 2001
modified-by: Bryan Ankielewicz

robot-id:           pioneer
robot-name:         Pioneer
robot-cover-url:    http://sequent.uncfsu.edu/~micah/pioneer.html
robot-details-url:
robot-owner-name:   Micah A. Williams
robot-owner-url:    http://sequent.uncfsu.edu/~micah/
robot-owner-email:  micah@sequent.uncfsu.edu
robot-status:
robot-purpose:      indexing, statistics
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:         *.uncfsu.edu or flyer.ncsc.org
robot-from:         yes
robot-useragent:    Pioneer
robot-language:     C.
robot-description:  Pioneer is part of an undergraduate research
	project.
robot-history:
robot-environment:
modified-date:      Mon Feb  5 02:49:32 1996.
modified-by:

robot-id:           pitkow
robot-name:         html_analyzer
robot-cover-url:    
robot-details-url:
robot-owner-name:   James E. Pitkow
robot-owner-url:    
robot-owner-email:  pitkow@aries.colorado.edu
robot-status:       
robot-purpose:      maintenance
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         
robot-from:         
robot-useragent:    
robot-language:     
robot-description:  to check validity of Web servers. I'm not sure if it has
	ever been run remotely.
robot-history:      
robot-environment:
modified-date:      
modified-by:

robot-id: pjspider
robot-name: Portal Juice Spider
robot-cover-url: http://www.portaljuice.com
robot-details-url: http://www.portaljuice.com/pjspider.html
robot-owner-name: Nextopia Software Corporation
robot-owner-url: http://www.portaljuice.com
robot-owner-email: pjspider@portaljuice.com
robot-status: active
robot-purpose: indexing, statistics
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: pjspider
robot-noindex: yes
robot-host: *.portaljuice.com, *.nextopia.com
robot-from: yes
robot-useragent: PortalJuice.com/4.0
robot-language: C/C++
robot-description: Indexing web documents for Portal Juice vertical portal
 search engine
robot-history: Indexing the web since 1998 for the purposes of offering our
 commercial Portal Juice search engine services.
robot-environment: service
modified-date: Wed Jun 23 17:00:00 EST 1999
modified-by: pjspider@portaljuice.com

robot-id:           pka
robot-name:         PGP Key Agent
robot-cover-url:    http://www.starnet.it/pgp
robot-details-url:
robot-owner-name:   Massimiliano Pucciarelli
robot-owner-url:    http://www.starnet.it/puma
robot-owner-email:  puma@comm2000.it
robot-status:       Active
robot-purpose:      indexing
robot-type:         standalone
robot-platform:     UNIX, Windows NT
robot-availability: none
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         salerno.starnet.it
robot-from:         yes
robot-useragent:    PGP-KA/1.2
robot-language:     Perl 5
robot-description:  This program searches for the PGP public key of the
                    specified user.
robot-history:      Originated as a research project at Salerno 
                    University in 1995.
robot-environment:  Research
modified-date:      June 27 1996.
modified-by:        Massimiliano Pucciarelli

robot-id: plumtreewebaccessor
robot-name: PlumtreeWebAccessor 
robot-cover-url:
robot-details-url: http://www.plumtree.com/
robot-owner-name: Joseph A. Stanko 
robot-owner-url:
robot-owner-email: josephs@plumtree.com
robot-status: development
robot-purpose: indexing for the Plumtree Server
robot-type: standalone
robot-platform: windowsNT
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: PlumtreeWebAccessor
robot-noindex: yes
robot-host:
robot-from: yes
robot-useragent: PlumtreeWebAccessor/0.9
robot-language: c++
robot-description: The Plumtree Web Accessor is a component that
 customers can add to the
        Plumtree Server to index documents on the World Wide Web.
robot-history:
robot-environment: commercial
modified-date: Thu, 17 Dec 1998
modified-by: Joseph A. Stanko <josephs@plumtree.com>

robot-id: poppi
robot-name: Poppi
robot-cover-url: http://members.tripod.com/poppisearch
robot-details-url: http://members.tripod.com/poppisearch
robot-owner-name: Antonio Provenzano
robot-owner-url: Antonio Provenzano
robot-owner-email:
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix/linux
robot-availability: none
robot-exclusion:
robot-exclusion-useragent:
robot-noindex: yes
robot-host:
robot-from:
robot-useragent: Poppi/1.0
robot-language: C
robot-description: Poppi is a crawler to index the web that runs weekly 
 gathering and indexing hypertextual, multimedia and executable file 
 formats
robot-history: Created by Antonio Provenzano in the april of 2000, has 
 been acquired from Tomi Officine Multimediali srl and it is next to 
 release as service and commercial
robot-environment: service
modified-date: Mon, 22 May 2000 15:47:30 GMT
modified-by: Antonio Provenzano

robot-id: portalb
robot-name: PortalB Spider
robot-cover-url: http://www.portalb.com/
robot-details-url:
robot-owner-name: PortalB Spider Bug List
robot-owner-url:
robot-owner-email: spider@portalb.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: PortalBSpider
robot-noindex: yes
robot-nofollow: yes
robot-host: spider1.portalb.com, spider2.portalb.com, etc.
robot-from: no
robot-useragent: PortalBSpider/1.0 (spider@portalb.com)
robot-language: C++
robot-description: The PortalB Spider indexes selected sites for
 high-quality business information.
robot-history:
robot-environment: service

robot-id: psbot
robot-name: psbot
robot-cover-url: http://www.picsearch.com/
robot-details-url: http://www.picsearch.com/bot.html
robot-owner-name: picsearch AB
robot-owner-url: http://www.picsearch.com/
robot-owner-email: psbot@picsearch.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: Linux
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: psbot
robot-noindex: yes
robot-nofollow: yes
robot-host: *.picsearch.com
robot-from: yes
robot-useragent: psbot/0.X (+http://www.picsearch.com/bot.html)
robot-language: c, c++
robot-description: Spider for www.picsearch.com 
robot-history: Developed and tested in 2000/2001
robot-environment: commercial
modified-date: Tue, 21 Aug 2001 10:55:38 CEST 2001
modified-by: psbot@picsearch.com

robot-id: Puu
robot-name: GetterroboPlus Puu
robot-details-url: http://marunaka.homing.net/straight/getter/
robot-cover-url: http://marunaka.homing.net/straight/
robot-owner-name: marunaka
robot-owner-url: http://marunaka.homing.net
robot-owner-email: marunaka@homing.net
robot-status: active: robot actively in use
robot-purpose: Purpose of the robot. One or more of:
  - gathering: gather data of original standard TAG for Puu that contains the
 information of the sites registered in my Search Engine.
  - maintenance: link validation
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes (Puu patrols only registered url in my Search Engine)
robot-exclusion-useragent:  Getterrobo-Plus
robot-noindex:  no
robot-host: straight FLASH!! Getterrobo-Plus, *.homing.net
robot-from: yes
robot-useragent: straight FLASH!! GetterroboPlus 1.5
robot-language: perl5
robot-description:
  Puu robot is used to gather data from registered sites in Search Engine
 "straight FLASH!!" for building an announcement page of the state of renewal of
 registered sites in "straight FLASH!!".
 Robot runs everyday.
robot-history:
  This robot patrols registered sites in Search Engine "straight FLASH!!"
robot-environment: hobby
modified-date: Fri, 26 Jun 1998

robot-id:           python
robot-name:         The Python Robot
robot-cover-url:    http://www.python.org/
robot-details-url:  
robot-owner-name:   Guido van Rossum
robot-owner-url:    http://www.python.org/~guido/
robot-owner-email:  guido@python.org
robot-status:       retired
robot-purpose:      
robot-type:         
robot-platform:     
robot-availability: none
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         
robot-from:         
robot-useragent:    
robot-language:     
robot-description:  
robot-history:      
robot-environment:
modified-date:      
modified-by:

robot-id: raven 
robot-name: Raven Search
robot-cover-url: http://ravensearch.tripod.com
robot-details-url: http://ravensearch.tripod.com
robot-owner-name: Raven Group
robot-owner-url: http://ravensearch.tripod.com
robot-owner-email: ravensearch@hotmail.com
robot-status: Development: robot under development
robot-purpose: Indexing: gather content for commercial query engine.
robot-type: Standalone: a separate program
robot-platform: Unix, Windows98, WindowsNT, Windows2000
robot-availability: None
robot-exclusion: Yes
robot-exclusion-useragent: Raven
robot-noindex: Yes
robot-nofollow: Yes
robot-host: 192.168.1.*
robot-from: Yes
robot-useragent: Raven-v2
robot-language: Perl-5
robot-description: Raven was written for the express purpose of indexing the web.
 It can parallel process hundreds of URLs at a time. It runs on a sporadic basis
 as testing continues. It is really several programs running concurrently.
 It takes four computers to run Raven Search. Scalable in sets of four.
robot-history: This robot is new. First active on March 25, 2000.
robot-environment: Commercial: is a commercial product. Possibly GNU later ;-)
modified-date: Fri, 25 Mar 2000 17:28:52 GMT
modified-by: Raven Group

robot-id:           rbse
robot-name:         RBSE Spider
robot-cover-url:    http://rbse.jsc.nasa.gov/eichmann/urlsearch.html
robot-details-url:
robot-owner-name:   David Eichmann
robot-owner-url:    http://rbse.jsc.nasa.gov/eichmann/home.html
robot-owner-email:  eichmann@rbse.jsc.nasa.gov
robot-status:       active
robot-purpose:      indexing, statistics
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      
robot-host:         rbse.jsc.nasa.gov (192.88.42.10)
robot-from:         
robot-useragent:    
robot-language:     C, oracle, wais
robot-description:  Developed and operated as part of the NASA-funded Repository
	Based Software Engineering Program at the Research Institute
	for Computing and Information Systems, University of Houston
	- Clear Lake.
robot-history:      
robot-environment:
modified-date:      Thu May 18 04:47:02 1995
modified-by:

robot-id:           resumerobot
robot-name:         Resume Robot
robot-cover-url:    http://www.onramp.net/proquest/resume/robot/robot.html
robot-details-url:
robot-owner-name:   James Stakelum
robot-owner-url:    http://www.onramp.net/proquest/resume/java/resume.html
robot-owner-email:  proquest@onramp.net
robot-status:
robot-purpose:      indexing.
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:
robot-from:         yes
robot-useragent:    Resume Robot
robot-language:     C++.
robot-description:
robot-history:
robot-environment:
modified-date:      Tue Mar 12 15:52:25 1996.
modified-by:

robot-id: rhcs
robot-name: RoadHouse Crawling System
robot-cover-url: http://stage.perceval.be (under development)
robot-details-url:
robot-owner-name: Gregoire Welraeds, Emmanuel Bergmans
robot-owner-url: http://www.perceval.be
robot-owner-email: helpdesk@perceval.be
robot-status: development
robot-purpose1: indexing
robot-purpose2: maintenance
robot-purpose3: statistics
robot-type: standalone
robot-platform1: unix (FreeBSD & Linux)
robot-availability: none
robot-exclusion: no (under development)
robot-exclusion-useragent: RHCS
robot-noindex: no (under development)
robot-host: stage.perceval.be
robot-from: no
robot-useragent: RHCS/1.0a
robot-language: c
robot-description: robot used to build the database for the RoadHouse search service project operated by Perceval
robot-history: The need of this robot finds its roots in the actual RoadHouse directory, not maintained since 1997
robot-environment: service
modified-date: Fri, 26 Feb 1999 12:00:00 GMT
modified-by: Gregoire Welraeds

robot-id: rixbot
robot-name: RixBot
robot-cover-url: http://www.oops-as.no/rix
robot-details-url: http://www.oops-as.no/roy/rix
robot-owner-name: HY
robot-owner-url: http://www.oops-as.no/roy
robot-status: active
robot-purpose: indexing
robot-type:standalone
robot-platform: mac
robot-exclusion: yes
robot-exclusion-useragent: RixBot
robot-noindex: yes
robot-nofollow: yes
robot-host: www.oops-as.no
robot-from: no
robot-useragent: RixBot (http://www.oops-as.no/rix/)
robot-language: REBOL
robot-description: The RixBot indexes any page containing the word "rebol".
robot-history: Hobby project
robot-environment: Hobby
modified-date: Fri, 14 May 2004 19:58:52 GMT

robot-id: roadrunner
robot-name: Road Runner: The ImageScape Robot
robot-owner-name: LIM Group
robot-owner-email: lim@cs.leidenuniv.nl
robot-status: development/active
robot-purpose: indexing
robot-type: standalone
robot-platform: UNIX
robot-exclusion: yes
robot-exclusion-useragent: roadrunner
robot-useragent: Road Runner: ImageScape Robot (lim@cs.leidenuniv.nl)
robot-language: C, perl5
robot-description: Create Image/Text index for WWW
robot-history: ImageScape Project
robot-environment: commercial service
modified-date: Dec. 1st, 1996

robot-id: robbie
robot-name: Robbie the Robot
robot-cover-url:
robot-details-url:
robot-owner-name: Robert H. Pollack
robot-owner-url:
robot-owner-email: robert.h.pollack@lmco.com
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: unix, windows95, windowsNT
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: Robbie
robot-noindex: no
robot-host: *.lmco.com
robot-from: yes
robot-useragent: Robbie/0.1
robot-language: java
robot-description: Used to define document collections for the DISCO system.
                   Robbie is still under development and runs several
                   times a day, but usually only for ten minutes or so.
                   Sites are visited in the order in which references
                   are found, but no host is visited more than once in
                   any two-minute period.
robot-history: The DISCO system is a resource-discovery component in
               the OLLA system, which is a prototype system, developed
               under DARPA funding, to support computer-based education
               and training.
robot-environment: research
modified-date: Wed,  5 Feb 1997 19:00:00 GMT
modified-by:


robot-id: robi
robot-name: ComputingSite Robi/1.0
robot-cover-url: http://www.computingsite.com/robi/
robot-details-url: http://www.computingsite.com/robi/
robot-owner-name: Tecor Communications S.L.
robot-owner-url: http://www.tecor.com/
robot-owner-email: robi@computingsite.com
robot-status: Active
robot-purpose: indexing,maintenance
robot-type: standalone
robot-platform: UNIX
robot-availability:
robot-exclusion: yes
robot-exclusion-useragent: robi
robot-noindex: no
robot-host: robi.computingsite.com
robot-from:
robot-useragent: ComputingSite Robi/1.0 (robi@computingsite.com)
robot-language: python
robot-description: Intelligent agent used to build the ComputingSite Search
 Directory.
robot-history: It was born on August 1997.
robot-environment: service
modified-date: Wed, 13 May 1998 17:28:52 GMT
modified-by: Jorge Alegre

robot-id: robocrawl
robot-name: RoboCrawl Spider
robot-cover-url: http://www.canadiancontent.net/
robot-details-url: http://www.canadiancontent.net/corp/spider.html
robot-owner-name: Canadian Content Interactive Media
robot-owner-url: http://www.canadiancontent.net/
robot-owner-email: staff@canadiancontent.net
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: linux
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: RoboCrawl
robot-noindex: yes
robot-host: ncc.canadiancontent.net, ncc.air-net.no, canadiancontent.net, spider.canadiancontent.net
robot-from: no
robot-useragent: RoboCrawl (http://www.canadiancontent.net)
robot-language: C and C++
robot-description: The Canadian Content robot indexes for its search database.
robot-history: Our robot is a newer project at Canadian Content.
robot-environment: service
modified-date: July 30th, 2001
modified-by: Christopher Walsh and Adam Rutter

robot-id: robofox
robot-name: RoboFox
robot-cover-url:
robot-details-url:
robot-owner-name: Ian Hicks
robot-owner-url:
robot-owner-email: robo_fox@hotmail.com
robot-status: development
robot-purpose: site download
robot-type: standalone
robot-platform: windows9x, windowsme, windowsNT4, windows2000
robot-availability: none
robot-exclusion: no
robot-exclusion-useragent: robofox
robot-noindex: no
robot-host: *
robot-from: no
robot-useragent: Robofox v2.0
robot-language: Visual FoxPro
robot-description: scheduled utility to download and database a domain
robot-history:
robot-environment: service
modified-date: Tue, 6 Mar 2001 02:15:00 GMT
modified-by: Ian Hicks

robot-id: robozilla
robot-name: Robozilla
robot-cover-url: http://dmoz.org/
robot-details-url: http://www.dmoz.org/newsletter/2000Aug/robo.html
robot-owner-name: "Rob O'Zilla"
robot-owner-url: http://dmoz.org/profiles/robozilla.html
robot-owner-email: robozilla@dmozed.org
robot-status: active
robot-purpose: maintenance
robot-type: standalone
robot-availability: none
robot-exclusion: no
robot-noindex: no
robot-host: directory.mozilla.org
robot-useragent: Robozilla/1.0
robot-description: Robozilla visits all the links within the Open Directory
 periodically, marking the ones that return errors for review.
robot-environment: service

robot-id:           roverbot
robot-name:         Roverbot
robot-cover-url:    http://www.roverbot.com/
robot-details-url:
robot-owner-name:   GlobalMedia Design (Andrew Cowan & Brian
	Clark)
robot-owner-url:    http://www.radzone.org/gmd/
robot-owner-email:  gmd@spyder.net
robot-status:
robot-purpose:      indexing
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:         roverbot.com
robot-from:         yes
robot-useragent:    Roverbot
robot-language:     perl5
robot-description:  Targeted email gatherer utilizing user-defined seed points
	and interacting with both the webserver and MX servers of
	remote sites.
robot-history:
robot-environment:
modified-date:      Tue Jun 18 19:16:31 1996.
modified-by:

robot-id: rules
robot-name: RuLeS
robot-cover-url: http://www.rules.be
robot-details-url: http://www.rules.be
robot-owner-name: Marc Wils
robot-owner-url: http://www.rules.be
robot-owner-email: marc@rules.be
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: yes
robot-noindex: yes
robot-host: www.rules.be
robot-from: yes
robot-useragent: RuLeS/1.0 libwww/4.0
robot-language: Dutch (Nederlands)
robot-description: 
robot-history: none
robot-environment: hobby
modified-date: Sun, 8 Apr 2001 13:06:54 CET
modified-by: Marc Wils

robot-id:           safetynetrobot
robot-name:         SafetyNet Robot
robot-cover-url:    http://www.urlabs.com/
robot-details-url:
robot-owner-name:   Michael L. Nelson
robot-owner-url:    http://www.urlabs.com/
robot-owner-email:  m.l.nelson@urlabs.com
robot-status:
robot-purpose:      indexing.
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    no.
robot-exclusion-useragent:
robot-noindex:
robot-host:         *.urlabs.com
robot-from:         yes
robot-useragent:    SafetyNet Robot 0.1, 
robot-language:     Perl 5
robot-description:  Finds URLs for K-12 content management.
robot-history:
robot-environment:
modified-date:      Sat Mar 23 20:12:39 1996.
modified-by:

robot-id: scooter
robot-name: Scooter
robot-cover-url: http://www.altavista.com/
robot-details-url: http://www.altavista.com/av/content/addurl.htm
robot-owner-name: AltaVista
robot-owner-url: http://www.altavista.com/
robot-owner-email: scooter@pa.dec.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: Scooter
robot-noindex: yes
robot-host: *.av.pa-x.dec.com
robot-from: yes
robot-useragent: Scooter/2.0 G.R.A.B. V1.1.0
robot-language: c
robot-description: Scooter is AltaVista's prime index agent.
robot-history: Version 2 of Scooter/1.0 developed by Louis Monier of WRL.
robot-environment: service
modified-date: Wed, 13 Jan 1999 17:18:59 GMT
modified-by: steves@avs.dec.com

robot-id: search_au
robot-name: Search.Aus-AU.COM
robot-details-url: http://Search.Aus-AU.COM/
robot-cover-url: http://Search.Aus-AU.COM/
robot-owner-name: Dez Blanchfield
robot-owner-url: not currently available
robot-owner-email: dez@geko.com
robot-status: - development: robot under development
robot-purpose: - indexing: gather content for an indexing service
robot-type: - standalone: a separate program
robot-platform: - mac - unix - windows95 - windowsNT
robot-availability: - none
robot-exclusion: yes
robot-exclusion-useragent: Search-AU
robot-noindex: yes
robot-host: Search.Aus-AU.COM, 203.55.124.29, 203.2.239.29
robot-from: no
robot-useragent: not available
robot-language: c, perl, sql
robot-description: Search-AU is a development tool I have built
 to investigate the power of a search engine and web crawler
 to give me access to a database of web content ( html / url's )
 and address's etc from which I hope to build more accurate stats
 about the .au zone's web content.
 the robot started crawling from http://www.geko.net.au/ on
 march 1st, 1998 and after nine days had 70mb of compressed ascii
 in a database to work with. i hope to run a refresh of the crawl
 every month initially, and soon every week bandwidth and cpu allowing.
 if the project warrants further development, i will turn it into
 an australian ( .au ) zone search engine and make it commercially
 available for advertising to cover the costs which are starting
 to mount up. --dez (980313 - black friday!)
robot-environment: - hobby: written as a hobby
modified-date: Fri Mar 13 10:03:32 EST 1998

robot-id: search-info
robot-name: Sleek
robot-cover-url: http://search-info.com/
robot-details-url:
robot-owner-name: Lawrence R. Hughes, Sr.
robot-owner-url: http://hughesnet.net/
robot-owner-email: lawrence.hughes@search-info.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: Unix, Linux, Windows
robot-availability: source;data
robot-exclusion: yes
robot-exclusion-useragent: robots.txt
robot-noindex: yes
robot-host: yes
robot-from: yes
robot-useragent: Mozilla/4.0 (Sleek Spider/1.2)
robot-language: perl5
robot-description: Crawls remote sites and performs link popularity checks before inclusion.
robot-history: HyBrid of the FDSE Crawler by: Zoltan Milosevic Current Mods: started 1/10/2002
robot-environment: hobby
modified-date: Mon, 14 Jan 2002 08:02:23 GMT
modified-by: Lawrence R. Hughes, Sr.

robot-id: searchprocess
robot-name: SearchProcess
robot-cover-url: http://www.searchprocess.com
robot-details-url: http://www.intelligence-process.com
robot-owner-name: Mannina Bruno
robot-owner-url: http://www.intelligence-process.com
robot-owner-email: bruno@intelligence-process.com
robot-status: active
robot-purpose: Statistic
robot-type: browser
robot-platform: linux
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: searchprocess
robot-noindex: yes
robot-host: searchprocess.com
robot-from: yes
robot-useragent: searchprocess/0.9
robot-language: perl
robot-description: An intelligent Agent Online. SearchProcess is used to
 provide structured information to user.
robot-history: This is the son of Auresys
robot-environment: Service freeware
modified-date: Thus, 22 Dec 1999
modified-by: Mannina Bruno

robot-id:           senrigan
robot-name:         Senrigan
robot-cover-url:    http://www.info.waseda.ac.jp/search-e.html
robot-details-url:
robot-owner-name:   TAMURA Kent
robot-owner-url:    http://www.info.waseda.ac.jp/muraoka/members/kent/
robot-owner-email:  kent@muraoka.info.waseda.ac.jp
robot-status:       active
robot-purpose:      indexing
robot-type:         standalone
robot-platform:     Java
robot-availability: none
robot-exclusion:    yes
robot-exclusion-useragent:Senrigan
robot-noindex:      yes
robot-host:         aniki.olu.info.waseda.ac.jp
robot-from:         yes
robot-useragent:    Senrigan/xxxxxx
robot-language:     Java
robot-description:  This robot now gets HTMLs from only jp domain.
robot-history:      It has been running since Dec 1994
robot-environment:  research
modified-date:      Mon Jul  1 07:30:00 GMT 1996
modified-by:        TAMURA Kent

robot-id:           sgscout
robot-name:         SG-Scout
robot-cover-url:    http://www-swiss.ai.mit.edu/~ptbb/SG-Scout/SG-Scout.html
robot-details-url:
robot-owner-name:   Peter Beebee
robot-owner-url:    http://www-swiss.ai.mit.edu/~ptbb/personal/index.html
robot-owner-email:  ptbb@ai.mit.edu, beebee@parc.xerox.com
robot-status:       active
robot-purpose:      indexing
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         beta.xerox.com
robot-from:         yes
robot-useragent:    SG-Scout
robot-language:     
robot-description:  Does a "server-oriented" breadth-first search in a
	round-robin fashion, with multiple processes.
robot-history:      Run since 27 June 1994, for an internal XEROX research
	project
robot-environment:
modified-date:      
modified-by:

robot-id:shaggy
robot-name:ShagSeeker
robot-cover-url:http://www.shagseek.com
robot-details-url:
robot-owner-name:Joseph Reynolds
robot-owner-url:http://www.shagseek.com
robot-owner-email:joe.reynolds@shagseek.com
robot-status:active
robot-purpose:indexing
robot-type:standalone
robot-platform:unix
robot-availability:data
robot-exclusion:yes
robot-exclusion-useragent:Shagseeker
robot-noindex:yes
robot-host:shagseek.com
robot-from:
robot-useragent:Shagseeker at http://www.shagseek.com /1.0
robot-language:perl5
robot-description:Shagseeker is the gatherer for the Shagseek.com search 
 engine and goes out weekly.
robot-history:none yet
robot-environment:service
modified-date:Mon 17 Jan 2000 10:00:00 EST
modified-by:Joseph Reynolds

robot-id: shaihulud
robot-name: Shai'Hulud
robot-cover-url: 
robot-details-url:
robot-owner-name: Dimitri Khaoustov
robot-owner-url:
robot-owner-email: shawdow@usa.net
robot-status: active
robot-purpose: mirroring
robot-type: standalone
robot-platform: unix
robot-availability: source
robot-exclusion: no
robot-exclusion-useragent: 
robot-noindex: no
robot-host: *.rdtex.ru
robot-from:
robot-useragent: Shai'Hulud
robot-language: C
robot-description: Used to build mirrors for internal use
robot-history: This robot finds its roots in a research project at RDTeX 
        Perspective Projects Group in 1996
robot-environment: research
modified-date: Mon, 5 Aug 1996 14:35:08 GMT
modified-by: Dimitri Khaoustov

robot-id: sift
robot-name: Sift
robot-cover-url: http://www.worthy.com/
robot-details-url: http://www.worthy.com/
robot-owner-name: Bob Worthy    
robot-owner-url: http://www.worthy.com/~bworthy  
robot-owner-email: bworthy@worthy.com
robot-status: development, active  
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: data
robot-exclusion: yes
robot-exclusion-useragent: sift
robot-noindex: yes
robot-host: www.worthy.com
robot-from:
robot-useragent: libwww-perl-5.41
robot-language: perl
robot-description: Subject directed (via key phrase list) indexing.
robot-history: Libwww of course, implementation using MySQL August, 1999.
 Indexing Search and Rescue sites.
robot-environment: research, service
modified-date: Sat, 16 Oct 1999 19:40:00 GMT
modified-by: Bob Worthy

robot-id: simbot
robot-name: Simmany Robot Ver1.0
robot-cover-url: http://simmany.hnc.net/
robot-details-url: http://simmany.hnc.net/irman1.html
robot-owner-name: Youngsik, Lee(@L?5=D)
robot-owner-url:
robot-owner-email: ailove@hnc.co.kr
robot-status: development & active
robot-purpose: indexing, maintenance, statistics
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: SimBot
robot-noindex: no 
robot-host: sansam.hnc.net
robot-from: no
robot-useragent: SimBot/1.0
robot-language: C
robot-description: The Simmany Robot is used to build the Map(DB) for
 the simmany service operated by HNC(Hangul & Computer Co., Ltd.). The
 robot runs weekly, and visits sites that have useful Korean
 information in a defined order.
robot-history: This robot is a part of simmany service and simmini
 products. The simmini is the Web products that make use of the indexing
 and retrieving modules of simmany.
robot-environment: service, commercial
modified-date: Thu, 19 Sep 1996 07:02:26 GMT
modified-by: Youngsik, Lee

robot-id: site-valet
robot-name: Site Valet
robot-cover-url: http://valet.webthing.com/
robot-details-url: http://valet.webthing.com/
robot-owner-name: Nick Kew
robot-owner-url:
robot-owner-email: nick@webthing.com
robot-status: active
robot-purpose: maintenance
robot-type: standalone
robot-platform: unix
robot-availability: data
robot-exclusion: yes
robot-exclusion-useragent: Site Valet
robot-noindex: no
robot-host: valet.webthing.com,valet.*
robot-from: yes
robot-useragent: Site Valet
robot-language: perl
robot-description: a deluxe site monitoring and analysis service
robot-history: builds on cg-eye, the WDG Validator, and the Link Valet
robot-environment: service
modified-date: Tue, 27 June 2000
modified-by: nick@webthing.com

robot-id:           sitetech
robot-name:         SiteTech-Rover
robot-cover-url:    http://www.sitetech.com/
robot-details-url:
robot-owner-name:   Anil Peres-da-Silva
robot-owner-url:    http://www.sitetech.com
robot-owner-email:  adasilva@sitetech.com
robot-status:
robot-purpose:      indexing
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:
robot-from:         yes
robot-useragent:    SiteTech-Rover
robot-language:     C++.
robot-description:  Originated as part of a suite of Internet Products to
        organize, search & navigate Intranet sites and to validate
        links in HTML documents.
robot-history: This robot originally went by the name of LiberTech-Rover
robot-environment:
modified-date:      Fri Aug 9 17:06:56 1996.
modified-by: Anil Peres-da-Silva

robot-id: skymob
robot-name: Skymob.com
robot-cover-url: http://www.skymob.com/
robot-details-url: http://www.skymob.com/about.html
robot-owner-name: Have IT Now Limited.
robot-owner-url: http://www.skymob.com/
robot-owner-email: searchmaster@skymob.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: skymob
robot-noindex: no
robot-host: www.skymob.com
robot-from: searchmaster@skymob.com
robot-useragent: aWapClient
robot-language: c++
robot-description: WAP content Crawler.
robot-history: new
robot-environment: service
modified-date: Thu Sep  6 17:50:32 BST 2001
modified-by: Owen Lydiard

robot-id:slcrawler
robot-name:SLCrawler
robot-cover-url:
robot-details-url:
robot-owner-name:Inxight Software
robot-owner-url:http://www.inxight.com
robot-owner-email:kng@inxight.com
robot-status:active
robot-purpose:To build the site map.
robot-type:standalone
robot-platform:windows, windows95, windowsNT
robot-availability:none
robot-exclusion:yes
robot-exclusion-useragent:SLCrawler/2.0
robot-noindex:no
robot-host:n/a
robot-from:
robot-useragent:SLCrawler
robot-language:Java
robot-description:To build the site map.
robot-history:It is SLCrawler to crawl html page on Internet.
robot-environment: commercial: is a commercial product
modified-date:Nov. 15, 2000
modified-by:Karen Ng

robot-id: slurp
robot-name: Inktomi Slurp
robot-cover-url: http://www.inktomi.com/
robot-details-url: http://www.inktomi.com/slurp.html
robot-owner-name: Inktomi Corporation
robot-owner-url: http://www.inktomi.com/
robot-owner-email: slurp@inktomi.com
robot-status: active
robot-purpose: indexing, statistics
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: slurp
robot-noindex: yes
robot-host: *.inktomi.com
robot-from: yes
robot-useragent: Slurp/2.0
robot-language: C/C++
robot-description: Indexing documents for the HotBot search engine
		(www.hotbot.com), collecting Web statistics
robot-history: Switch from Slurp/1.0 to Slurp/2.0 November 1996
robot-environment: service
modified-date: Fri Feb 28 13:57:43 PST 1997
modified-by: slurp@inktomi.com

robot-id: smartspider
robot-name: Smart Spider
robot-cover-url: http://www.travel-finder.com
robot-details-url: http://www.engsoftware.com/robots.htm
robot-owner-name: Ken Wadland
robot-owner-url: http://www.engsoftware.com
robot-owner-email: ken@engsoftware.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: windows95, windowsNT
robot-availability: data, binary, source
robot-exclusion: Yes
robot-exclusion-useragent: ESI
robot-noindex: Yes
robot-host: 207.16.241.*
robot-from: Yes
robot-useragent: ESISmartSpider/2.0
robot-language: C++
robot-description:  Classifies sites using a Knowledge Base.   Robot collects 
        web pages which are then parsed and fed to the Knowledge Base.  The
 Knowledge Base classifies the sites into any of hundreds of     categories
 based on the vocabulary used.  Currently used by:       //www.travel-finder.com
 (Travel and Tourist Info) and   //www.golightway.com (Christian Sites).
 Several options exist to        control whether sites are discovered and/or
 classified fully        automatically, full manually    or somewhere in between.
robot-history: Feb '96 -- Product design begun.  May '96 -- First data
 results         published by Travel-Finder.  Oct '96 -- Generalized and announced
 and a   product for other sites.  Jan '97 -- First data results published by
        GoLightWay.
robot-environment: service, commercial
modified-date: Mon, 13 Jan 1997 10:41:00 EST
modified-by: Ken Wadland

robot-id: snooper
robot-name: Snooper
robot-cover-url: http://darsun.sit.qc.ca
robot-details-url:
robot-owner-name: Isabelle A. Melnick
robot-owner-url:
robot-owner-email: melnicki@sit.ca
robot-status: part under development and part active
robot-purpose:
robot-type:
robot-platform:
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: snooper
robot-noindex:
robot-host:
robot-from:
robot-useragent: Snooper/b97_01
robot-language:
robot-description:
robot-history:
robot-environment:
modified-date:
modified-by:

robot-id: solbot
robot-name: Solbot
robot-cover-url: http://kvasir.sol.no/
robot-details-url:
robot-owner-name: Frank Tore Johansen
robot-owner-url:
robot-owner-email: ftj@sys.sol.no
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: solbot
robot-noindex: yes
robot-host: robot*.sol.no
robot-from:
robot-useragent: Solbot/1.0 LWP/5.07
robot-language: perl, c
robot-description: Builds data for the Kvasir search service.  Only searches
 sites which end with one of the following domains: "no", "se", "dk", "is", "fi"
robot-history: This robot is the result of a 3 years old late night hack when
 the Verity robot (of that time) was unable to index sites with iso8859
 characters (in URL and other places), and we just _had_ to have something up and going the next day...
robot-environment: service
modified-date: Tue Apr  7 16:25:05 MET DST 1998
modified-by: Frank Tore Johansen <ftj@sys.sol.no>

robot-id:speedy
robot-name:Speedy Spider
robot-cover-url:http://www.entireweb.com/
robot-details-url:http://www.entireweb.com/speedy.html
robot-owner-name:WorldLight.com AB
robot-owner-url:http://www.worldlight.com
robot-owner-email:speedy@worldlight.com
robot-status:active
robot-purpose:indexing
robot-type:standalone
robot-platform:Windows
robot-availability:none
robot-exclusion:yes
robot-exclusion-useragent:speedy
robot-noindex:yes
robot-host:router-00.sverige.net, 193.15.210.29, *.entireweb.com,
 *.worldlight.com
robot-from:yes
robot-useragent:Speedy Spider ( http://www.entireweb.com/speedy.html )
robot-language:C, C++
robot-description:Speedy Spider is used to build the database
           for the Entireweb.com search service operated by WorldLight.com
           (part of WorldLight Network).
           The robot runs constantly, and visits sites in a random order.
robot-history:This robot is a part of the highly advanced search engine
 Entireweb.com, that was developed in Halmstad, Sweden during 1998-2000.
robot-environment:service, commercial
modified-date:Mon, 17 July 2000 11:05:03 GMT
modified-by:Marcus Andersson

robot-id: spider_monkey
robot-name: spider_monkey
robot-cover-url: http://www.mobrien.com/add_site.html
robot-details-url: http://www.mobrien.com/add_site.html
robot-owner-name: MPRM Group Limited
robot-owner-url: http://www.mobrien.com
robot-owner-email: mprm@ionsys.com
robot-status: robot actively in use
robot-purpose: gather content for a free indexing service
robot-type: FDSE robot
robot-platform: unix
robot-availability: bulk data gathered by robot available
robot-exclusion: yes
robot-exclusion-useragent: spider_monkey
robot-noindex: yes
robot-host: snowball.ionsys.com
robot-from: yes
robot-useragent: mouse.house/7.1
robot-language: perl5
robot-description: Robot runs every 30 days for a full index and weekly =
 on a list of accumulated visitor requests
robot-history: This robot is under development and currently active
robot-environment: written as an employee / guest service
modified-date: Mon, 22 May 2000 12:28:52 GMT
modified-by: MPRM Group Limited

robot-id: spiderbot
robot-name: SpiderBot
robot-cover-url: http://pisuerga.inf.ubu.es/lsi/Docencia/TFC/ITIG/icruzadn/cover.htm
robot-details-url: http://pisuerga.inf.ubu.es/lsi/Docencia/TFC/ITIG/icruzadn/details.htm
robot-owner-name: Ignacio Cruzado Nuño
robot-owner-url: http://pisuerga.inf.ubu.es/lsi/Docencia/TFC/ITIG/icruzadn/icruzadn.htm
robot-owner-email: spidrboticruzado@solaria.emp.ubu.es
robot-status: active
robot-purpose: indexing, mirroring
robot-type: standalone, browser
robot-platform: unix, windows, windows95, windowsNT
robot-availability: source, binary, data
robot-exclusion: yes
robot-exclusion-useragent: SpiderBot/1.0
robot-noindex: yes
robot-host: *
robot-from: yes
robot-useragent: SpiderBot/1.0
robot-language: C++, Tcl
robot-description: Recovers Web Pages and saves them on your hard disk.  Then it reindexes them.
robot-history: This Robot belongs to Ignacio Cruzado Nuño's End of Studies Thesis "Recuperador páginas Web", to get the titulation of "Management Technical Informatics Engineer" for the Burgos University in Spain.
robot-environment: research
modified-date: Sun, 27 Jun 1999 09:00:00 GMT
modified-by: Ignacio Cruzado Nuño

robot-id: spiderline
robot-name: Spiderline Crawler
robot-cover-url: http://www.spiderline.com/
robot-details-url: http://www.spiderline.com/
robot-owner-name: Benjamin Benson
robot-owner-url: http://www.spiderline.com/
robot-owner-email: ben@spiderline.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: free and commercial services
robot-exclusion: yes
robot-exclusion-useragent: spiderline
robot-noindex: yes
robot-host: *.spiderline.com, *.spiderline.org
robot-from: no
robot-useragent: spiderline/3.1.3
robot-language: c, c++
robot-description: 
robot-history: Developed for Spiderline.com, launched in 2001.
robot-environment: service
modified-date: Wed, 21 Feb 2001 03:36:39 GMT
modified-by: Benjamin Benson

robot-id:spiderman
robot-name:SpiderMan
robot-cover-url:http://www.comp.nus.edu.sg/~leunghok
robot-details-url:http://www.comp.nus.edu.sg/~leunghok/honproj.html
robot-owner-name:Leung Hok Peng , The School Of Computing Nus , Singapore
robot-owner-url:http://www.comp.nus.edu.sg/~leunghok
robot-owner-email:leunghok@comp.nus.edu.sg
robot-status:development & active
robot-purpose:user searching using IR technique
robot-type:stand alone
robot-platform:Java 1.2
robot-availability:binary&source
robot-exclusion:no
robot-exclusion-useragent:nil
robot-noindex:no
robot-host:NA
robot-from:NA
robot-useragent:SpiderMan 1.0
robot-language:java
robot-description:It is used for any user to search the web given a query string
robot-history:Originated from The Center for Natural Product Research and The
 School of computing National University Of Singapore
robot-environment:research
modified-date:08/08/1999
modified-by:Leung Hok Peng and Dr Hsu Wynne

robot-id: spiderview
robot-name: SpiderView(tm)
robot-cover-url: http://www.northernwebs.com/set/spider_view.html
robot-details-url: http://www.northernwebs.com/set/spider_sales.html
robot-owner-name: Northern Webs
robot-owner-url: http://www.northernwebs.com
robot-owner-email: webmaster@northernwebs.com
robot-status: active
robot-purpose: maintenance
robot-type: standalone
robot-platform: unix, nt
robot-availability: source
robot-exclusion: no
robot-exclusion-useragent:
robot-noindex:
robot-host: bobmin.quad2.iuinc.com, *
robot-from: No
robot-useragent: Mozilla/4.0 (compatible; SpiderView 1.0;unix)
robot-language: perl
robot-description: SpiderView is a server based program which can spider
 a webpage, testing the links found on the page, evaluating your server
 and its performance.
robot-history: This is an offshoot http retrieval program based on our
 Medibot software.
robot-environment: commercial
modified-date:
modified-by:

robot-id:           spry
robot-name:         Spry Wizard Robot
robot-cover-url:    http://www.spry.com/wizard/index.html
robot-details-url:
robot-owner-name:   spry
robot-owner-url:    http://www.spry.com/index.html
robot-owner-email:  info@spry.com
robot-status:       
robot-purpose:      indexing
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      
robot-host:         wizard.spry.com or tiger.spry.com
robot-from:         no
robot-useragent:    no
robot-language:     
robot-description:  Its purpose is to generate a Resource Discovery database
	Spry is refusing to give any comments about this
	robot
robot-history:      
robot-environment:
modified-date:      Tue Jul 11 09:29:45 GMT 1995
modified-by:

robot-id: ssearcher
robot-name: Site Searcher
robot-cover-url: www.satacoy.com
robot-details-url: www.satacoy.com
robot-owner-name: Zackware
robot-owner-url: www.satacoy.com
robot-owner-email: zackware@hotmail.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: windows95, windows98, windowsNT
robot-availability: binary
robot-exclusion: no
robot-exclusion-useragent:
robot-noindex: no
robot-host: *
robot-from: no
robot-useragent: ssearcher100
robot-language: C++
robot-description: Site Searcher scans web sites for specific file types.
 (JPG, MP3, MPG, etc)
robot-history:  Released 4/4/1999
robot-environment: hobby
modified-date: 04/26/1999

robot-id: suke
robot-name: Suke
robot-cover-url: http://www.kensaku.org/
robot-details-url: http://www.kensaku.org/
robot-owner-name: Yosuke Kuroda
robot-owner-url: http://www.kensaku.org/yk/
robot-owner-email: robot@kensaku.org
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: FreeBSD3.*
robot-availability: source
robot-exclusion: yes
robot-exclusion-useragent: suke
robot-noindex: no
robot-host: *
robot-from: yes
robot-useragent: suke/*.*
robot-language: c
robot-description: This robot visits mainly sites in japan.
robot-history: since 1999
robot-environment: service

robot-id: suntek
robot-name: suntek search engine
robot-cover-url: http://www.portal.com.hk/
robot-details-url: http://www.suntek.com.hk/
robot-owner-name: Suntek Computer Systems
robot-owner-url: http://www.suntek.com.hk/
robot-owner-email: karen@suntek.com.hk
robot-status: operational
robot-purpose: to create a search portal on Asian web sites
robot-type:
robot-platform: NT, Linux, UNIX
robot-availability: available now
robot-exclusion:
robot-exclusion-useragent:
robot-noindex: yes
robot-host: search.suntek.com.hk
robot-from: yes
robot-useragent: suntek/1.0
robot-language: Java
robot-description: A multilingual search engine with emphasis on Asia contents
robot-history:
robot-environment:
modified-date:
modified-by:

robot-id: sven
robot-name: Sven
robot-cover-url: 
robot-details-url: http://marty.weathercity.com/sven/
robot-owner-name: Marty Anstey
robot-owner-url: http://marty.weathercity.com/
robot-owner-email: rhondle@home.com
robot-status: Active
robot-purpose: indexing
robot-type: standalone
robot-platform: Windows
robot-availability: none
robot-exclusion: no
robot-exclusion-useragent: 
robot-noindex: no
robot-host: 24.113.12.29
robot-from: no
robot-useragent:
robot-language: VB5
robot-description: Used to gather sites for netbreach.com. Runs constantly.
robot-history: Developed as an experiment in web indexing.
robot-environment: hobby, service
modified-date: Tue, 3 Mar 1999 08:15:00 PST
modified-by: Marty Anstey

robot-id:                      sygol
robot-name:                    Sygol 
robot-cover-url:               http://www.sygol.com
robot-details-url:             http://www.sygol.com/who.asp
robot-owner-name:              Giorgio Galeotti
robot-owner-url:               http://www.sygol.com
robot-owner-email:             webmaster@sygol.com
robot-status:                  active
robot-purpose:                 indexing: gather pages for the Sygol search engine
robot-type:                    standalone
robot-platform:                All Windows from 95 to latest.
robot-availability:            none
robot-exclusion:               yes
robot-exclusion-useragent:     http://www.sygol.com
robot-noindex:                 no
robot-host:                    http://www.sygol.com
robot-from:                    No
robot-useragent:               http://www.sygol.com
robot-language:                Visual Basic
robot-description:             Very standard robot: it gets all words and
 links from a page end then indexes the first and stores the latter for further
 crawling.
robot-history:                 It all started in 1999 as a hobby to try
 crawling the web and putting together a good search engine with very little
 hardware resources.
robot-environment:             Hobby
modified-date:                 Mon, 07 Jun 2004 14:50:01 GMT
modified-by:                   Giorgio Galeotti

robot-id: tach_bw
robot-name: TACH Black Widow
robot-cover-url: http://theautochannel.com/~mjenn/bw.html
robot-details-url: http://theautochannel.com/~mjenn/bw-syntax.html
robot-owner-name: Michael Jennings
robot-owner-url: http://www.spd.louisville.edu/~mejenn01/
robot-owner-email: mjenn@theautochannel.com
robot-status: development
robot-purpose: maintenance: link validation
robot-type: standalone
robot-platform: UNIX, Linux
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: tach_bw
robot-noindex: no
robot-host: *.theautochannel.com
robot-from: yes
robot-useragent: Mozilla/3.0 (Black Widow v1.1.0; Linux 2.0.27; Dec 31 1997 12:25:00)
robot-language: C/C++
robot-description: Exhaustively recurses a single site to check for broken links
robot-history: Corporate application begun in 1996 for The Auto Channel
robot-environment: commercial
modified-date: Thu, Jan 23 1997 23:09:00 GMT
modified-by: Michael Jennings

robot-id:tarantula
robot-name: Tarantula
robot-cover-url: http://www.nathan.de/nathan/software.html#TARANTULA
robot-details-url: http://www.nathan.de/
robot-owner-name: Markus Hoevener
robot-owner-url:
robot-owner-email: Markus.Hoevener@evision.de
robot-status: development
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: yes
robot-noindex: yes
robot-host: yes
robot-from: no
robot-useragent: Tarantula/1.0
robot-language: C
robot-description: Tarantula gathers information for the German search engine Nathan
robot-history: Started February 1997
robot-environment: service
modified-date: Mon, 29 Dec 1997 15:30:00 GMT
modified-by: Markus Hoevener

robot-id:           tarspider
robot-name:         tarspider
robot-cover-url:    
robot-details-url:
robot-owner-name:   Olaf Schreck
robot-owner-url:    http://www.chemie.fu-berlin.de/user/chakl/ChaklHome.html
robot-owner-email:  chakl@fu-berlin.de
robot-status:       
robot-purpose:      mirroring
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         
robot-from:         chakl@fu-berlin.de
robot-useragent:    tarspider
robot-language:     
robot-description:  
robot-history:      
robot-environment:
modified-date:      
modified-by:

robot-id:           tcl
robot-name:         Tcl W3 Robot
robot-cover-url:    http://hplyot.obspm.fr/~dl/robo.html
robot-details-url:
robot-owner-name:   Laurent Demailly
robot-owner-url:    http://hplyot.obspm.fr/~dl/
robot-owner-email:  dl@hplyot.obspm.fr
robot-status:       
robot-purpose:      maintenance, statistics
robot-type:         standalone
robot-platform:     
robot-availability: 
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         hplyot.obspm.fr
robot-from:         yes
robot-useragent:    dlw3robot/x.y (in TclX by http://hplyot.obspm.fr/~dl/)
robot-language:     tcl
robot-description:  Its purpose is to validate links, and generate
	statistics.
robot-history:      
robot-environment:
modified-date:      Tue May 23 17:51:39 1995
modified-by:

robot-id: techbot
robot-name: TechBOT
robot-cover-url: http://www.techaid.net/
robot-details-url: http://www.techaid.net/TechBOT/
robot-owner-name: TechAID Internet Services
robot-owner-url: http://www.techaid.net/
robot-owner-email: techbot@techaid.net
robot-status: active
robot-purpose:statistics, maintenance
robot-type: standalone
robot-platform: Unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: TechBOT
robot-noindex: yes
robot-host: techaid.net
robot-from: yes
robot-useragent: TechBOT
robot-language: perl5
robot-description: TechBOT is constantly upgraded. Currently he is used for
 Link Validation, Load Time, HTML Validation and much much more.
robot-history: TechBOT started his life as a Page Change Detection robot,
 but has taken on many new and exciting roles.
robot-environment: service
modified-date: Sat, 18 Dec 1998 14:26:00 EST
modified-by: techbot@techaid.net

robot-id: templeton
robot-name: Templeton
robot-cover-url: http://www.bmtmicro.com/catalog/tton/
robot-details-url: http://www.bmtmicro.com/catalog/tton/
robot-owner-name: Neal Krawetz
robot-owner-url: http://www.cs.tamu.edu/people/nealk/
robot-owner-email: nealk@net66.com
robot-status: active
robot-purpose: mirroring, mapping, automating web applications
robot-type: standalone
robot-platform: OS/2, Linux, SunOS, Solaris
robot-availability: binary
robot-exclusion: yes
robot-exclusion-useragent: templeton
robot-noindex: no
robot-host: *
robot-from: yes
robot-useragent: Templeton/{version} for {platform}
robot-language: C
robot-description: Templeton is a very configurable robot for mirroring, mapping, and automating applications on retrieved documents.
robot-history: This robot was originally created as a test-of-concept.
robot-environment: service, commercial, research, hobby
modified-date: Sun, 6 Apr 1997 10:00:00 GMT
modified-by: Neal Krawetz

robot-id: titin
robot-name: TitIn
robot-cover-url: http://www.foi.hr/~dpavlin/titin/
robot-details-url: http://www.foi.hr/~dpavlin/titin/tehnical.htm
robot-owner-name: Dobrica Pavlinusic
robot-owner-url: http://www.foi.hr/~dpavlin/
robot-owner-email: dpavlin@foi.hr
robot-status: development
robot-purpose: indexing, statistics
robot-type: standalone
robot-platform: unix
robot-availability: data, source on request
robot-exclusion: yes
robot-exclusion-useragent: titin
robot-noindex: no
robot-host: barok.foi.hr
robot-from: no
robot-useragent: TitIn/0.2
robot-language: perl5, c
robot-description:
        The TitIn is used to index all titles of Web server in
        .hr domain.
robot-history:
        It was done as result of desperate need for central index of
        Croatian web servers in December 1996.
robot-environment: research
modified-date: Thu, 12 Dec 1996 16:06:42 MET
modified-by: Dobrica Pavlinusic

robot-id:           titan
robot-name:         TITAN
robot-cover-url:    http://isserv.tas.ntt.jp/chisho/titan-e.html
robot-details-url:  http://isserv.tas.ntt.jp/chisho/titan-help/eng/titan-help-e.html
robot-owner-name:   Yoshihiko HAYASHI
robot-owner-url:    
robot-owner-email:  hayashi@nttnly.isl.ntt.jp
robot-status:       active
robot-purpose:      indexing
robot-type:         standalone
robot-platform:     SunOS 4.1.4
robot-availability: no
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         nlptitan.isl.ntt.jp
robot-from:         yes
robot-useragent:    TITAN/0.1
robot-language:     perl 4
robot-description:  Its purpose is to generate a Resource Discovery
    database, and copy document trees. Our primary goal is to develop
    an advanced method for indexing the WWW documents. Uses libwww-perl
robot-history:      
robot-environment:
modified-date:      Mon Jun 24 17:20:44 PDT 1996
modified-by:        Yoshihiko HAYASHI

robot-id:           tkwww
robot-name:         The TkWWW Robot
robot-cover-url:    http://fang.cs.sunyit.edu/Robots/tkwww.html
robot-details-url:
robot-owner-name:   Scott Spetka
robot-owner-url:    http://fang.cs.sunyit.edu/scott/scott.html
robot-owner-email:  scott@cs.sunyit.edu
robot-status:       
robot-purpose:      indexing
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         
robot-from:         
robot-useragent:    
robot-language:     
robot-description:  It is designed to search Web neighborhoods to find pages
	that may be logically related. The Robot returns a list of
	links that looks like a hot list. The search can be by key
	word or all links at a distance of one or two hops may be
	returned. The TkWWW Robot is described in a paper presented
	at the WWW94 Conference in Chicago.
robot-history:      
robot-environment:
modified-date:      
modified-by:

robot-id: tlspider
robot-name:TLSpider
robot-cover-url: n/a
robot-details-url: n/a
robot-owner-name: topiclink.com
robot-owner-url: topiclink.com
robot-owner-email: tlspider@outtel.com
robot-status: not activated
robot-purpose: to get web sites and add them to the topiclink future directory
robot-type:development: robot under development
robot-platform:linux
robot-availability:none
robot-exclusion:yes
robot-exclusion-useragent:topiclink
robot-noindex:no
robot-host: tlspider.topiclink.com (not available yet)
robot-from:no
robot-useragent:TLSpider/1.1
robot-language:perl5
robot-description:This robot runs 2 days a week getting information for
 TopicLink.com
robot-history:This robot was created to serve the internet search engine
 TopicLink.com
robot-environment:service
modified-date:September,10,1999 17:28 GMT
modified-by: TopicLink Spider Team

robot-id:           ucsd
robot-name:         UCSD Crawl
robot-cover-url:    http://www.mib.org/~ucsdcrawl
robot-details-url:
robot-owner-name:   Adam Tilghman
robot-owner-url:    http://www.mib.org/~atilghma
robot-owner-email:  atilghma@mib.org
robot-status:
robot-purpose:      indexing, statistics
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:         nuthaus.mib.org scilib.ucsd.edu
robot-from:         yes
robot-useragent:    UCSD-Crawler
robot-language:     Perl 4
robot-description:  Should hit ONLY within UC San Diego - trying to count
	servers here.
robot-history:
robot-environment:
modified-date:      Sat Jan 27 09:21:40 1996.
modified-by:

robot-id: udmsearch
robot-name: UdmSearch
robot-details-url: http://mysearch.udm.net/
robot-cover-url: http://mysearch.udm.net/
robot-owner-name: Alexander Barkov
robot-owner-url: http://mysearch.udm.net/
robot-owner-email: bar@izhcom.ru
robot-status: active
robot-purpose: indexing, validation
robot-type: standalone
robot-platform: unix
robot-availability: source, binary
robot-exclusion: yes
robot-exclusion-useragent: UdmSearch
robot-noindex: yes
robot-host: *
robot-from: no
robot-useragent: UdmSearch/2.1.1
robot-language: c
robot-description: UdmSearch is a free web search engine software for
 intranet/small domain internet servers
robot-history: Developed since 1998, origin purpose is a search engine
 over republic of Udmurtia http://search.udm.net
robot-environment: hobby
modified-date: Mon, 6 Sep 1999 10:28:52 GMT

robot-id: uptimebot
robot-name: UptimeBot
robot-cover-url: http://www.uptimebot.com
robot-details-url: http://www.uptimebot.com
robot-owner-name: UCO team
robot-owner-url: http://www.uptimebot.com
robot-owner-email: luft_master@ukr.net
robot-status: active
robot-purpose: indexing, statistics
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: uptimebot
robot-exclusion-useragent: no
robot-noindex: no
robot-host: uptimebot.com
robot-from: no
robot-useragent: uptimebot
robot-language: c++
robot-description: UptimeBot is a web crawler that checks return codes of web
 servers and calculates average number of current servers status. The robot
 runs daily, and visits sites in a random order.
robot-history: This robot is a local research product of the UptimeBot team.
robot-environment: research
modified-date: Sat, 19 March 2004 21:19:03 GMT
modified-by: UptimeBot team

robot-id:                   urlck
robot-name:                 URL Check
robot-cover-url:            http://www.cutternet.com/products/webcheck.html
robot-details-url:          http://www.cutternet.com/products/urlck.html
robot-owner-name:           Dave Finnegan
robot-owner-url:            http://www.cutternet.com
robot-owner-email:          dave@cutternet.com
robot-status:               active
robot-purpose:              maintenance
robot-type:                 standalone
robot-platform:             unix
robot-availability:         binary
robot-exclusion:            yes
robot-exclusion-useragent:  urlck
robot-noindex:              no
robot-host:                 *
robot-from:                 yes
robot-useragent:            urlck/1.2.3
robot-language:             c
robot-description:          The robot is used to manage, maintain, and modify
                            web sites.  It builds a database detailing the
                            site, builds HTML reports describing the site, and
                            can be used to up-load pages to the site or to
                            modify existing pages and URLs within the site.  It
                            can also be used to mirror whole or partial sites.
                            It supports HTTP, File, FTP, and Mailto schemes.
robot-history:              Originally designed to validate URLs.
robot-environment:          commercial
modified-date:              July 9, 1997
modified-by:                Dave Finnegan

robot-id: us
robot-name: URL Spider Pro
robot-cover-url: http://www.innerprise.net
robot-details-url: http://www.innerprise.net/us.htm
robot-owner-name: Innerprise
robot-owner-url: http://www.innerprise.net
robot-owner-email: greg@innerprise.net
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: Windows9x/NT
robot-availability: binary
robot-exclusion: yes
robot-exclusion-useragent: *
robot-noindex: yes
robot-host: *
robot-from: no
robot-useragent: URL Spider Pro
robot-language: delphi
robot-description: Used for building a database of web pages.
robot-history: Project started July 1998.
robot-environment: commercial
modified-date: Mon, 12 Jul 1999 17:50:30 GMT
modified-by: Innerprise

robot-id: valkyrie
robot-name: Valkyrie
robot-cover-url: http://kichijiro.c.u-tokyo.ac.jp/odin/
robot-details-url: http://kichijiro.c.u-tokyo.ac.jp/odin/robot.html
robot-owner-name: Masanori Harada
robot-owner-url: http://www.graco.c.u-tokyo.ac.jp/~harada/
robot-owner-email: harada@graco.c.u-tokyo.ac.jp
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: Valkyrie libwww-perl
robot-noindex: no
robot-host: *.c.u-tokyo.ac.jp
robot-from: yes
robot-useragent: Valkyrie/1.0 libwww-perl/0.40
robot-language: perl4
robot-description: used to collect resources from Japanese Web sites for ODIN search engine.
robot-history: This robot has been used since Oct. 1995 for author's research.
robot-environment: service research
modified-date: Thu Mar 20 19:09:56 JST 1997
modified-by: harada@graco.c.u-tokyo.ac.jp

robot-id: verticrawl
robot-name: Verticrawl
robot-cover-url: http://www.verticrawl.com/
robot-details-url: http://www.verticrawl.com/
robot-owner-name: Datamean, Malinge, Lhuisset
robot-owner-url: http://www.verticrawl.com/
robot-owner-email: webmaster@verticrawl.com
robot-status: active
robot-purpose: indexing, searching, and classifying urls in a global ASP search & Appliance search solution
robot-type: standalone
robot-platform: Unix, Linux
robot-availability: none
robot-exclusion:  verticrawl
robot-exclusion-useragent: verticrawl
robot-noindex: yes
robot-host: http://www.verticrawl.com/
robot-from: Yes
robot-useragent:  Verticrawlbot
robot-language:  c, perl, php
robot-description: Verticrawl is a global search engine dedicated to appliance service providing in ASP search & Appliance search solution
robot-history: Verticrawl is based on web services for knowledge management and Web portals services and sitesearch solutions
robot-environment: commercial
modified-date: Mon, 27 Jul 2006 17:28:52 GMT
modified-by: webmaster@verticrawl.com

robot-id: victoria
robot-name: Victoria
robot-cover-url:
robot-details-url:
robot-owner-name: Adrian Howard
robot-owner-url:
robot-owner-email: adrianh@oneworld.co.uk
robot-status: development
robot-purpose: maintenance
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: Victoria
robot-noindex: yes
robot-host:
robot-from:
robot-useragent: Victoria/1.0
robot-language: perl,c
robot-description: Victoria is part of a groupware produced
 by Victoria Real Ltd. (voice: +44 [0]1273 774469,
 fax: +44 [0]1273 779960 email: victoria@pavilion.co.uk).
 Victoria is used to monitor changes in W3 documents,
 both intranet and internet based.
 Contact Victoria Real for more information.
robot-history:
robot-environment: commercial
modified-date: Fri, 22 Nov 1996 16:45 GMT
modified-by: victoria@pavilion.co.uk

robot-id:           visionsearch
robot-name:         vision-search
robot-cover-url:    http://www.ius.cs.cmu.edu/cgi-bin/vision-search
robot-details-url:
robot-owner-name:   Henry A. Rowley
robot-owner-url:    http://www.cs.cmu.edu/~har
robot-owner-email:  har@cs.cmu.edu
robot-status:
robot-purpose:      indexing.
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:
robot-host:         dylan.ius.cs.cmu.edu
robot-from:         no
robot-useragent:    vision-search/3.0'
robot-language:     Perl 5
robot-description:  Intended to be an index of computer vision pages, containing
	all pages within <em>n</em> links (for some small
	<em>n</em>) of the Vision Home Page
robot-history:
robot-environment:
modified-date:      Fri Mar  8 16:03:04 1996
modified-by:

robot-id: voidbot
robot-name: void-bot
robot-cover-url: http://www.void.be/
robot-details-url: http://www.void.be/void-bot.html
robot-owner-name: Tristan Crombez
robot-owner-url: http://www.void.be/tristan/
robot-owner-email: bot@void.be
robot-status: development
robot-purpose: indexing,maintenance
robot-type: standalone
robot-platform: FreeBSD,Linux
robot-availability: none
robot-exclusion: no
robot-exclusion-useragent: void-bot
robot-noindex: no
robot-host: void.be
robot-from: no
robot-useragent: void-bot/0.1 (bot@void.be; http://www.void.be/)
robot-language: perl5
robot-description: The void-bot is
 used to build a database for the void search service, as well as for link
 validation.
robot-history: Development was started in October 2003, spidering
 began in January 2004.
robot-environment: research
modified-date: Mon, 9 Feb 2004 11:51:10 GMT
modified-by: bot@void.be

robot-id: voyager
robot-name: Voyager
robot-cover-url: http://www.lisa.co.jp/voyager/
robot-details-url:
robot-owner-name: Voyager Staff
robot-owner-url: http://www.lisa.co.jp/voyager/
robot-owner-email: voyager@lisa.co.jp
robot-status: development
robot-purpose: indexing, maintenance
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: Voyager 
robot-noindex: no
robot-host: *.lisa.co.jp
robot-from: yes
robot-useragent: Voyager/0.0
robot-language: perl5 
robot-description: This robot is used to build the database for the
                   Lisa Search service.  The robot is launched manually
                   and visits sites in a random order.
robot-history:
robot-environment: service
modified-date: Mon, 30 Nov 1998 08:00:00 GMT
modified-by: Hideyuki Ezaki

robot-id: vwbot
robot-name: VWbot
robot-cover-url: http://vancouver-webpages.com/VWbot/
robot-details-url: http://vancouver-webpages.com/VWbot/aboutK.shtml
robot-owner-name: Andrew Daviel
robot-owner-url:  http://vancouver-webpages.com/~admin/
robot-owner-email: andrew@vancouver-webpages.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix
robot-availability: source
robot-exclusion: yes
robot-exclusion-useragent: VWbot_K
robot-noindex: yes
robot-host: vancouver-webpages.com
robot-from: yes
robot-useragent: VWbot_K/4.2
robot-language: perl4
robot-description: Used to index BC sites for the searchBC database. Runs daily.
robot-history: Originally written fall 1995. Actively maintained.
robot-environment: service commercial research
modified-date: Tue, 4 Mar 1997 20:00:00 GMT
modified-by: Andrew Daviel

robot-id:             w3index
robot-name:           The NWI Robot
robot-cover-url:            http://www.ub2.lu.se/NNC/projects/NWI/the_nwi_robot.html
robot-owner-name:     Sigfrid Lundberg, Lund university, Sweden
robot-owner-url:      http://nwi.ub2.lu.se/~siglun
robot-owner-email:    siglun@munin.ub2.lu.se
robot-status:         active
robot-purpose:        discovery,statistics
robot-type:           standalone
robot-platform:       UNIX
robot-availability:   none (at the moment)
robot-exclusion:      yes
robot-noindex:        No
robot-host:   nwi.ub2.lu.se, mars.dtv.dk and a few others
robot-from:   yes
robot-useragent:      w3index
robot-language:       perl5
robot-description:    A resource discovery robot, used primarily for
	the indexing of the Scandinavian Web
robot-history:        It is about a year or so old.
	Written by Anders Ardö, Mattias Borrell,
	Håkan Ardö and myself.
robot-environment: service,research
modified-date:        Wed Jun 26 13:58:04 MET DST 1996
modified-by:          Sigfrid Lundberg

robot-id:           w3m2
robot-name:         W3M2
robot-cover-url:    http://tronche.com/W3M2
robot-details-url:
robot-owner-name:   Christophe Tronche
robot-owner-url:    http://tronche.com/
robot-owner-email:  tronche@lri.fr
robot-status:       
robot-purpose:      indexing, maintenance, statistics
robot-type:         standalone
robot-platform:     
robot-availability: 
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         *
robot-from:         yes
robot-useragent:    W3M2/x.xxx
robot-language:     Perl 4, Perl 5, and C++
robot-description:  to generate a Resource Discovery database, validate links,
	validate HTML, and generate statistics
robot-history:      
robot-environment:
modified-date:      Fri May 5 17:48:48 1995
modified-by:

robot-id: wallpaper
robot-name: WallPaper (alias crawlpaper)
robot-cover-url: http://www.crawlpaper.com/
robot-details-url: http://sourceforge.net/projects/crawlpaper/
robot-owner-name: Luca Piergentili
robot-owner-url: http://www.geocities.com/lpiergentili/
robot-owner-email: lpiergentili@yahoo.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: windows
robot-availability: source, binary
robot-exclusion: yes
robot-exclusion-useragent: crawlpaper
robot-noindex: no
robot-host:
robot-from:
robot-useragent: CrawlPaper/n.n.n (Windows n)
robot-language: C++
robot-description: a crawler for pictures download and offline browsing
robot-history: started as a screensaver, the program has evolved into a crawler
 including an audio player, etc.
robot-environment: hobby
modified-date: Mon, 25 Aug 2003 09:00:00 GMT
modified-by:

robot-id:           wanderer
robot-name:         the World Wide Web Wanderer
robot-cover-url:    http://www.mit.edu/people/mkgray/net/
robot-details-url:
robot-owner-name:   Matthew Gray
robot-owner-url:    http://www.mit.edu:8001/people/mkgray/mkgray.html
robot-owner-email:  mkgray@mit.edu
robot-status:       active
robot-purpose:      statistics
robot-type:         standalone
robot-platform:     unix
robot-availability: data
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         *.mit.edu
robot-from:         
robot-useragent:    WWWWanderer v3.0
robot-language:     perl4
robot-description:  Run initially in June 1993, its aim is to measure
                    the growth in the web.
robot-history:      
robot-environment:  research
modified-date:      
modified-by:

robot-id: wapspider
robot-name: w@pSpider by wap4.com
robot-cover-url: http://mopilot.com/
robot-details-url: http://wap4.com/portfolio.htm
robot-owner-name: Dieter Kneffel
robot-owner-url: http://wap4.com/ (corporate)
robot-owner-email: info@wap4.com
robot-status: active
robot-purpose: indexing, maintenance (special: dedicated to wap/wml pages)
robot-type: standalone
robot-platform: unix
robot-availability: data
robot-exclusion: yes
robot-exclusion-useragent: wapspider
robot-noindex: [does not apply for wap/wml pages!]
robot-host: *.wap4.com, *.mopilot.com
robot-from: yes
robot-useragent: w@pSpider/xxx (unix) by wap4.com
robot-language: c, php, sql
robot-description: wapspider is used to build the database for
 mopilot.com, a search engine for mobile contents; it is specially
 designed to crawl wml-pages. html is indexed, but html-links are
 (currently) not followed
robot-history: this robot was developed by wap4.com in 1999 for the
 world's first wap-search engine
robot-environment: service, commercial, research
modified-date: Fri, 23 Jun 2000 14:33:52 MESZ
modified-by: Dieter Kneffel, data@wap4.com

robot-id:webbandit
robot-name:WebBandit Web Spider
robot-cover-url:http://pw2.netcom.com/~wooger/
robot-details-url:http://pw2.netcom.com/~wooger/
robot-owner-name:Jerry Walsh
robot-owner-url:http://pw2.netcom.com/~wooger/
robot-owner-email:wooger@ix.netcom.com
robot-status:active
robot-purpose:Resource Gathering / Server Benchmarking
robot-type:standalone application
robot-platform:Intel - windows95
robot-availability:source, binary
robot-exclusion:no
robot-exclusion-useragent:WebBandit/1.0
robot-noindex:no
robot-host:ix.netcom.com
robot-from:no
robot-useragent:WebBandit/1.0
robot-language:C++
robot-description:multithreaded, hyperlink-following,
 resource finding webspider 
robot-history:Inspired by reading of
 Internet Programming book by Jamsa/Cope 
robot-environment:commercial 
modified-date:11/21/96
modified-by:Jerry Walsh

robot-id: webcatcher
robot-name: WebCatcher
robot-cover-url: http://oscar.lang.nagoya-u.ac.jp
robot-details-url:
robot-owner-name: Reiji SUZUKI
robot-owner-url: http://oscar.lang.nagoya-u.ac.jp/~reiji/index.html
robot-owner-email: reiji@infonia.ne.jp
robot-owner-name2: Masatoshi SUGIURA
robot-owner-url2: http://oscar.lang.nagoya-u.ac.jp/~sugiura/index.html
robot-owner-email2: sugiura@lang.nagoya-u.ac.jp
robot-status: development
robot-purpose: indexing  
robot-type: standalone   
robot-platform: unix, windows, mac
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: webcatcher
robot-noindex: no
robot-host: oscar.lang.nagoya-u.ac.jp
robot-from: no
robot-useragent: WebCatcher/1.0
robot-language: perl5
robot-description: WebCatcher gathers web pages
                   that Japanese college students want to visit.
robot-history: This robot finds its roots in a research project 
           at Nagoya University in 1998.
robot-environment: research
modified-date: Fri, 16 Oct 1998 17:28:52 JST
modified-by: "Reiji SUZUKI" <reiji@infonia.ne.jp>

robot-id:           webcopy
robot-name:         WebCopy
robot-cover-url:    http://www.inf.utfsm.cl/~vparada/webcopy.html
robot-details-url:
robot-owner-name:   Victor Parada
robot-owner-url:    http://www.inf.utfsm.cl/~vparada/
robot-owner-email:  vparada@inf.utfsm.cl
robot-status:       
robot-purpose:      mirroring
robot-type:         standalone
robot-platform:     
robot-availability: 
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         *
robot-from:         no
robot-useragent:    WebCopy/(version)
robot-language:     perl 4 or perl 5
robot-description:  Its purpose is to perform mirroring. WebCopy can retrieve
	files recursively using HTTP protocol. It can be used as a
	delayed browser or as a mirroring tool. It cannot jump from
	one site to another.
robot-history:      
robot-environment:
modified-date:      Sun Jul 2 15:27:04 1995
modified-by:

robot-id:           webfetcher
robot-name:         webfetcher
robot-cover-url:    http://www.ontv.com/
robot-details-url:
robot-owner-name:
robot-owner-url:    http://www.ontv.com/
robot-owner-email:  webfetch@ontv.com
robot-status:
robot-purpose:      mirroring
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:
robot-host:         *
robot-from:         yes
robot-useragent:    WebFetcher/0.8, 
robot-language:     C++
robot-description:  don't wait! OnTV's WebFetcher mirrors whole sites down to
	your hard disk on a TV-like schedule. Catch w3
	documentation. Catch discovery.com without waiting! A fully
	operational web robot for NT/95 today, most UNIX soon, MAC
	tomorrow.
robot-history:
robot-environment:
modified-date:      Sat Jan 27 10:31:43 1996.
modified-by:

robot-id:           webfoot
robot-name:         The Webfoot Robot
robot-cover-url:    
robot-details-url:
robot-owner-name:   Lee McLoughlin
robot-owner-url:    http://web.doc.ic.ac.uk/f?/lmjm
robot-owner-email:  L.McLoughlin@doc.ic.ac.uk
robot-status:       
robot-purpose:      
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      
robot-host:         phoenix.doc.ic.ac.uk
robot-from:         
robot-useragent:    
robot-language:     
robot-description:  
robot-history:      First spotted in Mid February 1994
robot-environment:
modified-date:      
modified-by:

robot-id: webinator
robot-name: Webinator
robot-details-url: http://www.thunderstone.com/texis/site/pages/webinator4_admin.html
robot-cover-url: http://www.thunderstone.com/texis/site/pages/webinator.html
robot-owner-name: 
robot-owner-email: 
robot-status: active, under further enhancement.
robot-purpose: information retrieval
robot-type: standalone
robot-exclusion: yes
robot-noindex: yes
robot-exclusion-useragent: T-H-U-N-D-E-R-S-T-O-N-E
robot-host: several
robot-from: No
robot-language: Texis Vortex
robot-history: 
robot-environment: Commercial

robot-id:           weblayers
robot-name:         weblayers
robot-cover-url:    http://www.univ-paris8.fr/~loic/weblayers/
robot-details-url:
robot-owner-name:   Loic Dachary
robot-owner-url:    http://www.univ-paris8.fr/~loic/
robot-owner-email:  loic@afp.com
robot-status:       
robot-purpose:      maintenance
robot-type:         standalone
robot-platform:     
robot-availability: 
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         
robot-from:         
robot-useragent:    weblayers/0.0
robot-language:     perl 5
robot-description:  Its purpose is to validate, cache and maintain links. It is
	designed to maintain the cache generated by the emacs emacs
	w3 mode (N*tscape replacement) and to support annotated
	documents (keep them in sync with the original document via
	diff/patch).
robot-history:      
robot-environment:
modified-date:      Fri Jun 23 16:30:42 FRE 1995
modified-by:

robot-id:           weblinker
robot-name:         WebLinker
robot-cover-url:    http://www.cern.ch/WebLinker/
robot-details-url:
robot-owner-name:   James Casey
robot-owner-url:    http://www.maths.tcd.ie/hyplan/jcasey/jcasey.html
robot-owner-email:  jcasey@maths.tcd.ie
robot-status:       
robot-purpose:      maintenance
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      
robot-host:         
robot-from:         
robot-useragent:    WebLinker/0.0 libwww-perl/0.1
robot-language:     
robot-description:  it traverses a section of web, doing URN->URL conversion.
        It will be used as a post-processing tool on documents created
	by automatic converters such as LaTeX2HTML or WebMaker. At
	the moment it works at full speed, but is restricted to
	localsites. External GETs will be added, but these will be
	running slowly. WebLinker is meant to be run locally, so if
	you see it elsewhere let the author know!
robot-history:      
robot-environment:
modified-date:      
modified-by:

robot-id:           webmirror
robot-name:         WebMirror
robot-cover-url:    http://www.winsite.com/pc/win95/netutil/wbmiror1.zip
robot-details-url:
robot-owner-name:   Sui Fung Chan
robot-owner-url:    http://www.geocities.com/NapaVally/1208
robot-owner-email:  sfchan@mailhost.net
robot-status:
robot-purpose:      mirroring
robot-type:         standalone
robot-platform:     Windows95
robot-availability:
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:
robot-host:
robot-from:         no
robot-useragent:    no
robot-language:     C++
robot-description:  It download web pages to hard drive for off-line
	browsing.
robot-history:
robot-environment:
modified-date:      Mon Apr 29 08:52:25 1996.
modified-by:

robot-id: webmoose
robot-name: The Web Moose
robot-cover-url: 
robot-details-url: http://www.nwlink.com/~mikeblas/webmoose/
robot-owner-name: Mike Blaszczak
robot-owner-url: http://www.nwlink.com/~mikeblas/
robot-owner-email: mikeblas@nwlink.com
robot-status: development
robot-purpose: statistics, maintenance
robot-type: standalone
robot-platform: Windows NT
robot-availability: data
robot-exclusion: no
robot-exclusion-useragent: WebMoose
robot-noindex: no
robot-host: msn.com
robot-from: no
robot-useragent: WebMoose/0.0.0000
robot-language: C++
robot-description: This robot collects statistics and verifies links.
 It 
 builds an graph of its visit path.
robot-history: This robot is under development.
 It will support ROBOTS.TXT soon.
robot-environment: hobby
modified-date: Fri, 30 Aug 1996 00:00:00 GMT
modified-by: Mike Blaszczak

robot-id:webquest
robot-name:WebQuest
robot-cover-url:
robot-details-url:
robot-owner-name:TaeYoung Choi
robot-owner-url:http://www.cosmocyber.co.kr:8080/~cty/index.html
robot-owner-email:cty@cosmonet.co.kr
robot-status:development
robot-purpose:indexing
robot-type:standalone
robot-platform:unix
robot-availability:none
robot-exclusion:yes
robot-exclusion-useragent:webquest
robot-noindex:no
robot-host:210.121.146.2, 210.113.104.1, 210.113.104.2
robot-from:yes
robot-useragent:WebQuest/1.0
robot-language:perl5
robot-description:WebQuest will be used to build the databases for various web
 search service sites which will be in service by early 1998. Until the end of
 Jan. 1998, WebQuest will run from time to time. Since then, it will run
 daily(for few hours and very slowly).
robot-history:The development of WebQuest was motivated by the need for a
 customized robot in various projects of COSMO Information & Communication Co.,
 Ltd. in Korea.
robot-environment:service  
modified-date:Tue, 30 Dec 1997 09:27:20 GMT
modified-by:TaeYoung Choi

robot-id: webreader
robot-name: Digimarc MarcSpider
robot-cover-url: http://www.digimarc.com/prod_fam.html
robot-details-url: http://www.digimarc.com/prod_fam.html
robot-owner-name: Digimarc Corporation
robot-owner-url: http://www.digimarc.com
robot-owner-email: wmreader@digimarc.com
robot-status: active
robot-purpose: maintenance
robot-type: standalone
robot-platform: windowsNT
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent:
robot-noindex:
robot-host: 206.102.3.*
robot-from: yes
robot-useragent: Digimarc WebReader/1.2
robot-language: c++
robot-description: Examines image files for watermarks. 
 In order to not waste internet bandwidth with yet
 another crawler, we have contracted with one of the major crawlers/search
 engines to provide us with a list of specific URLs of interest to us.  If an
 URL is to an image, we may read the image, but we do not crawl to any other
 URLs.  If an URL is to a page of interest (ususally due to CGI), then we
 access the page to get the image URLs from it, but we do not crawl to any
 other pages.
robot-history: First operation in August 1997.
robot-environment: service
modified-date: Mon, 20 Oct 1997 16:44:29 GMT
modified-by: Brian MacIntosh

robot-id: webreaper
robot-name: WebReaper
robot-cover-url: http://www.otway.com/webreaper
robot-details-url:
robot-owner-name: Mark Otway
robot-owner-url: http://www.otway.com
robot-owner-email: webreaper@otway.com
robot-status: active
robot-purpose: indexing/offline browsing
robot-type: standalone
robot-platform: windows95, windowsNT
robot-availability: binary
robot-exclusion: yes
robot-exclusion-useragent: webreaper
robot-noindex: no
robot-host: *
robot-from: no
robot-useragent: WebReaper [webreaper@otway.com]
robot-language: c++
robot-description: Freeware app which downloads and saves sites locally for
 offline browsing.
robot-history: Written for personal use, and then distributed to the public
 as freeware.
robot-environment: hobby
modified-date: Thu, 25 Mar 1999 15:00:00 GMT
modified-by: Mark Otway

robot-id:                       webs
robot-name:                     webs
robot-cover-url:                http://webdew.rnet.or.jp/
robot-details-url:              http://webdew.rnet.or.jp/service/shank/NAVI/SEARCH/info2.html#robot
robot-owner-name:               Recruit Co.Ltd,
robot-owner-url:                
robot-owner-email:              dew@wwwadmin.rnet.or.jp
robot-status:                   active
robot-purpose:                  statistics
robot-type:                     standalone
robot-platform:                 unix
robot-availability:             none
robot-exclusion:                yes
robot-exclusion-useragent:      webs
robot-noindex:                  no
robot-host:                     lemon.recruit.co.jp
robot-from:                     yes
robot-useragent:                webs@recruit.co.jp
robot-language:                 perl5
robot-description:              The webs robot is used to gather WWW servers'
                                top pages last modified date data. Collected
                                statistics reflects the priority of WWW server
                                data collection for webdew indexing service.
                                Indexing in webdew is done manually.
robot-history:
robot-environment:              service
modified-date:                  Fri,  6 Sep 1996 10:00:00 GMT
modified-by:

robot-id:           websnarf
robot-name:         Websnarf
robot-cover-url:    
robot-details-url:
robot-owner-name:   Charlie Stross
robot-owner-url:    
robot-owner-email:  charles@fma.com
robot-status:       retired
robot-purpose:      
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         
robot-from:         
robot-useragent:    
robot-language:     
robot-description:  
robot-history:      
robot-environment:
modified-date:      
modified-by:

robot-id: webspider
robot-name: WebSpider
robot-details-url: http://www.csi.uottawa.ca/~u610468
robot-cover-url:
robot-owner-name: Nicolas Fraiji
robot-owner-email: u610468@csi.uottawa.ca
robot-status: active, under further enhancement.
robot-purpose: maintenance, link diagnostics
robot-type: standalone
robot-exclusion: yes
robot-noindex: no
robot-exclusion-useragent: webspider
robot-host: several
robot-from: Yes
robot-language: Perl4
robot-history: developed as a course project at the University of
     Ottawa, Canada in 1996.
robot-environment: Educational use and Research

robot-id:           webvac
robot-name:         WebVac
robot-cover-url:    http://www.federated.com/~tim/webvac.html
robot-details-url:
robot-owner-name:   Tim Jensen
robot-owner-url:    http://www.federated.com/~tim
robot-owner-email:  tim@federated.com
robot-status:
robot-purpose:      mirroring
robot-type:         standalone
robot-platform:
robot-availability:
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:
robot-host:
robot-from:         no
robot-useragent:    webvac/1.0
robot-language:     C++
robot-description:
robot-history:
robot-environment:
modified-date:      Mon May 13 03:19:17 1996.
modified-by:

robot-id:           webwalk
robot-name:         webwalk
robot-cover-url:    
robot-details-url:
robot-owner-name:   Rich Testardi
robot-owner-url:    
robot-owner-email:  
robot-status:       retired
robot-purpose:      indexing, maintenance, mirroring, statistics
robot-type:         standalone
robot-platform:     
robot-availability: 
robot-exclusion:    yes
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         
robot-from:         yes
robot-useragent:    webwalk
robot-language:     c
robot-description:  Its purpose is to generate a Resource Discovery database,
	validate links, validate HTML, perform mirroring, copy
	document trees, and generate statistics. Webwalk is easily
	extensible to perform virtually any maintenance function
	which involves web traversal, in a way much like the '-exec'
	option of the find(1) command. Webwalk is usually used
	behind the HP firewall
robot-history:      
robot-environment:
modified-date:      Wed Nov 15 09:51:59 PST 1995
modified-by:

robot-id: webwalker
robot-name: WebWalker
robot-cover-url:
robot-details-url:
robot-owner-name: Fah-Chun Cheong
robot-owner-url: http://www.cs.berkeley.edu/~fccheong/
robot-owner-email: fccheong@cs.berkeley.edu
robot-status: active
robot-purpose: maintenance
robot-type: standalone
robot-platform: unix
robot-availability: source
robot-exclusion: yes
robot-exclusion-useragent: WebWalker
robot-noindex: no
robot-host: *
robot-from: yes
robot-useragent: WebWalker/1.10
robot-language: perl4
robot-description: WebWalker performs WWW traversal for individual
                   sites and tests for the integrity of all hyperlinks
                   to external sites. 
robot-history: A Web maintenance robot for expository purposes,
               first published in the book "Internet Agents: Spiders,
               Wanderers, Brokers, and Bots" by the robot's author.
robot-environment: hobby
modified-date: Thu, 25 Jul 1996 16:00:52 PDT
modified-by: Fah-Chun Cheong

robot-id:           webwatch
robot-name:         WebWatch
robot-cover-url:    http://www.specter.com/users/janos/specter
robot-details-url:
robot-owner-name:   Joseph Janos
robot-owner-url:    http://www.specter.com/users/janos/specter
robot-owner-email:  janos@specter.com
robot-status:       
robot-purpose:      maintenance, statistics
robot-type:         standalone
robot-platform:     
robot-availability: 
robot-exclusion:    no
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         
robot-from:         no
robot-useragent:    WebWatch
robot-language:     c++
robot-description:  Its purpose is to validate HTML, and generate statistics.
	Check URLs modified since a given date.
robot-history:      
robot-environment:
modified-date:      Wed Jul 26 13:36:32 1995
modified-by:

robot-id: wget
robot-name: Wget
robot-cover-url: ftp://gnjilux.cc.fer.hr/pub/unix/util/wget/
robot-details-url:
robot-owner-name: Hrvoje Niksic
robot-owner-url:
robot-owner-email: hniksic@srce.hr
robot-status: development
robot-purpose: mirroring, maintenance
robot-type: standalone
robot-platform: unix
robot-availability: source
robot-exclusion: yes
robot-exclusion-useragent: wget
robot-noindex: no
robot-host: *
robot-from: yes
robot-useragent: Wget/1.4.0
robot-language: C
robot-description:
  Wget is a utility for retrieving files using HTTP and FTP protocols.
  It works non-interactively, and can retrieve HTML pages and FTP
  trees recursively.  It can be used for mirroring Web pages and FTP
  sites, or for traversing the Web gathering data.  It is run by the
  end user or archive maintainer.
robot-history:
robot-environment: hobby, research
modified-date: Mon, 11 Nov 1996 06:00:44 MET
modified-by: Hrvoje Niksic

robot-id: whatuseek
robot-name: whatUseek Winona
robot-cover-url: http://www.whatUseek.com/
robot-details-url: http://www.whatUseek.com/
robot-owner-name: Neil Mansilla
robot-owner-url: http://www.whatUseek.com/
robot-owner-email: neil@whatUseek.com
robot-status: active
robot-purpose: Robot used for site-level search and meta-search engines.
robot-type: standalone
robot-platform: unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: winona
robot-noindex: yes
robot-host: *.whatuseek.com, *.aol2.com
robot-from: no
robot-useragent: whatUseek_winona/3.0
robot-language: c++
robot-description: The whatUseek robot, Winona, is used for site-level
 search engines.  It is also implemented in several meta-search engines.
robot-history: Winona was developed in November of 1996.
robot-environment: service
modified-date: Wed, 17 Jan 2001 11:52:00 EST
modified-by: Neil Mansilla

robot-id: whowhere
robot-name: WhoWhere Robot
robot-cover-url: http://www.whowhere.com
robot-details-url: 
robot-owner-name: Rupesh Kapoor
robot-owner-url: 
robot-owner-email: rupesh@whowhere.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: Sun Unix
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: whowhere
robot-noindex: no
robot-host: spica.whowhere.com
robot-from: no
robot-useragent: 
robot-language: C/Perl
robot-description: Gathers data for email directory from web pages
robot-history: 
robot-environment: commercial
modified-date: 
modified-by:

robot-id: wlm
robot-name: Weblog Monitor
robot-details-url: http://www.metastatic.org/wlm/
robot-cover-url: http://www.metastatic.org/wlm/
robot-owner-name: Casey Marshall
robot-owner-url: http://www.metastatic.org/
robot-owner-email: rsdio@metastatic.org
robot-status: active
robot-purpose: statistics
robot-type: standalone
robot-platform: unix, windows,
robot-availability: source, data
robot-exclusion: no
robot-exclusion-useragent: wlm
robot-noindex: no
robot-nofollow: no
robot-host: blossom.metastatic.org
robot-from: no
robot-useragent: wlm-1.1
robot-language: java
robot-description1: Builds the 'Picture of Weblogs' applet.
robot-description2: See http://www.metastatic.org/wlm/.
robot-environment: hobby
modified-date: Fri, 2 Nov 2001 04:55:00 PST

robot-id:           wmir
robot-name:         w3mir
robot-cover-url:    http://www.ifi.uio.no/~janl/w3mir.html
robot-details-url:
robot-owner-name:   Nicolai Langfeldt
robot-owner-url:    http://www.ifi.uio.no/~janl/w3mir.html
robot-owner-email:  w3mir-core@usit.uio.no
robot-status:
robot-purpose:      mirroring.
robot-type:         standalone
robot-platform:     UNIX, WindowsNT
robot-availability:
robot-exclusion:    no.
robot-exclusion-useragent:
robot-noindex:
robot-host:
robot-from:         yes
robot-useragent:    w3mir
robot-language:     Perl
robot-description:  W3mir uses the If-Modified-Since HTTP header and recurses
	only the directory and subdirectories of its start
	document.  Known to work on U*ixes and Windows
	NT.
robot-history:
robot-environment:
modified-date:      Wed Apr 24 13:23:42 1996.
modified-by:

robot-id: wolp
robot-name: WebStolperer
robot-cover-url: http://www.suchfibel.de/maschinisten
robot-details-url: http://www.suchfibel.de/maschinisten/text/werkzeuge.htm (in German)
robot-owner-name: Marius Dahler
robot-owner-url: http://www.suchfibel.de/maschinisten
robot-owner-email: mda@suchfibel.de
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix, NT
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: WOLP
robot-noindex: yes
robot-host: www.suchfibel.de
robot-from: yes
robot-useragent: WOLP/1.0 mda/1.0
robot-language: perl5
robot-description: The robot gathers information about specified
 web-projects and generates knowledge bases in Javascript or its own
 format
robot-environment: hobby
modified-date: 22 Jul 1998
modified-by: Marius Dahler

robot-id:           wombat
robot-name:         The Web Wombat 
robot-cover-url:    http://www.intercom.com.au/wombat/
robot-details-url:
robot-owner-name:   Internet Communications
robot-owner-url:    http://www.intercom.com.au/
robot-owner-email:  phill@intercom.com.au
robot-status:
robot-purpose:      indexing, statistics.
robot-type:
robot-platform:
robot-availability:
robot-exclusion:    no.
robot-exclusion-useragent:
robot-noindex:
robot-host:         qwerty.intercom.com.au
robot-from:         no
robot-useragent:    no
robot-language:     IBM Rexx/VisualAge C++ under OS/2.
robot-description:  The robot is the basis of the Web Wombat search engine
	(Australian/New Zealand content ONLY).
robot-history:
robot-environment:
modified-date:      Thu Feb 29 00:39:49 1996.
modified-by:

robot-id:           worm
robot-name:         The World Wide Web Worm
robot-cover-url:    http://www.cs.colorado.edu/home/mcbryan/WWWW.html
robot-details-url:
robot-owner-name:   Oliver McBryan
robot-owner-url:    http://www.cs.colorado.edu/home/mcbryan/Home.html
robot-owner-email:  mcbryan@piper.cs.colorado.edu
robot-status:       
robot-purpose:      indexing
robot-type:         
robot-platform:     
robot-availability: 
robot-exclusion:    
robot-exclusion-useragent:
robot-noindex:      no
robot-host:         piper.cs.colorado.edu
robot-from:         
robot-useragent:    
robot-language:     
robot-description:  indexing robot, actually has quite flexible search
	options
robot-history:      
robot-environment:
modified-date:      
modified-by:

robot-id: wwwc
robot-name: WWWC Ver 0.2.5
robot-cover-url: http://www.kinet.or.jp/naka/tomo/wwwc.html
robot-details-url:
robot-owner-name: Tomoaki Nakashima.
robot-owner-url: http://www.kinet.or.jp/naka/tomo/
robot-owner-email: naka@kinet.or.jp
robot-status: active
robot-purpose: maintenance
robot-type: standalone
robot-platform: windows, windows95, windowsNT
robot-availability: binary
robot-exclusion: yes
robot-exclusion-useragent: WWWC
robot-noindex: no
robot-host:
robot-from: yes
robot-useragent: WWWC/0.25 (Win95)
robot-language: c
robot-description:
robot-history: 1997
robot-environment: hobby
modified-date: Tuesday, 18 Feb 1997 06:02:47 GMT
modified-by: Tomoaki Nakashima (naka@kinet.or.jp)

robot-id: wz101
robot-name: WebZinger
robot-details-url: http://www.imaginon.com/wzindex.html
robot-cover-url: http://www.imaginon.com
robot-owner-name: ImaginOn, Inc
robot-owner-url: http://www.imaginon.com
robot-owner-email: info@imaginon.com
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: windows95, windowsNT 4, mac, solaris, unix
robot-availability: binary
robot-exclusion: no
robot-exclusion-useragent: none
robot-noindex: no
robot-host: http://www.imaginon.com/wzindex.html *
robot-from: no
robot-useragent: none
robot-language: java
robot-description: commercial Web Bot that accepts plain text queries, uses
 webcrawler, lycos or excite to get URLs, then visits sites.  If the user's
 filter parameters are met, downloads one picture and a paragraph of text.
 Plays back slide show format of one text paragraph plus image from each site.
robot-history: developed by ImaginOn in 1996 and 1997
robot-environment: commercial
modified-date: Wed, 11 Sep 1997 02:00:00 GMT
modified-by: schwartz@imaginon.com

robot-id: xget
robot-name: XGET
robot-cover-url: http://www2.117.ne.jp/~moremore/x68000/soft/soft.html
robot-details-url: http://www2.117.ne.jp/~moremore/x68000/soft/soft.html
robot-owner-name: Hiroyuki Shigenaga
robot-owner-url: http://www2.117.ne.jp/~moremore/
robot-owner-email: shige@mh1.117.ne.jp
robot-status: active
robot-purpose: mirroring
robot-type: standalone
robot-platform: X68000, X68030
robot-availability: binary
robot-exclusion: yes
robot-exclusion-useragent: XGET
robot-noindex: no
robot-host: *
robot-from: yes
robot-useragent: XGET/0.7
robot-language: c
robot-description: Its purpose is to retrieve updated files. It is run by the end user
robot-history: 1997
robot-environment: hobby
modified-date: Fri, 07 May 1998 17:00:00 GMT
modified-by: Hiroyuki Shigenaga

robot-id: Nederland.zoek
robot-name: Nederland.zoek
robot-cover-url: http://www.nederland.net/
robot-details-url: 
robot-owner-name: System Operator Nederland.net
robot-owner-url: 
robot-owner-email: zoek@nederland.net
robot-status: active
robot-purpose: indexing
robot-type: standalone
robot-platform: unix (Linux)
robot-availability: none
robot-exclusion: yes
robot-exclusion-useragent: Nederland.zoek
robot-noindex: no
robot-host: 193.67.110.*
robot-from: yes
robot-useragent: Nederland.zoek
robot-language: c
robot-description: This robot indexes all .nl sites for the search-engine of Nederland.net
robot-history: Developed at Computel Standby in Apeldoorn, The Netherlands
robot-environment: service
modified-date: Sat, 8 Feb 1997 01:10:00 CET
modified-by: Sander Steffann <sander@nederland.net>

