= Data Crawler =
== Java Interface ==
The DataCrawler interface is probably equivalent to:
* source:trunk/gnowsis/src/org/gnowsis/data/structured/StructuredAdapter.java and
* source:/trunk/gnoDesktopSearch/src/java/org/gnowsis/desktopsearch/crawler
and the following listener interfaces should probably be merged:
* biz.aduna.datasource.crawler.DataCrawlerListener with
* source:/trunk/gnoDesktopSearch/src/java/org/gnowsis/desktopsearch/crawler/CrawlerListener.java
{{{
#!java
/**
 * A DataCrawler accesses the physical source represented by a DataSource
 * and delivers a stream of DataObjects representing the individual items
 * in that source.
 */
public interface DataCrawler {

    /**
     * Returns the root urls of the data inside the adapter.
     * <p>
     * Method from Gnowsis - StructuredAdapter.
     * <p>
     * The data inside the adapter may have a "root" url from which it is
     * possible to reach other urls through the links graph. This is the same
     * function as in GenericAdapter, but this time you must return a set of
     * root urls that point to containers.
     *
     * @return an array of strings, each being a root url that points to a
     *         container
     */
    public String[] getRootUrls();

    /**
     * Lists the sub-containers of the passed container.
     * <p>
     * Method from Gnowsis - StructuredAdapter.
     * <p>
     * This returns an RDF container that represents the containerUri but
     * contains only other containers.
     *
     * @param containerUri the uri of the container
     * @return {@code null} if the container cannot be found, otherwise a
     *         Container holding Containers
     */
    public Container listSubContainers(String containerUri);

    /**
     * Lists the resources in the passed container.
     * <p>
     * Method from Gnowsis - StructuredAdapter.
     * <p>
     * This returns an RDF container that represents the containerUri but
     * contains only the resources/documents/files/emails in the container.
     * Each container item must have a unique uri in the domain of the adapter
     * and has to be described using exactly these properties:
     * <ul>
     *   <li>rdf:type</li>
     *   <li>rdfs:label</li>
     *   <li>dcterms:modified - date on which the resource was changed,
     *       formatted as
     *       {@code SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss")}</li>
     * </ul>
     * Each resource must have a URI that can be used in a getCBD call.
     *
     * @param containerUri the uri of the container
     * @return {@code null} if the container cannot be found, otherwise a
     *         Container holding Resources
     */
    public Container listResources(String containerUri);

    /**
     * Constants used to indicate why a scan was stopped.
     */
    public enum ExitCode {
        /** The scan procedure terminated normally. */
        COMPLETED,

        /** The DataCrawler was requested to abort the scan procedure. */
        BY_REQUEST,

        /** An error occurred that made further scanning impossible. */
        FATAL_EXCEPTION
    }

    /**
     * Returns the DataSource crawled by this DataCrawler.
     *
     * @return the DataSource crawled by this DataCrawler
     */
    public DataSource getDataSource();

    /**
     * Starts a scan for DataObjects over the configured domain defined
     * in the DataSource. If this is not the first run of this DataCrawler,
     * it will only report the differences with the previous run, unless the
     * previous scan results have been cleared.
     */
    public void scan();

    /**
     * Stops a running scan operation as fast as possible. This method
     * may return before the operation has actually been stopped.
     */
    public void stopScanning();

    /**
     * Clears all stored scan results. Any listeners registered with
     * this DataCrawler will be notified of the removal of the data
     * objects. The next call to {@link #scan()} will again report all
     * data objects in the configured domain.
     */
    public void clearScanResults();

    /**
     * Gets the ScanReport of the last performed scan, or the current
     * scan when a scan is in progress. Returns {@code null} when
     * no scan was performed in this session and there is no scan report
     * available from the previous session.
     *
     * @return the ScanReport of the last session, or {@code null} when
     *         this is not available
     */
    public ScanReport getLastScanReport();

    /**
     * Adds a DataCrawlerListener to which this DataCrawler should
     * report any scanned or cleared data objects.
     *
     * @param listener the DataCrawlerListener to add
     */
    public void addListener(DataCrawlerListener listener);

    /**
     * Removes a DataCrawlerListener from this DataCrawler.
     *
     * @param listener the DataCrawlerListener to remove
     */
    public void removeListener(DataCrawlerListener listener);
}
}}}