Context Navigation

Changes between Version 8 and Version 9 of ApertureExtractor

Timestamp: 10/12/05 16:23:24 (20 years ago)
Author: dburkhar
Comment: --

Legend:

: Unmodified
: Added
: Removed
: Modified

ApertureExtractor

-                      v8
+                      v9
  */
 public interface Extractor {
+    /**
+     * Returns a plaintext representation of the file.
+     * @param source the file to look into
+     * @param mimetype the MIME type that gnowsis has identified for this file
+     * @return the plaintext, or null if the file contains no plaintext. If the plaintext could not be
+     * extracted, an exception is thrown instead.
+     * @throws ExtractionException when something goes wrong with extraction
+     * @throws FileNotFoundException when the file does not exist
+     */
+    public String getPlaintext(File source, String mimetype) throws FileNotFoundException, ExtractionException ;
+    /**
+     * Creates a Lucene document.
+     * To see which fields are needed, look at the top of this interface.
+     * @param file the file to create the Lucene document from
+     * @param uri the URI identifying the passed file. You may need it when you add sophisticated RDF information.
+     * @param mimetype the MIME type of the passed file/stream. If your extractor can handle multiple MIME types, this can be handy.
+     * @param options optional options that may help you.
+     * @return a lucene document
+     */
+    public Document createLuceneDocument(File file, String uri, String mimetype, Object options) throws IOException, DocumentExtractorException;
+    public Document createLuceneDocument(InputStream stream, String uri, String mimetype, Object options) throws IOException, DocumentExtractorException;
+}