| | 16 | |
| | 17 | /** |
| | 18 | * Return a plaintext representation of the file. |
| | 19 | * @param source the file to look into |
| | 20 | * @param mimetype the mimetype that gnowsis has identified for this file |
| | 21 | * @return the extracted plaintext, or null if the file contains no plaintext. If the text could not be |
| | 22 | * extracted, an exception is thrown instead. |
| | 23 | * @throws ExtractionException when something goes wrong with extraction |
| | 24 | * @throws FileNotFoundException when the file does not exist |
| | 25 | */ |
| | 26 | public String getPlaintext(File source, String mimetype) throws FileNotFoundException, ExtractionException ; |
| | 27 | |
| | 28 | |
| | 29 | |
| | 30 | |
| | 31 | |
| | 32 | /** |
| | 33 | * Create a Lucene document for the passed file. |
| | 34 | * To see which fields are needed, look at the top of this class. |
| | 35 | * @param file the file to create the document for |
| | 36 | * @param uri the URI identifying the passed file. You may need it when you add sophisticated RDF information. |
| | 37 | * @param mimetype the mimetype of the passed file/stream. This can be handy if your extractor handles multiple mimetypes. |
| | 38 | * @param options optional options that may help you |
| | 39 | * @return a Lucene document |
| | 40 | */ |
| | 41 | public Document createLuceneDocument(File file, String uri, String mimetype, Object options) throws IOException, DocumentExtractorException; |
| | 42 | /** Stream variant of createLuceneDocument: takes the content as an InputStream instead of a File. NOTE(review): uri, mimetype and options presumably have the same meaning as in the File variant; confirm with implementations. */ |
| | 43 | public Document createLuceneDocument(InputStream stream, String uri, String mimetype, Object options) throws IOException, DocumentExtractorException; |
| | 44 | |
| | 45 | |