/**
 *
 * Copyright (c) 2014, the Railo Company Ltd. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 **/
package lucee.runtime.search.lucene2.docs;

import java.io.IOException;
import java.io.Reader;

import lucee.commons.io.IOUtil;
import lucee.commons.io.res.Resource;
import lucee.commons.lang.StringUtil;
import lucee.runtime.op.Caster;

import org.apache.lucene.document.Document;

/**
 * A utility for making Lucene Documents from plain-text sources (a
 * {@link Resource} or a {@link Reader}). Every document built here is tagged
 * with the mime type <code>text/plain</code>.
 * <p>
 * The class only offers static factory methods and cannot be instantiated.
 */
public final class FileDocument {

    /** Limit handed to <code>StringUtil.max</code> when deriving the summary text. */
    private static final int SUMMARY_SIZE = 200;

    /** Static utility class; no instances. */
    private FileDocument() {
    }

    /**
     * Makes a document from the text of a resource.
     * <p>
     * The resource is read completely into a String using the given charset.
     * The following is then put on the document, in this order:
     * <ul>
     * <li><code>mime-type</code> -- always <code>text/plain</code>, added via
     * <code>FieldUtil.UnIndexed</code>;</li>
     * <li>the unmodified text, handed to <code>FieldUtil.setRaw</code>;</li>
     * <li><code>contents</code> -- the text converted to lower case, added via
     * <code>FieldUtil.Text</code>;</li>
     * <li><code>summary</code> -- the result of
     * <code>StringUtil.max(text, 200)</code>, added via
     * <code>FieldUtil.UnIndexed</code>.</li>
     * </ul>
     *
     * @param res resource whose text is loaded
     * @param charset charset passed to <code>IOUtil.toString</code> for decoding the resource
     * @return the newly created document
     * @throws IOException if reading the resource fails
     */
    public static Document getDocument(Resource res, String charset) throws IOException {
        final Document document = new Document();
        document.add(FieldUtil.UnIndexed("mime-type", "text/plain"));

        final String text = IOUtil.toString(res, charset);
        FieldUtil.setRaw(document, text);

        final String lowered = text.toLowerCase();
        document.add(FieldUtil.Text("contents", lowered));

        final String summary = StringUtil.max(text, SUMMARY_SIZE);
        document.add(FieldUtil.UnIndexed("summary", summary));
        return document;
    }

    /**
     * Makes a document from the text supplied by a reader.
     * <p>
     * The reader is drained into a String through <code>IOUtil.toString</code>;
     * this method does not itself close the reader. When <code>content</code>
     * is not <code>null</code>, the text that was read is appended to it so the
     * caller gets hold of it as well.
     * <p>
     * The document receives, in this order: the mime type
     * <code>text/plain</code> (via <code>FieldUtil.setMimeType</code>), a
     * <code>size</code> field holding the number of characters read (via
     * <code>FieldUtil.UnIndexed</code>), and then the text through
     * <code>FieldUtil.setContent</code>, <code>FieldUtil.setRaw</code> and --
     * shortened by <code>StringUtil.max(text, 200)</code> --
     * <code>FieldUtil.setSummary</code> (called with <code>false</code> as its
     * last argument).
     *
     * @param content optional buffer that receives the text read; may be <code>null</code>
     * @param r reader providing the text
     * @return the newly created document
     * @throws IOException if reading from the reader fails
     */
    public static Document getDocument(StringBuffer content, Reader r) throws IOException {
        final Document document = new Document();
        FieldUtil.setMimeType(document, "text/plain");

        final String text = IOUtil.toString(r);
        if (content != null) {
            content.append(text);
        }

        final String size = Caster.toString(text.length());
        document.add(FieldUtil.UnIndexed("size", size));

        FieldUtil.setContent(document, text);
        FieldUtil.setRaw(document, text);
        FieldUtil.setSummary(document, StringUtil.max(text, SUMMARY_SIZE), false);
        return document;
    }
}