001/**
002 *
003 * Copyright (c) 2014, the Railo Company Ltd. All rights reserved.
004 *
005 * This library is free software; you can redistribute it and/or
006 * modify it under the terms of the GNU Lesser General Public
007 * License as published by the Free Software Foundation; either 
008 * version 2.1 of the License, or (at your option) any later version.
009 * 
010 * This library is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013 * Lesser General Public License for more details.
014 * 
015 * You should have received a copy of the GNU Lesser General Public 
016 * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
017 * 
018 **/
019package lucee.runtime.search.lucene2.docs;
020
021import java.io.IOException;
022import java.io.Reader;
023
024import lucee.commons.io.IOUtil;
025import lucee.commons.io.res.Resource;
026import lucee.commons.lang.StringUtil;
027import lucee.runtime.op.Caster;
028
029import org.apache.lucene.document.Document;
030
031/** A utility for making Lucene Documents from a File. */
032
033public final class FileDocument {
034    
035    //private static final char FILE_SEPARATOR = System.getProperty("file.separator").charAt(0);
036    private static final int SUMMERY_SIZE=200;
037    
038  /** Makes a document for a File.
039    <p>
040    The document has three fields:
041    <ul>
042    <li><code>path</code>--containing the pathname of the file, as a stored,
043    tokenized field;
044    <li><code>modified</code>--containing the last modified date of the file as
045    a keyword field as encoded by <a
046    href="lucene.document.DateField.html">DateField</a>; and
047    <li><code>contents</code>--containing the full contents of the file, as a
048    Reader field;
049 * @param res
050 * @return matching document
051 * @throws IOException
052    */
053  public static Document getDocument(Resource res,String charset)
054       throws IOException {
055         
056    // make a new, empty document
057    Document doc = new Document();
058    doc.add(FieldUtil.UnIndexed("mime-type", "text/plain"));
059
060    String content=IOUtil.toString(res,charset);
061    FieldUtil.setRaw(doc,content);
062    //doc.add(FieldUtil.UnIndexed("raw", content));
063    doc.add(FieldUtil.Text("contents", content.toLowerCase()));
064    doc.add(FieldUtil.UnIndexed("summary",StringUtil.max(content,SUMMERY_SIZE)));
065    return doc;
066  }
067  
068
069  public static Document getDocument(StringBuffer content, Reader r) throws IOException {
070         
071    // make a new, empty document
072    Document doc = new Document();
073    FieldUtil.setMimeType(doc, "text/plain");
074    //
075    String contents=IOUtil.toString(r);
076    if(content!=null)content.append(contents);
077    doc.add(FieldUtil.UnIndexed("size", Caster.toString(contents.length())));
078    FieldUtil.setContent(doc, contents);
079    FieldUtil.setRaw(doc, contents);
080    FieldUtil.setSummary(doc, StringUtil.max(contents,SUMMERY_SIZE),false);
081    return doc;
082  }
083
084  private FileDocument() {}
085}
086