001    package railo.runtime.search.lucene2;
002    
003    import java.io.File;
004    import java.io.IOException;
005    import java.net.URL;
006    import java.util.ArrayList;
007    import java.util.HashMap;
008    import java.util.HashSet;
009    import java.util.Iterator;
010    import java.util.Map;
011    import java.util.Map.Entry;
012    import java.util.Set;
013    
014    import org.apache.lucene.analysis.Analyzer;
015    import org.apache.lucene.document.Document;
016    import org.apache.lucene.index.IndexReader;
017    import org.apache.lucene.index.IndexWriter;
018    import org.apache.lucene.queryParser.ParseException;
019    import org.apache.lucene.queryParser.QueryParser;
020    import org.apache.lucene.search.Hits;
021    import org.apache.lucene.search.IndexSearcher;
022    import org.apache.lucene.search.Query;
023    import org.apache.lucene.search.Searcher;
024    import org.apache.lucene.search.spell.Dictionary;
025    import org.apache.lucene.search.spell.LuceneDictionary;
026    import org.apache.lucene.search.spell.SpellChecker;
027    import org.apache.lucene.store.FSDirectory;
028    
029    import railo.commons.io.SystemUtil;
030    import railo.commons.io.log.LogAndSource;
031    import railo.commons.io.res.Resource;
032    import railo.commons.io.res.ResourcesImpl;
033    import railo.commons.io.res.filter.DirectoryResourceFilter;
034    import railo.commons.io.res.filter.ResourceFilter;
035    import railo.commons.io.res.filter.ResourceNameFilter;
036    import railo.commons.io.res.util.FileWrapper;
037    import railo.commons.io.res.util.ResourceUtil;
038    import railo.commons.lang.SerializableObject;
039    import railo.commons.lang.StringUtil;
040    import railo.runtime.op.Caster;
041    import railo.runtime.search.AddionalAttrs;
042    import railo.runtime.search.IndexResult;
043    import railo.runtime.search.IndexResultImpl;
044    import railo.runtime.search.SearchCollectionSupport;
045    import railo.runtime.search.SearchData;
046    import railo.runtime.search.SearchEngineSupport;
047    import railo.runtime.search.SearchException;
048    import railo.runtime.search.SearchIndex;
049    import railo.runtime.search.SearchResulItem;
050    import railo.runtime.search.SearchResulItemImpl;
051    import railo.runtime.search.SuggestionItem;
052    import railo.runtime.search.lucene2.docs.CustomDocument;
053    import railo.runtime.search.lucene2.highlight.Highlight;
054    import railo.runtime.search.lucene2.net.WebCrawler;
055    import railo.runtime.search.lucene2.query.Literal;
056    import railo.runtime.search.lucene2.query.Op;
057    import railo.runtime.type.List;
058    import railo.runtime.type.QueryColumn;
059    import railo.runtime.type.Struct;
060    import railo.runtime.type.StructImpl;
061    import railo.runtime.type.dt.DateTime;
062    
063    /**
064     * 
065     */
066    public final class LuceneSearchCollection extends SearchCollectionSupport {
067        
068            private static final long serialVersionUID = 3430238280421965781L;
069            
070            private Resource collectionDir;
071            private boolean spellcheck;
072            private LogAndSource log;
073        private static final SerializableObject token=new SerializableObject();
074            
075            
076        /**
077         * @param searchEngine
078         * @param name
079         * @param path
080         * @param language
081         * @param lastUpdate
082         * @param created 
083         */
084        public LuceneSearchCollection(SearchEngineSupport searchEngine, String name, Resource path, String language, //int count, 
085                    DateTime lastUpdate, DateTime created,boolean spellcheck) {
086            super(searchEngine, name, path, language, lastUpdate,created);
087            this.spellcheck=spellcheck;
088            collectionDir=getPath().getRealResource(StringUtil.toIdentityVariableName(getName()));
089            
090            log=searchEngine.getLogger();
091            
092        }
093        
/**
 * Creates a collection with spell checking enabled.
 *
 * @param searchEngine engine this collection belongs to
 * @param name name of the collection
 * @param path path passed on to the super class
 * @param language language of the collection
 * @param lastUpdate time of the last update
 * @param created time of creation
 */
public LuceneSearchCollection(
        SearchEngineSupport searchEngine,
        String name,
        Resource path,
        String language,
        DateTime lastUpdate,
        DateTime created) {
    this(searchEngine, name, path, language, lastUpdate, created, true);
}
098    
099        /**
100         * @see railo.runtime.search.SearchCollection#_create()
101         */
102        protected void _create() throws SearchException {
103            try {
104                            if(!collectionDir.exists())collectionDir.createDirectory(true);
105                    }
106            catch (IOException e) {}
107        }
108    
109        /**
110         * @see railo.runtime.search.SearchCollection#_optimize()
111         */
112        protected void _optimize() throws SearchException {
113            IndexWriter[] writers=_getWriters(false);
114            for(int i=0;i<writers.length;i++) {
115                try {
116                    optimizeEL(writers[i]);
117                } 
118                finally {
119                    close(writers[i]);
120                }
121            }
122        }
123    
124        /**
125         *
126         * @see railo.runtime.search.SearchCollectionSupport#_map(railo.commons.io.res.Resource)
127         */
128        protected void _map(Resource path) throws SearchException {
129            throw new SearchException("mapping of existing Collection for file ["+path+"] not supported");
130        }
131    
132        /**
133         * @see railo.runtime.search.SearchCollection#_repair()
134         */
135        protected void _repair() throws SearchException {
136            //throw new SearchException("repair of existing Collection not supported");
137        }
138    
139        /**
140         * @see railo.runtime.search.SearchCollectionSupport#_indexFile(java.lang.String, java.lang.String, railo.commons.io.res.Resource, java.lang.String)
141         */
142        protected IndexResult _indexFile(String id, String title, Resource res,String language) throws SearchException {
143            info(res.getAbsolutePath());
144            _checkLanguage(language);
145            int before=getDocumentCount(id);
146            IndexWriter writer=null;
147            synchronized(token){
148                    try {
149                        writer = _getWriter(id,true);
150                        _index(writer,res,res.getName());
151                        writer.optimize();
152                    } 
153                    catch (Exception e) {
154                        throw new SearchException(e);
155                    }
156                    finally {
157                            close(writer);
158                    }
159                    indexSpellCheck(id);
160            }
161            if(getDocumentCount(id)==before) return new IndexResultImpl(0,0,1);
162            return new IndexResultImpl(0,1,0);
163        }
164    
165        
166    
167            /**
168         * @see railo.runtime.search.SearchCollectionSupport#_indexPath(java.lang.String, java.lang.String, railo.commons.io.res.Resource, java.lang.String[], boolean, java.lang.String)
169         */
170        protected IndexResult _indexPath(String id, String title, Resource dir,String[] extensions, boolean recurse, String language) throws SearchException {
171            info(dir.getAbsolutePath());
172            _checkLanguage(language);
173            int doccount=0;
174            IndexWriter writer=null;
175            synchronized(token){
176                    try {
177                        writer = _getWriter(id,true);
178                                    doccount=_list(0,writer,dir,new LuceneExtensionFileFilter(extensions,recurse),"");
179                            //optimizeEL(writer);
180                                    writer.optimize();
181                    } 
182                    catch (IOException e) {
183                                    throw new SearchException(e);
184                            }
185                    finally {
186                            close(writer);
187                    }
188                    indexSpellCheck(id);
189            }
190            
191            
192            
193            return new IndexResultImpl(0,0,doccount);
194        } 
195    
196        private void optimizeEL(IndexWriter writer) {
197            if(writer==null)return;
198            try {
199                            writer.optimize();
200                    } 
201            catch (Throwable t) {
202                            //print.printST(t);
203                    }
204            }
205    
206            private void indexSpellCheck(String id) throws SearchException  {
207            if(!spellcheck) return;
208            
209            IndexReader reader=null;
210            FSDirectory spellDir=null;
211            
212            Resource dir = _createSpellDirectory(id);
213                    try {
214                    File spellFile = FileWrapper.toFile(dir);
215                    spellDir = FSDirectory.getDirectory(spellFile);
216                    reader = _getReader(id,false);
217                    Dictionary dictionary = new LuceneDictionary(reader,"contents");
218                            
219                    SpellChecker spellChecker = new SpellChecker(spellDir);
220                    spellChecker.indexDictionary(dictionary);
221                            
222            }
223            catch(IOException ioe) {
224                    throw new SearchException(ioe);
225            }
226            finally {
227                    flushEL(reader);
228                            closeEL(reader);
229            }
230            }
231    
232            private void close(IndexWriter writer) throws SearchException {
233            if(writer!=null){
234                    //print.out("w-close");
235                    try {
236                                    writer.close();
237                            } catch (IOException e) {
238                                    throw new SearchException(e);
239                            }
240            }
241            }
242    
243        private static void close(IndexReader reader) throws SearchException {
244            if(reader!=null){
245                    try {
246                                    reader.close();
247                            } catch (IOException e) {
248                                    throw new SearchException(e);
249                            }
250            }
251            }
252    
253        private static void close(Searcher searcher) throws SearchException {
254            if(searcher!=null){
255                    try {
256                            searcher.close();
257                            } catch (IOException e) {
258                                    throw new SearchException(e);
259                            }
260            }
261            }
262    
263        private static void flushEL(IndexReader reader) {
264            //print.out("r-closeEL");
265            if(reader!=null){
266                    try {
267                                    reader.flush();
268                            } catch (Throwable t) {
269                                    //throw new SearchException(t);
270                            }
271            }
272            }
273        private static void closeEL(IndexReader reader) {
274            //print.out("r-closeEL");
275            if(reader!=null){
276                    try {
277                                    reader.close();
278                            } catch (Throwable t) {
279                                    //throw new SearchException(t);
280                            }
281            }
282            }
283    
284            /**
285         * @see railo.runtime.search.SearchCollection#_indexURL(java.lang.String, java.lang.String, java.net.URL, java.lang.String[], boolean, java.lang.String)
286         */
287        protected IndexResult _indexURL(String id, String title, URL url,String[] extensions, boolean recurse, String language)throws SearchException {
288            //timeout=ThreadLocalPageContext.getConfig().getRequestTimeout().getMillis();
289            return _indexURL(id, title, url, extensions, recurse, language,50000L);
290        }
291        public IndexResult _indexURL(String id, String title, URL url,String[] extensions, boolean recurse, String language, long timeout)throws SearchException {
292            _checkLanguage(language);
293            info(url.toExternalForm());
294            int before=getDocumentCount(id);
295            IndexWriter writer=null;
296            synchronized(token){
297                    try {
298                        writer = _getWriter(id,true);
299                        new WebCrawler(log).parse(writer, url, extensions, recurse,timeout);
300                        
301                        writer.optimize();
302                    } 
303                    catch (Exception e) {
304                        throw new SearchException(e);
305                    }
306                    finally {
307                            close(writer);
308                    }
309                    indexSpellCheck(id);
310            }
311            if(getDocumentCount(id)==before) return new IndexResultImpl(0,0,1);
312            return new IndexResultImpl(0,1,0);
313            //throw new SearchException("url indexing not supported");
314            
315        }
316    
317        /**
318         * @param id
319         * @param title
320         * @param keyColumn
321         * @param bodyColumns
322         * @param language
323         * @param custom1
324         * @param custom2
325         * @param custom3
326         * @param custom4
327         * @return 
328         * @throws SearchException
329         */
330        protected IndexResult _deleteCustom(String id,QueryColumn keyColumn) throws SearchException {
331    
332            int countBefore=0;
333            int countAfter=0; 
334            
335            Map<String,Document> docs=new HashMap<String,Document>();
336            
337            Set<String> keys=toSet(keyColumn);
338            IndexWriter writer=null;
339            String key;
340            IndexReader reader=null;
341            Document doc;
342            
343            synchronized(token){
344                    try {
345                            try {
346                                    reader=_getReader(id,false);
347                                    countBefore=reader.maxDoc();
348                                    for(int i=0;i<countBefore;i++) {
349                                            doc=reader.document(i);
350                                            key=doc.getField("key").stringValue();
351                                            if(!keys.contains(key))
352                                                    docs.put(key,doc);
353                                    }
354                            }
355                            catch(Exception e) {}
356                            finally {
357                                    close(reader);
358                            } 
359                            countAfter=docs.size(); 
360                            
361                            
362                            writer = _getWriter(id,true);
363                            Iterator<Entry<String, Document>> it = docs.entrySet().iterator();
364                            while(it.hasNext()) {
365                                    writer.addDocument(it.next().getValue());
366                            }
367                            optimizeEL(writer);
368                        
369                    } catch (IOException e) {
370                                    throw new SearchException(e);
371                            }
372                    finally {
373                            close(writer);
374                    }
375                    indexSpellCheck(id);
376            }
377            int removes=countBefore-countAfter;
378    
379            return new IndexResultImpl(removes,0,0);
380        }
381    
382        private Set<String> toSet(QueryColumn column) {
383            Set<String> set=new HashSet<String>();
384            Iterator it = column.valueIterator();
385            while(it.hasNext()){
386                    set.add(Caster.toString(it.next(),null));
387            }
388                    return set;
389            }
390    
391            /**
392         * @param id
393         * @param title
394         * @param keyColumn
395         * @param bodyColumns
396         * @param language
397         * @param custom1
398         * @param custom2
399         * @param custom3
400         * @param custom4
401         * @return 
402         * @throws SearchException
403         */
404        protected IndexResult _indexCustom(String id, Object title, QueryColumn keyColumn, QueryColumn[] bodyColumns, String language,
405                    Object urlpath,Object custom1,Object custom2,Object custom3,Object custom4) throws SearchException {
406            _checkLanguage(language);
407            String t;
408            String url;
409            String c1;
410            String c2;
411            String c3;
412            String c4;
413            
414            int countExisting=0; 
415            int countAdd=keyColumn.size();
416            int countNew=0;
417            
418            Map<String,Document> docs=new HashMap<String,Document>();
419            IndexWriter writer=null;
420            synchronized(token){
421                    try {
422                            // read existing reader
423                            IndexReader reader=null;
424                            try {
425                                    reader=_getReader(id,false);
426                                    int len=reader.maxDoc();
427                                    Document doc;
428                                    for(int i=0;i<len;i++) {
429                                            doc=reader.document(i);
430                                            docs.put(doc.getField("key").stringValue(),doc);
431                                    }
432                            }
433                            catch(Exception e) {}
434                            finally {
435                                    close(reader);
436                            }  
437            
438                            countExisting=docs.size();
439                            writer = _getWriter(id,true);
440                            int len = keyColumn.size();
441                            String key;
442                            for(int i=1;i<=len;i++) {
443                                key=Caster.toString(keyColumn.get(i,null),null);
444                                if(key==null) continue;
445                                
446                                StringBuilder body=new StringBuilder();
447                                for(int y=0;y<bodyColumns.length;y++) {
448                                    Object tmp=bodyColumns[y].get(i,null);
449                                    if(tmp!=null){
450                                    body.append(tmp.toString());
451                                    body.append(' ');
452                                    }
453                                }
454                            //t=(title==null)?null:Caster.toString(title.get(i,null),null);
455                            //url=(urlpath==null)?null:Caster.toString(urlpath.get(i,null),null);
456                            
457                            t=getRow(title,i);
458                            url=getRow(urlpath,i);
459                            c1=getRow(custom1,i);
460                            c2=getRow(custom2,i);
461                            c3=getRow(custom3,i);
462                            c4=getRow(custom4,i);
463                            
464                            docs.put(key,CustomDocument.getDocument(t,key,body.toString(),url,c1,c2,c3,c4));
465                            }
466                            countNew=docs.size();
467                            Iterator<Entry<String, Document>> it = docs.entrySet().iterator();
468                            Entry<String, Document> entry;
469                            Document doc;
470                            while(it.hasNext()) {
471                                    entry = it.next();
472                                    doc = entry.getValue();
473                                    writer.addDocument(doc);
474                            }
475                            optimizeEL(writer);
476                        //writer.optimize();
477                        
478                    }
479                    catch(IOException ioe) {
480                        throw new SearchException(ioe);
481                    }
482                    finally {
483                            close(writer);
484                    }
485                    indexSpellCheck(id);
486            }
487            int inserts=countNew-countExisting;
488    
489            return new IndexResultImpl(0,inserts,countAdd-inserts);
490        }
491    
492            private String getRow(Object column, int row) {
493                    if(column instanceof QueryColumn){
494                            return Caster.toString(((QueryColumn)column).get(row,null),null);
495                    }
496                    if(column!=null) return Caster.toString(column,null);
497                    return null;
498            }
499    
500            /**
501         * @see railo.runtime.search.SearchCollection#_purge()
502         */
503        protected IndexResult _purge() throws SearchException {
504            SearchIndex[] indexes=getIndexes();
505            int count=0;
506            for(int i=0;i<indexes.length;i++) {
507                    count+=getDocumentCount(indexes[i].getId());
508            }
509            ResourceUtil.removeChildrenEL(collectionDir);
510            return new IndexResultImpl(count,0,0);
511        }
512    
513        /**
514         * @see railo.runtime.search.SearchCollection#_delete()
515         */
516        protected IndexResult _delete() throws SearchException {
517            SearchIndex[] indexes=getIndexes();
518            int count=0;
519            for(int i=0;i<indexes.length;i++) {
520                    count+=getDocumentCount(indexes[i].getId());
521            }
522            ResourceUtil.removeEL(collectionDir, true);
523            return new IndexResultImpl(count,0,0);
524        }
525    
526            /**
527         * @see railo.runtime.search.SearchCollectionSupport#_deleteIndex(java.lang.String)
528         */
529        protected IndexResult _deleteIndex(String id) throws SearchException {
530            int count=getDocumentCount(id);
531            ResourceUtil.removeEL(_getIndexDirectory(id,true), true);
532            return new IndexResultImpl(count,0,0);
533        }
534    
535        /**
536         * @see railo.runtime.search.SearchCollection#_search(railo.runtime.search.SearchData, java.lang.String, java.lang.String, short, java.lang.String, java.lang.String[])
537         */
538        public SearchResulItem[] _search(SearchData data, String criteria, String language,short type, 
539                    String categoryTree, String[] category) throws SearchException {
540            try {
541                    
542                if(type!=SEARCH_TYPE_SIMPLE) throw new SearchException("search type explicit not supported");
543                Analyzer analyzer = SearchUtil.getAnalyzer(language);
544                Query query=null;
545                Op op=null;
546                Object highlighter=null;
547                railo.runtime.search.lucene2.query.QueryParser queryParser=new railo.runtime.search.lucene2.query.QueryParser();
548                            AddionalAttrs aa = AddionalAttrs.getAddionlAttrs();
549                            aa.setHasRowHandling(true);
550                            int startrow=aa.getStartrow();
551                            int maxrows=aa.getMaxrows();
552                            
553                            
554                            if(!criteria.equals("*")) {
555                                    // FUTURE take this data from calling parameters
556                                    op=queryParser.parseOp(criteria);
557                                    if(op==null) criteria="*";
558                                    else criteria=op.toString();
559                                    try {
560                                            
561                                            query = new QueryParser("contents",analyzer ).parse(criteria);
562                                            highlighter = Highlight.createHighlighter(query,aa.getContextHighlightBegin(),aa.getContextHighlightEnd());
563                                            
564                                
565                                    }
566                        catch (ParseException e) {
567                                            throw new SearchException(e);
568                                    }
569                            }
570                            
571                            Resource[] files = _getIndexDirectories();
572                            
573                if(files==null) return new SearchResulItem[0];
574                ArrayList<SearchResulItem> list=new ArrayList<SearchResulItem>();
575                String ct,c;
576                
577                ArrayList<String> spellCheckIndex=spellcheck?new ArrayList<String>():null;
578                
579                int count=0;
580                IndexReader reader = null;
581                Searcher searcher = null;
582                try {
583                        outer:for(int i=0;i<files.length;i++) {
584                                    if(removeCorrupt(files[i]))continue;
585                            String strFile=files[i].toString();
586                                SearchIndex si = (SearchIndex)indexes.get(files[i].getName());
587                                
588                                if(si==null)continue;
589                                ct=si.getCategoryTree();
590                                c=List.arrayToList(si.getCategories(), ",");
591                                
592                                // check category tree
593                                if(!matchCategoryTree(ct,categoryTree))continue;
594                                if(!matchCategories(si.getCategories(),category))continue;
595                                
596                                Document doc;
597                                String id=files[i].getName();
598                                data.addRecordsSearched(_countDocs(strFile));
599                                
600                            reader = _getReader(id,false);
601                            if(query==null && "*".equals(criteria)) {
602                                    int len=reader.numDocs();
603                                        for(int y=0;y<len;y++) {
604                                            if(startrow>++count)continue;
605                                            if(maxrows>-1 && list.size()>=maxrows) break outer;
606                                            doc = reader.document(y);
607                                                list.add(createSearchResulItem(highlighter,analyzer,doc,id,1,ct,c,aa.getContextPassages(),aa.getContextBytes()));
608                                        }
609                                }
610                                else {
611                                        if(spellcheck)spellCheckIndex.add(id);
612                                    // search
613                                        searcher = new IndexSearcher(reader);
614                                    Hits hits = searcher.search(query);
615                                        int len=hits.length();
616                                        for (int y=0; y<len; y++) {
617                                            if(startrow>++count)continue;
618                                            if(maxrows>-1 && list.size()>=maxrows) break outer;
619                                            //list.add(new SearchResulItemHits(hits,y,highlighter,analyzer,id,ct,c,aa.getContextPassages(),aa.getContextBytes()));
620                                            doc = hits.doc(y);
621                                                list.add(createSearchResulItem(highlighter,analyzer,doc,id,hits.score(y),ct,c,aa.getContextPassages(),aa.getContextBytes()));
622                                        }  
623                                       
624                                }
625                         
626                            }
627                }
628                    finally {
629                            close(reader);
630                            close(searcher);
631                    }   
632                
633                // spellcheck
634                //SearchData data=ThreadLocalSearchData.get();
635                if(spellcheck && data!=null) {
636                    if(data.getSuggestionMax()>=list.size()) {
637                            
638                            Map suggestions = data.getSuggestion();
639                            Iterator it = spellCheckIndex.iterator();
640                            String id;
641                            Literal[] literals = queryParser.getLiteralSearchedTerms();
642                            String[] strLiterals = queryParser.getStringSearchedTerms();
643                            boolean setSuggestionQuery=false;
644                            while(it.hasNext()) {
645                                    id=(String) it.next();
646                                    // add to set to remove duplicate values
647                                    SuggestionItem si;
648                                    SpellChecker sc = getSpellChecker(id);
649                                    for(int i=0;i<strLiterals.length;i++) {
650                                            String[] arr = sc.suggestSimilar(strLiterals[i], 1000);
651                                            if(arr.length>0){
652                                                    literals[i].set("<suggestion>"+arr[0]+"</suggestion>"); 
653                                                    setSuggestionQuery=true;
654                                                    
655                                                    si=(SuggestionItem) suggestions.get(strLiterals[i]);
656                                                    if(si==null)suggestions.put(strLiterals[i],new SuggestionItem(arr));
657                                                    else si.add(arr);
658                                            }
659                                    }
660                                    }
661                            if(setSuggestionQuery)data.setSuggestionQuery(op.toString());
662                    }
663                }
664                
665                    return list.toArray(new SearchResulItem[list.size()]);
666            } 
667            catch (IOException e)           { throw new SearchException(e); }
668            
669        }
670        
671        private SpellChecker getSpellChecker(String id) throws IOException {
672            FSDirectory siDir = FSDirectory.getDirectory(FileWrapper.toFile(_getSpellDirectory(id)));
673            SpellChecker spellChecker = new SpellChecker(siDir);
674            return spellChecker;
675        }
676    
677            private boolean removeCorrupt(Resource dir) {
678            if(ResourceUtil.isEmptyFile(dir)) {
679                    ResourceUtil.removeEL(dir, true);
680                    return true;
681            }
682            return false;
683            }
684    
685            private static SearchResulItem createSearchResulItem(Object highlighter,Analyzer a,Document doc, String name, float score, String ct, String c,int maxNumFragments, int maxLength) {
686                    String contextSummary="";
687                    if(maxNumFragments>0)
688                            contextSummary=Highlight.createContextSummary(highlighter,a,doc.get("contents"),maxNumFragments,maxLength,doc.get("summary"));
689                    String summary = doc.get("summary");
690                    
691                    return new SearchResulItemImpl(
692                    name,
693                    doc.get("title"),
694                    score,
695                    doc.get("key"),
696                    doc.get("url"),
697                    summary,contextSummary,
698                    ct,c,
699                    doc.get("custom1"),
700                    doc.get("custom2"),
701                    doc.get("custom3"),
702                    doc.get("custom4"),
703                    doc.get("mime-type"),
704                    doc.get("author"),
705                    doc.get("size"));
706    
707            }
708    
709            private boolean matchCategories(String[] categoryIndex, String[] categorySearch) {
710            if(categorySearch==null ||categorySearch.length==0) return true;
711            String search;
712            for(int s=0;s<categorySearch.length;s++) {
713                    search=categorySearch[s];
714                    for(int i=0;i<categoryIndex.length;i++) {
715                            if(search.equals(categoryIndex[i]))return true;
716                    }
717            }
718                    return false;
719            }
720    
721            private boolean matchCategoryTree(String categoryTreeIndex, String categoryTreeSearch) {
722            //if(StringUtil.isEmpty(categoryTreeIndex) || categoryTreeIndex.equals("/")) return true;
723            //if(StringUtil.isEmpty(categoryTreeSearch) || categoryTreeSearch.equals("/")) return true;
724            return categoryTreeIndex.startsWith(categoryTreeSearch);
725            }
726    
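       /**
        * walks the given resource recursively and indexes every file found, unreadable resources are skipped
        * @param doccount number of documents indexed so far
        * @param writer writer the documents are added to
        * @param res file or directory to index
        * @param filter filter used when listing a directory, null lists everything
        * @param url url assigned to the resource, children get "/" and their name appended
        * @return returns doccount increased by the number of files that were indexed
        */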
736        private int _list(int doccount,IndexWriter writer, Resource res,ResourceFilter filter,String url) {
737            
738            if (res.isReadable()) {
739                    if (res.exists() && res.isDirectory()) {
740                    Resource[] files = (filter==null)?res.listResources():res.listResources(filter);
741                    if (files != null) {
742                        for (int i = 0; i < files.length; i++) {
743                            if(removeCorrupt(files[i])){
744                                    continue;
745                            }
746                            doccount=_list(doccount,writer, files[i],filter,url+"/"+files[i].getName());
747                        }
748                    }
749                } 
750                else {
751                    try {
752                            info(res.getAbsolutePath());
753                        _index(writer,res,url);
754                        doccount++;
755                    } catch (Exception e) {}
756                }
757            }
758            return doccount;
759        }
760        
        /**
         * index a single file, a file that does not exist is ignored
         * @param writer writer the document is added to
         * @param file file to index
         * @param url url stored with the document
         * @throws IOException
         */
769        private void _index(IndexWriter writer, Resource file,String url) throws IOException {
770            if(!file.exists()) return;
771            writer.addDocument(DocumentUtil.toDocument(file,url,SystemUtil.getCharset()));
772        }
773        
774    
775        
776        
777    
778        /**
779         * @param id
780         * @return returns the Index Directory
781         */
782        private Resource _getIndexDirectory(String id, boolean createIfNotExists) {
783            Resource indexDir=collectionDir.getRealResource(id);
784            if(createIfNotExists && !indexDir.exists())indexDir.mkdirs();
785            return indexDir;
786        }
787    
        /**
         * opens an IndexWriter on the index directory of the given id, the directory is created when missing
         * @param id id of the index
         * @param create passed to the IndexWriter constructor
         * @return returns the Writer
         * @throws SearchException
         * @throws IOException
         */
796        private IndexWriter _getWriter(String id,boolean create) throws SearchException, IOException {
797            // FUTURE support for none file -> Directory Object
798            Resource dir = _getIndexDirectory(id,true);
799            return new IndexWriter(FileWrapper.toFile(dir), SearchUtil.getAnalyzer(getLanguage()), create);
800            //return new ResourceIndexWriter(dir, SearchUtil.getAnalyzer(getLanguage()), create);
801            /*try {
802                    return new ResourceIndexWriter(dir, SearchUtil.getAnalyzer(getLanguage()), true);
803            } catch (IOException e) {
804                    ResourceUtil.removeChildrenEL(dir);
805                            dir.getResourceProvider().unlock(dir);
806                            return new ResourceIndexWriter(dir, SearchUtil.getAnalyzer(getLanguage()),true);
807                    }*/
808        }
809    
810        private IndexReader _getReader(String id,boolean absolute) throws IOException {
811            return _getReader(_getFile(id, absolute));
812        }  
813    
814        private IndexReader _getReader(File file) throws IOException {
815            if(!IndexReader.indexExists(file))throw new IOException("there is no index in ["+file+"]");
816            return IndexReader.open(file);
817        }  
818        
819        private File _getFile(String id,boolean absolute) throws IOException {
820            Resource res = absolute?ResourcesImpl.getFileResourceProvider().getResource(id):_getIndexDirectory(id,true);
821            res.getResourceProvider().read(res);
822            return FileWrapper.toFile(res);
823        }  
824        
        /**
         * @return returns the directories found directly inside the collection directory
         */
828        private Resource[] _getIndexDirectories() {
829            Resource[] files = collectionDir.listResources(new DirectoryResourceFilter());
830            
831            return files;
832        }
833    
        /**
         * @param create passed on to _getWriter for every directory
         * @return returns one IndexWriter per directory returned by _getIndexDirectories
         * @throws SearchException
         */
838        private IndexWriter[] _getWriters(boolean create) throws SearchException {
839            Resource[] files = _getIndexDirectories();
840            if(files==null) return new IndexWriter[0];
841            
842            IndexWriter[] writers=new IndexWriter[files.length];
843            for(int i=0;i<files.length;i++) {
844                try {
845                    writers[i]=_getWriter(files[i].getName(),create);
846                } catch (IOException e) {}
847            }
848            return writers;
849        }
850        
851    
852        private int _countDocs(String col)  {
853            // FUTURE add support for none file resources
854            int totalDocs;
855            IndexReader reader=null;
856            try     {
857                    reader=_getReader(col,true);
858                    totalDocs = reader.numDocs();
859            }
860            catch(Exception e)      {
861                return 0;
862            }
863            finally {
864                    closeEL(reader);
865            }
866            return totalDocs;
867        }
868    
869        /**
870         * @deprecated see SearchUtil.getAnalyzer(String language);
871         * @param language
872         * @return returns language matching Analyzer
873         * @throws SearchException
874         */
875        public static Analyzer _getAnalyzer(String language) throws SearchException {
876            return SearchUtil.getAnalyzer(language);
877        }
878    
879        /** 
880         * check given language against collection language
881         * @param language
882         * @throws SearchException
883         */
884        private void _checkLanguage(String language) throws SearchException {
885            
886            if(language!=null && !language.trim().equalsIgnoreCase(getLanguage())) {
887                throw new SearchException("collection Language and Index Language must be of same type, but collection language is of type ["+getLanguage()+"] and index language is of type ["+language+"]");
888            }
889        }
890    
891            /**
892             * @see railo.runtime.search.SearchCollection#getDocumentCount()
893             */
894            public int getDocumentCount(String id) {
895                    try {
896                            if(!_getIndexDirectory(id,false).exists()) return 0;
897                            IndexReader r=null;
898                            int num=0;
899                            try {
900                                    r = _getReader(id,false);
901                                    num=r.numDocs();
902                            }
903                            finally {
904                                    close(r);
905                            }
906                            return num;
907                    }
908                    catch (Exception e) {}
909                    return 0;
910            }
911            
912            /**
913             * @see railo.runtime.search.SearchCollection#getDocumentCount()
914             */
915            public int getDocumentCount() {
916                    int count=0;
917                    SearchIndex[] _indexes = getIndexes();
918                    for(int i=0;i<_indexes.length;i++) {
919                            count+=getDocumentCount(_indexes[i].getId());
920                    }
921                    
922                    return count;
923            }
924    
925            /**
926             * @see railo.runtime.search.SearchCollection#getSize()
927             */
928            public long getSize() {
929                    return ResourceUtil.getRealSize(collectionDir)/1024;
930            }
931    
932            public Object getCategoryInfo() {
933                    Struct categories=new StructImpl();
934                    Struct categorytrees=new StructImpl();
935                    Struct info=new StructImpl();
936                    info.setEL("categories", categories);
937                    info.setEL("categorytrees", categorytrees);
938                    
939                    Iterator it = indexes.keySet().iterator();
940                    String[] cats;
941                    String catTree;
942                    Double tmp;
943                    
944                    while(it.hasNext()) {
945                            SearchIndex index=(SearchIndex) indexes.get(it.next());
946                            
947                            // category tree
948                            catTree = index.getCategoryTree();
949                            tmp=(Double) categorytrees.get(catTree,null);
950                            if(tmp==null) categorytrees.setEL(catTree,Caster.toDouble(1));
951                            else categorytrees.setEL(catTree,Caster.toDouble(tmp.doubleValue()+1));
952                            
953                            // categories
954                            cats = index.getCategories();
955                            for(int i=0;i<cats.length;i++) {
956                                    tmp=(Double) categories.get(cats[i],null);
957                                    if(tmp==null) categories.setEL(cats[i],Caster.toDouble(1));
958                                    else categories.setEL(cats[i],Caster.toDouble(tmp.doubleValue()+1));
959                            }
960                    }
961                    return info;
962            }
963    
964            class ResourceIndexWriter extends IndexWriter {
965    
966                    private Resource dir;
967    
968                    public ResourceIndexWriter(Resource dir, Analyzer analyzer, boolean create) throws IOException {
969                            
970                            super(FileWrapper.toFile(dir), analyzer, create);
971                            this.dir=dir;
972                            dir.getResourceProvider().lock(dir);
973                            
974                    }
975    
976                    /**
977                     *
978                     * @see org.apache.lucene.index.IndexWriter#close()
979                     */
980                    public synchronized void close() throws IOException {
981                            super.close();
982                            dir.getResourceProvider().unlock(dir);
983                    }
984                    
985            }
986        
987            private Resource _createSpellDirectory(String id) {
988            Resource indexDir=collectionDir.getRealResource(id+"_"+(_getMax(true)+1)+"_spell");
989            //print.out("create:"+indexDir);
990            indexDir.mkdirs();
991            return indexDir;
992        }
993        
994        private Resource _getSpellDirectory(String id) {
995            Resource indexDir=collectionDir.getRealResource(id+"_"+_getMax(false)+"_spell");
996            //print.out("get:"+indexDir);
997            return indexDir;
998        }
999    
1000        private long _getMax(boolean delete) {
1001            Resource[] children = collectionDir.listResources(new SpellDirFilter());
1002            long max=0, nbr;
1003            String name;
1004            for(int i=0;i<children.length;i++) {
1005                    name=children[i].getName();
1006                    name=name.substring(0,name.length()-6);
1007                    nbr=Caster.toLongValue(name.substring(name.lastIndexOf('_')+1),0);
1008                    if(delete){
1009                            try {
1010                                            children[i].remove(true);
1011                                            continue;
1012                                    } 
1013                            catch (Throwable t) {}
1014                    }
1015                    if(nbr>max)max=nbr;
1016            }
1017            return max;
1018        }
1019        
1020        private void info(String doc) {
1021                    if(log==null) return;
1022                    log.info("Collection:"+getName(), "indexing "+doc);
1023            }
1024            
1025            public class SpellDirFilter implements ResourceNameFilter {
1026    
1027                    /**
1028                     * filter all names with the following pattern [<name>_<count>_spell]
1029                     * 
1030                     * @see railo.commons.io.res.filter.ResourceNameFilter#accept(railo.commons.io.res.Resource, java.lang.String)
1031                     */
1032                    public boolean accept(Resource parent, String name) {
1033                            return name.endsWith("_spell");
1034                    }
1035    
1036            }
1037    }