001 package railo.runtime.search.lucene2; 002 003 import java.io.File; 004 import java.io.IOException; 005 import java.net.URL; 006 import java.util.ArrayList; 007 import java.util.HashMap; 008 import java.util.HashSet; 009 import java.util.Iterator; 010 import java.util.Map; 011 import java.util.Map.Entry; 012 import java.util.Set; 013 014 import org.apache.lucene.analysis.Analyzer; 015 import org.apache.lucene.document.Document; 016 import org.apache.lucene.index.IndexReader; 017 import org.apache.lucene.index.IndexWriter; 018 import org.apache.lucene.queryParser.ParseException; 019 import org.apache.lucene.queryParser.QueryParser; 020 import org.apache.lucene.search.Hits; 021 import org.apache.lucene.search.IndexSearcher; 022 import org.apache.lucene.search.Query; 023 import org.apache.lucene.search.Searcher; 024 import org.apache.lucene.search.spell.Dictionary; 025 import org.apache.lucene.search.spell.LuceneDictionary; 026 import org.apache.lucene.search.spell.SpellChecker; 027 import org.apache.lucene.store.FSDirectory; 028 029 import railo.commons.io.SystemUtil; 030 import railo.commons.io.log.LogAndSource; 031 import railo.commons.io.res.Resource; 032 import railo.commons.io.res.ResourcesImpl; 033 import railo.commons.io.res.filter.DirectoryResourceFilter; 034 import railo.commons.io.res.filter.ResourceFilter; 035 import railo.commons.io.res.filter.ResourceNameFilter; 036 import railo.commons.io.res.util.FileWrapper; 037 import railo.commons.io.res.util.ResourceUtil; 038 import railo.commons.lang.SerializableObject; 039 import railo.commons.lang.StringUtil; 040 import railo.runtime.op.Caster; 041 import railo.runtime.search.AddionalAttrs; 042 import railo.runtime.search.IndexResult; 043 import railo.runtime.search.IndexResultImpl; 044 import railo.runtime.search.SearchCollectionSupport; 045 import railo.runtime.search.SearchData; 046 import railo.runtime.search.SearchEngineSupport; 047 import railo.runtime.search.SearchException; 048 import railo.runtime.search.SearchIndex; 049 import railo.runtime.search.SearchResulItem; 050 import railo.runtime.search.SearchResulItemImpl; 051 import railo.runtime.search.SuggestionItem; 052 import railo.runtime.search.lucene2.docs.CustomDocument; 053 import railo.runtime.search.lucene2.highlight.Highlight; 054 import railo.runtime.search.lucene2.net.WebCrawler; 055 import railo.runtime.search.lucene2.query.Literal; 056 import railo.runtime.search.lucene2.query.Op; 057 import railo.runtime.type.List; 058 import railo.runtime.type.QueryColumn; 059 import railo.runtime.type.Struct; 060 import railo.runtime.type.StructImpl; 061 import railo.runtime.type.dt.DateTime; 062 063 /** 064 * 065 */ 066 public final class LuceneSearchCollection extends SearchCollectionSupport { 067 068 private static final long serialVersionUID = 3430238280421965781L; 069 070 private Resource collectionDir; 071 private boolean spellcheck; 072 private LogAndSource log; 073 private static final SerializableObject token=new SerializableObject(); 074 075 076 /** 077 * @param searchEngine 078 * @param name 079 * @param path 080 * @param language 081 * @param lastUpdate 082 * @param created 083 */ 084 public LuceneSearchCollection(SearchEngineSupport searchEngine, String name, Resource path, String language, //int count, 085 DateTime lastUpdate, DateTime created,boolean spellcheck) { 086 super(searchEngine, name, path, language, lastUpdate,created); 087 this.spellcheck=spellcheck; 088 collectionDir=getPath().getRealResource(StringUtil.toIdentityVariableName(getName())); 089 090 log=searchEngine.getLogger(); 091 092 } 093 094 public LuceneSearchCollection(SearchEngineSupport searchEngine, String name, Resource path, String language, //int count, 095 DateTime lastUpdate, DateTime created) { 096 this(searchEngine, name, path, language, lastUpdate, created, true); 097 } 098 099 /** 100 * @see railo.runtime.search.SearchCollection#_create() 101 */ 102 protected void _create() throws SearchException { 103 try { 104 if(!collectionDir.exists())collectionDir.createDirectory(true); 105 } 106 catch (IOException e) {} 107 } 108 109 /** 110 * @see railo.runtime.search.SearchCollection#_optimize() 111 */ 112 protected void _optimize() throws SearchException { 113 IndexWriter[] writers=_getWriters(false); 114 for(int i=0;i<writers.length;i++) { 115 try { 116 optimizeEL(writers[i]); 117 } 118 finally { 119 close(writers[i]); 120 } 121 } 122 } 123 124 /** 125 * 126 * @see railo.runtime.search.SearchCollectionSupport#_map(railo.commons.io.res.Resource) 127 */ 128 protected void _map(Resource path) throws SearchException { 129 throw new SearchException("mapping of existing Collection for file ["+path+"] not supported"); 130 } 131 132 /** 133 * @see railo.runtime.search.SearchCollection#_repair() 134 */ 135 protected void _repair() throws SearchException { 136 //throw new SearchException("repair of existing Collection not supported"); 137 } 138 139 /** 140 * @see railo.runtime.search.SearchCollectionSupport#_indexFile(java.lang.String, java.lang.String, railo.commons.io.res.Resource, java.lang.String) 141 */ 142 protected IndexResult _indexFile(String id, String title, Resource res,String language) throws SearchException { 143 info(res.getAbsolutePath()); 144 _checkLanguage(language); 145 int before=getDocumentCount(id); 146 IndexWriter writer=null; 147 synchronized(token){ 148 try { 149 writer = _getWriter(id,true); 150 _index(writer,res,res.getName()); 151 writer.optimize(); 152 } 153 catch (Exception e) { 154 throw new SearchException(e); 155 } 156 finally { 157 close(writer); 158 } 159 indexSpellCheck(id); 160 } 161 if(getDocumentCount(id)==before) return new IndexResultImpl(0,0,1); 162 return new IndexResultImpl(0,1,0); 163 } 164 165 166 167 /** 168 * @see railo.runtime.search.SearchCollectionSupport#_indexPath(java.lang.String, java.lang.String, railo.commons.io.res.Resource, java.lang.String[], boolean, java.lang.String) 169 */ 170 protected IndexResult _indexPath(String id, String title, Resource dir,String[] extensions, boolean recurse, String language) throws SearchException { 171 info(dir.getAbsolutePath()); 172 _checkLanguage(language); 173 int doccount=0; 174 IndexWriter writer=null; 175 synchronized(token){ 176 try { 177 writer = _getWriter(id,true); 178 doccount=_list(0,writer,dir,new LuceneExtensionFileFilter(extensions,recurse),""); 179 //optimizeEL(writer); 180 writer.optimize(); 181 } 182 catch (IOException e) { 183 throw new SearchException(e); 184 } 185 finally { 186 close(writer); 187 } 188 indexSpellCheck(id); 189 } 190 191 192 193 return new IndexResultImpl(0,0,doccount); 194 } 195 196 private void optimizeEL(IndexWriter writer) { 197 if(writer==null)return; 198 try { 199 writer.optimize(); 200 } 201 catch (Throwable t) { 202 //print.printST(t); 203 } 204 } 205 206 private void indexSpellCheck(String id) throws SearchException { 207 if(!spellcheck) return; 208 209 IndexReader reader=null; 210 FSDirectory spellDir=null; 211 212 Resource dir = _createSpellDirectory(id); 213 try { 214 File spellFile = FileWrapper.toFile(dir); 215 spellDir = FSDirectory.getDirectory(spellFile); 216 reader = _getReader(id,false); 217 Dictionary dictionary = new LuceneDictionary(reader,"contents"); 218 219 SpellChecker spellChecker = new SpellChecker(spellDir); 220 spellChecker.indexDictionary(dictionary); 221 222 } 223 catch(IOException ioe) { 224 throw new SearchException(ioe); 225 } 226 finally { 227 flushEL(reader); 228 closeEL(reader); 229 } 230 } 231 232 private void close(IndexWriter writer) throws SearchException { 233 if(writer!=null){ 234 //print.out("w-close"); 235 try { 236 writer.close(); 237 } catch (IOException e) { 238 throw new SearchException(e); 239 } 240 } 241 } 242 243 private static void close(IndexReader reader) throws SearchException { 244 if(reader!=null){ 245 try { 246 reader.close(); 247 } catch (IOException e) { 248 throw new SearchException(e); 249 } 250 } 251 } 252 253 private static void close(Searcher searcher) throws SearchException { 254 if(searcher!=null){ 255 try { 256 searcher.close(); 257 } catch (IOException e) { 258 throw new SearchException(e); 259 } 260 } 261 } 262 263 private static void flushEL(IndexReader reader) { 264 //print.out("r-closeEL"); 265 if(reader!=null){ 266 try { 267 reader.flush(); 268 } catch (Throwable t) { 269 //throw new SearchException(t); 270 } 271 } 272 } 273 private static void closeEL(IndexReader reader) { 274 //print.out("r-closeEL"); 275 if(reader!=null){ 276 try { 277 reader.close(); 278 } catch (Throwable t) { 279 //throw new SearchException(t); 280 } 281 } 282 } 283 284 /** 285 * @see railo.runtime.search.SearchCollection#_indexURL(java.lang.String, java.lang.String, java.net.URL, java.lang.String[], boolean, java.lang.String) 286 */ 287 protected IndexResult _indexURL(String id, String title, URL url,String[] extensions, boolean recurse, String language)throws SearchException { 288 //timeout=ThreadLocalPageContext.getConfig().getRequestTimeout().getMillis(); 289 return _indexURL(id, title, url, extensions, recurse, language,50000L); 290 } 291 public IndexResult _indexURL(String id, String title, URL url,String[] extensions, boolean recurse, String language, long timeout)throws SearchException { 292 _checkLanguage(language); 293 info(url.toExternalForm()); 294 int before=getDocumentCount(id); 295 IndexWriter writer=null; 296 synchronized(token){ 297 try { 298 writer = _getWriter(id,true); 299 new WebCrawler(log).parse(writer, url, extensions, recurse,timeout); 300 301 writer.optimize(); 302 } 303 catch (Exception e) { 304 throw new SearchException(e); 305 } 306 finally { 307 close(writer); 308 } 309 indexSpellCheck(id); 310 } 311 if(getDocumentCount(id)==before) return new IndexResultImpl(0,0,1); 312 return new IndexResultImpl(0,1,0); 313 //throw new SearchException("url indexing not supported"); 314 315 } 316 317 /** 318 * @param id 319 * @param title 320 * @param keyColumn 321 * @param bodyColumns 322 * @param language 323 * @param custom1 324 * @param custom2 325 * @param custom3 326 * @param custom4 327 * @return 328 * @throws SearchException 329 */ 330 protected IndexResult _deleteCustom(String id,QueryColumn keyColumn) throws SearchException { 331 332 int countBefore=0; 333 int countAfter=0; 334 335 Map<String,Document> docs=new HashMap<String,Document>(); 336 337 Set<String> keys=toSet(keyColumn); 338 IndexWriter writer=null; 339 String key; 340 IndexReader reader=null; 341 Document doc; 342 343 synchronized(token){ 344 try { 345 try { 346 reader=_getReader(id,false); 347 countBefore=reader.maxDoc(); 348 for(int i=0;i<countBefore;i++) { 349 doc=reader.document(i); 350 key=doc.getField("key").stringValue(); 351 if(!keys.contains(key)) 352 docs.put(key,doc); 353 } 354 } 355 catch(Exception e) {} 356 finally { 357 close(reader); 358 } 359 countAfter=docs.size(); 360 361 362 writer = _getWriter(id,true); 363 Iterator<Entry<String, Document>> it = docs.entrySet().iterator(); 364 while(it.hasNext()) { 365 writer.addDocument(it.next().getValue()); 366 } 367 optimizeEL(writer); 368 369 } catch (IOException e) { 370 throw new SearchException(e); 371 } 372 finally { 373 close(writer); 374 } 375 indexSpellCheck(id); 376 } 377 int removes=countBefore-countAfter; 378 379 return new IndexResultImpl(removes,0,0); 380 } 381 382 private Set<String> toSet(QueryColumn column) { 383 Set<String> set=new HashSet<String>(); 384 Iterator it = column.valueIterator(); 385 while(it.hasNext()){ 386 set.add(Caster.toString(it.next(),null)); 387 } 388 return set; 389 } 390 391 /** 392 * @param id 393 * @param title 394 * @param keyColumn 395 * @param bodyColumns 396 * @param language 397 * @param custom1 398 * @param custom2 399 * @param custom3 400 * @param custom4 401 * @return 402 * @throws SearchException 403 */ 404 protected IndexResult _indexCustom(String id, Object title, QueryColumn keyColumn, QueryColumn[] bodyColumns, String language, 405 Object urlpath,Object custom1,Object custom2,Object custom3,Object custom4) throws SearchException { 406 _checkLanguage(language); 407 String t; 408 String url; 409 String c1; 410 String c2; 411 String c3; 412 String c4; 413 414 int countExisting=0; 415 int countAdd=keyColumn.size(); 416 int countNew=0; 417 418 Map<String,Document> docs=new HashMap<String,Document>(); 419 IndexWriter writer=null; 420 synchronized(token){ 421 try { 422 // read existing reader 423 IndexReader reader=null; 424 try { 425 reader=_getReader(id,false); 426 int len=reader.maxDoc(); 427 Document doc; 428 for(int i=0;i<len;i++) { 429 doc=reader.document(i); 430 docs.put(doc.getField("key").stringValue(),doc); 431 } 432 } 433 catch(Exception e) {} 434 finally { 435 close(reader); 436 } 437 438 countExisting=docs.size(); 439 writer = _getWriter(id,true); 440 int len = keyColumn.size(); 441 String key; 442 for(int i=1;i<=len;i++) { 443 key=Caster.toString(keyColumn.get(i,null),null); 444 if(key==null) continue; 445 446 StringBuilder body=new StringBuilder(); 447 for(int y=0;y<bodyColumns.length;y++) { 448 Object tmp=bodyColumns[y].get(i,null); 449 if(tmp!=null){ 450 body.append(tmp.toString()); 451 body.append(' '); 452 } 453 } 454 //t=(title==null)?null:Caster.toString(title.get(i,null),null); 455 //url=(urlpath==null)?null:Caster.toString(urlpath.get(i,null),null); 456 457 t=getRow(title,i); 458 url=getRow(urlpath,i); 459 c1=getRow(custom1,i); 460 c2=getRow(custom2,i); 461 c3=getRow(custom3,i); 462 c4=getRow(custom4,i); 463 464 docs.put(key,CustomDocument.getDocument(t,key,body.toString(),url,c1,c2,c3,c4)); 465 } 466 countNew=docs.size(); 467 Iterator<Entry<String, Document>> it = docs.entrySet().iterator(); 468 Entry<String, Document> entry; 469 Document doc; 470 while(it.hasNext()) { 471 entry = it.next(); 472 doc = entry.getValue(); 473 writer.addDocument(doc); 474 } 475 optimizeEL(writer); 476 //writer.optimize(); 477 478 } 479 catch(IOException ioe) { 480 throw new SearchException(ioe); 481 } 482 finally { 483 close(writer); 484 } 485 indexSpellCheck(id); 486 } 487 int inserts=countNew-countExisting; 488 489 return new IndexResultImpl(0,inserts,countAdd-inserts); 490 } 491 492 private String getRow(Object column, int row) { 493 if(column instanceof QueryColumn){ 494 return Caster.toString(((QueryColumn)column).get(row,null),null); 495 } 496 if(column!=null) return Caster.toString(column,null); 497 return null; 498 } 499 500 /** 501 * @see railo.runtime.search.SearchCollection#_purge() 502 */ 503 protected IndexResult _purge() throws SearchException { 504 SearchIndex[] indexes=getIndexes(); 505 int count=0; 506 for(int i=0;i<indexes.length;i++) { 507 count+=getDocumentCount(indexes[i].getId()); 508 } 509 ResourceUtil.removeChildrenEL(collectionDir); 510 return new IndexResultImpl(count,0,0); 511 } 512 513 /** 514 * @see railo.runtime.search.SearchCollection#_delete() 515 */ 516 protected IndexResult _delete() throws SearchException { 517 SearchIndex[] indexes=getIndexes(); 518 int count=0; 519 for(int i=0;i<indexes.length;i++) { 520 count+=getDocumentCount(indexes[i].getId()); 521 } 522 ResourceUtil.removeEL(collectionDir, true); 523 return new IndexResultImpl(count,0,0); 524 } 525 526 /** 527 * @see railo.runtime.search.SearchCollectionSupport#_deleteIndex(java.lang.String) 528 */ 529 protected IndexResult _deleteIndex(String id) throws SearchException { 530 int count=getDocumentCount(id); 531 ResourceUtil.removeEL(_getIndexDirectory(id,true), true); 532 return new IndexResultImpl(count,0,0); 533 } 534 535 /** 536 * @see railo.runtime.search.SearchCollection#_search(railo.runtime.search.SearchData, java.lang.String, java.lang.String, short, java.lang.String, java.lang.String[]) 537 */ 538 public SearchResulItem[] _search(SearchData data, String criteria, String language,short type, 539 String categoryTree, String[] category) throws SearchException { 540 try { 541 542 if(type!=SEARCH_TYPE_SIMPLE) throw new SearchException("search type explicit not supported"); 543 Analyzer analyzer = SearchUtil.getAnalyzer(language); 544 Query query=null; 545 Op op=null; 546 Object highlighter=null; 547 railo.runtime.search.lucene2.query.QueryParser queryParser=new railo.runtime.search.lucene2.query.QueryParser(); 548 AddionalAttrs aa = AddionalAttrs.getAddionlAttrs(); 549 aa.setHasRowHandling(true); 550 int startrow=aa.getStartrow(); 551 int maxrows=aa.getMaxrows(); 552 553 554 if(!criteria.equals("*")) { 555 // FUTURE take this data from calling parameters 556 op=queryParser.parseOp(criteria); 557 if(op==null) criteria="*"; 558 else criteria=op.toString(); 559 try { 560 561 query = new QueryParser("contents",analyzer ).parse(criteria); 562 highlighter = Highlight.createHighlighter(query,aa.getContextHighlightBegin(),aa.getContextHighlightEnd()); 563 564 565 } 566 catch (ParseException e) { 567 throw new SearchException(e); 568 } 569 } 570 571 Resource[] files = _getIndexDirectories(); 572 573 if(files==null) return new SearchResulItem[0]; 574 ArrayList<SearchResulItem> list=new ArrayList<SearchResulItem>(); 575 String ct,c; 576 577 ArrayList<String> spellCheckIndex=spellcheck?new ArrayList<String>():null; 578 579 int count=0; 580 IndexReader reader = null; 581 Searcher searcher = null; 582 try { 583 outer:for(int i=0;i<files.length;i++) { 584 if(removeCorrupt(files[i]))continue; 585 String strFile=files[i].toString(); 586 SearchIndex si = (SearchIndex)indexes.get(files[i].getName()); 587 588 if(si==null)continue; 589 ct=si.getCategoryTree(); 590 c=List.arrayToList(si.getCategories(), ","); 591 592 // check category tree 593 if(!matchCategoryTree(ct,categoryTree))continue; 594 if(!matchCategories(si.getCategories(),category))continue; 595 596 Document doc; 597 String id=files[i].getName(); 598 data.addRecordsSearched(_countDocs(strFile)); 599 600 reader = _getReader(id,false); 601 if(query==null && "*".equals(criteria)) { 602 int len=reader.numDocs(); 603 for(int y=0;y<len;y++) { 604 if(startrow>++count)continue; 605 if(maxrows>-1 && list.size()>=maxrows) break outer; 606 doc = reader.document(y); 607 list.add(createSearchResulItem(highlighter,analyzer,doc,id,1,ct,c,aa.getContextPassages(),aa.getContextBytes())); 608 } 609 } 610 else { 611 if(spellcheck)spellCheckIndex.add(id); 612 // search 613 searcher = new IndexSearcher(reader); 614 Hits hits = searcher.search(query); 615 int len=hits.length(); 616 for (int y=0; y<len; y++) { 617 if(startrow>++count)continue; 618 if(maxrows>-1 && list.size()>=maxrows) break outer; 619 //list.add(new SearchResulItemHits(hits,y,highlighter,analyzer,id,ct,c,aa.getContextPassages(),aa.getContextBytes())); 620 doc = hits.doc(y); 621 list.add(createSearchResulItem(highlighter,analyzer,doc,id,hits.score(y),ct,c,aa.getContextPassages(),aa.getContextBytes())); 622 } 623 624 } 625 626 } 627 } 628 finally { 629 close(reader); 630 close(searcher); 631 } 632 633 // spellcheck 634 //SearchData data=ThreadLocalSearchData.get(); 635 if(spellcheck && data!=null) { 636 if(data.getSuggestionMax()>=list.size()) { 637 638 Map suggestions = data.getSuggestion(); 639 Iterator it = spellCheckIndex.iterator(); 640 String id; 641 Literal[] literals = queryParser.getLiteralSearchedTerms(); 642 String[] strLiterals = queryParser.getStringSearchedTerms(); 643 boolean setSuggestionQuery=false; 644 while(it.hasNext()) { 645 id=(String) it.next(); 646 // add to set to remove duplicate values 647 SuggestionItem si; 648 SpellChecker sc = getSpellChecker(id); 649 for(int i=0;i<strLiterals.length;i++) { 650 String[] arr = sc.suggestSimilar(strLiterals[i], 1000); 651 if(arr.length>0){ 652 literals[i].set("<suggestion>"+arr[0]+"</suggestion>"); 653 setSuggestionQuery=true; 654 655 si=(SuggestionItem) suggestions.get(strLiterals[i]); 656 if(si==null)suggestions.put(strLiterals[i],new SuggestionItem(arr)); 657 else si.add(arr); 658 } 659 } 660 } 661 if(setSuggestionQuery)data.setSuggestionQuery(op.toString()); 662 } 663 } 664 665 return list.toArray(new SearchResulItem[list.size()]); 666 } 667 catch (IOException e) { throw new SearchException(e); } 668 669 } 670 671 private SpellChecker getSpellChecker(String id) throws IOException { 672 FSDirectory siDir = FSDirectory.getDirectory(FileWrapper.toFile(_getSpellDirectory(id))); 673 SpellChecker spellChecker = new SpellChecker(siDir); 674 return spellChecker; 675 } 676 677 private boolean removeCorrupt(Resource dir) { 678 if(ResourceUtil.isEmptyFile(dir)) { 679 ResourceUtil.removeEL(dir, true); 680 return true; 681 } 682 return false; 683 } 684 685 private static SearchResulItem createSearchResulItem(Object highlighter,Analyzer a,Document doc, String name, float score, String ct, String c,int maxNumFragments, int maxLength) { 686 String contextSummary=""; 687 if(maxNumFragments>0) 688 contextSummary=Highlight.createContextSummary(highlighter,a,doc.get("contents"),maxNumFragments,maxLength,doc.get("summary")); 689 String summary = doc.get("summary"); 690 691 return new SearchResulItemImpl( 692 name, 693 doc.get("title"), 694 score, 695 doc.get("key"), 696 doc.get("url"), 697 summary,contextSummary, 698 ct,c, 699 doc.get("custom1"), 700 doc.get("custom2"), 701 doc.get("custom3"), 702 doc.get("custom4"), 703 doc.get("mime-type"), 704 doc.get("author"), 705 doc.get("size")); 706 707 } 708 709 private boolean matchCategories(String[] categoryIndex, String[] categorySearch) { 710 if(categorySearch==null ||categorySearch.length==0) return true; 711 String search; 712 for(int s=0;s<categorySearch.length;s++) { 713 search=categorySearch[s]; 714 for(int i=0;i<categoryIndex.length;i++) { 715 if(search.equals(categoryIndex[i]))return true; 716 } 717 } 718 return false; 719 } 720 721 private boolean matchCategoryTree(String categoryTreeIndex, String categoryTreeSearch) { 722 //if(StringUtil.isEmpty(categoryTreeIndex) || categoryTreeIndex.equals("/")) return true; 723 //if(StringUtil.isEmpty(categoryTreeSearch) || categoryTreeSearch.equals("/")) return true; 724 return categoryTreeIndex.startsWith(categoryTreeSearch); 725 } 726 727 /** 728 * list a directory and call every file 729 * @param writer 730 * @param res 731 * @param filter 732 * @param url 733 * @throws IOException 734 * @throws InterruptedException 735 */ 736 private int _list(int doccount,IndexWriter writer, Resource res,ResourceFilter filter,String url) { 737 738 if (res.isReadable()) { 739 if (res.exists() && res.isDirectory()) { 740 Resource[] files = (filter==null)?res.listResources():res.listResources(filter); 741 if (files != null) { 742 for (int i = 0; i < files.length; i++) { 743 if(removeCorrupt(files[i])){ 744 continue; 745 } 746 doccount=_list(doccount,writer, files[i],filter,url+"/"+files[i].getName()); 747 } 748 } 749 } 750 else { 751 try { 752 info(res.getAbsolutePath()); 753 _index(writer,res,url); 754 doccount++; 755 } catch (Exception e) {} 756 } 757 } 758 return doccount; 759 } 760 761 /** 762 * index a single file 763 * @param writer 764 * @param file 765 * @param url 766 * @throws IOException 767 * @throws InterruptedException 768 */ 769 private void _index(IndexWriter writer, Resource file,String url) throws IOException { 770 if(!file.exists()) return; 771 writer.addDocument(DocumentUtil.toDocument(file,url,SystemUtil.getCharset())); 772 } 773 774 775 776 777 778 /** 779 * @param id 780 * @return returns the Index Directory 781 */ 782 private Resource _getIndexDirectory(String id, boolean createIfNotExists) { 783 Resource indexDir=collectionDir.getRealResource(id); 784 if(createIfNotExists && !indexDir.exists())indexDir.mkdirs(); 785 return indexDir; 786 } 787 788 /** 789 * get writer to id 790 * @param id 791 * @return returns the Writer 792 * @throws IOException 793 * @throws SearchException 794 * @throws IOException 795 */ 796 private IndexWriter _getWriter(String id,boolean create) throws SearchException, IOException { 797 // FUTURE support for none file -> Directory Object 798 Resource dir = _getIndexDirectory(id,true); 799 return new IndexWriter(FileWrapper.toFile(dir), SearchUtil.getAnalyzer(getLanguage()), create); 800 //return new ResourceIndexWriter(dir, SearchUtil.getAnalyzer(getLanguage()), create); 801 /*try { 802 return new ResourceIndexWriter(dir, SearchUtil.getAnalyzer(getLanguage()), true); 803 } catch (IOException e) { 804 ResourceUtil.removeChildrenEL(dir); 805 dir.getResourceProvider().unlock(dir); 806 return new ResourceIndexWriter(dir, SearchUtil.getAnalyzer(getLanguage()),true); 807 }*/ 808 } 809 810 private IndexReader _getReader(String id,boolean absolute) throws IOException { 811 return _getReader(_getFile(id, absolute)); 812 } 813 814 private IndexReader _getReader(File file) throws IOException { 815 if(!IndexReader.indexExists(file))throw new IOException("there is no index in ["+file+"]"); 816 return IndexReader.open(file); 817 } 818 819 private File _getFile(String id,boolean absolute) throws IOException { 820 Resource res = absolute?ResourcesImpl.getFileResourceProvider().getResource(id):_getIndexDirectory(id,true); 821 res.getResourceProvider().read(res); 822 return FileWrapper.toFile(res); 823 } 824 825 /** 826 * @return returns all existing IndexWriter 827 */ 828 private Resource[] _getIndexDirectories() { 829 Resource[] files = collectionDir.listResources(new DirectoryResourceFilter()); 830 831 return files; 832 } 833 834 /** 835 * @return returns all existing IndexWriter 836 * @throws SearchException 837 */ 838 private IndexWriter[] _getWriters(boolean create) throws SearchException { 839 Resource[] files = _getIndexDirectories(); 840 if(files==null) return new IndexWriter[0]; 841 842 IndexWriter[] writers=new IndexWriter[files.length]; 843 for(int i=0;i<files.length;i++) { 844 try { 845 writers[i]=_getWriter(files[i].getName(),create); 846 } catch (IOException e) {} 847 } 848 return writers; 849 } 850 851 852 private int _countDocs(String col) { 853 // FUTURE add support for none file resources 854 int totalDocs; 855 IndexReader reader=null; 856 try { 857 reader=_getReader(col,true); 858 totalDocs = reader.numDocs(); 859 } 860 catch(Exception e) { 861 return 0; 862 } 863 finally { 864 closeEL(reader); 865 } 866 return totalDocs; 867 } 868 869 /** 870 * @deprecated see SearchUtil.getAnalyzer(String language); 871 * @param language 872 * @return returns language matching Analyzer 873 * @throws SearchException 874 */ 875 public static Analyzer _getAnalyzer(String language) throws SearchException { 876 return SearchUtil.getAnalyzer(language); 877 } 878 879 /** 880 * check given language against collection language 881 * @param language 882 * @throws SearchException 883 */ 884 private void _checkLanguage(String language) throws SearchException { 885 886 if(language!=null && !language.trim().equalsIgnoreCase(getLanguage())) { 887 throw new SearchException("collection Language and Index Language must be of same type, but collection language is of type ["+getLanguage()+"] and index language is of type ["+language+"]"); 888 } 889 } 890 891 /** 892 * @see railo.runtime.search.SearchCollection#getDocumentCount() 893 */ 894 public int getDocumentCount(String id) { 895 try { 896 if(!_getIndexDirectory(id,false).exists()) return 0; 897 IndexReader r=null; 898 int num=0; 899 try { 900 r = _getReader(id,false); 901 num=r.numDocs(); 902 } 903 finally { 904 close(r); 905 } 906 return num; 907 } 908 catch (Exception e) {} 909 return 0; 910 } 911 912 /** 913 * @see railo.runtime.search.SearchCollection#getDocumentCount() 914 */ 915 public int getDocumentCount() { 916 int count=0; 917 SearchIndex[] _indexes = getIndexes(); 918 for(int i=0;i<_indexes.length;i++) { 919 count+=getDocumentCount(_indexes[i].getId()); 920 } 921 922 return count; 923 } 924 925 /** 926 * @see railo.runtime.search.SearchCollection#getSize() 927 */ 928 public long getSize() { 929 return ResourceUtil.getRealSize(collectionDir)/1024; 930 } 931 932 public Object getCategoryInfo() { 933 Struct categories=new StructImpl(); 934 Struct categorytrees=new StructImpl(); 935 Struct info=new StructImpl(); 936 info.setEL("categories", categories); 937 info.setEL("categorytrees", categorytrees); 938 939 Iterator it = indexes.keySet().iterator(); 940 String[] cats; 941 String catTree; 942 Double tmp; 943 944 while(it.hasNext()) { 945 SearchIndex index=(SearchIndex) indexes.get(it.next()); 946 947 // category tree 948 catTree = index.getCategoryTree(); 949 tmp=(Double) categorytrees.get(catTree,null); 950 if(tmp==null) categorytrees.setEL(catTree,Caster.toDouble(1)); 951 else categorytrees.setEL(catTree,Caster.toDouble(tmp.doubleValue()+1)); 952 953 // categories 954 cats = index.getCategories(); 955 for(int i=0;i<cats.length;i++) { 956 tmp=(Double) categories.get(cats[i],null); 957 if(tmp==null) categories.setEL(cats[i],Caster.toDouble(1)); 958 else categories.setEL(cats[i],Caster.toDouble(tmp.doubleValue()+1)); 959 } 960 } 961 return info; 962 } 963 964 class ResourceIndexWriter extends IndexWriter { 965 966 private Resource dir; 967 968 public ResourceIndexWriter(Resource dir, Analyzer analyzer, boolean create) throws IOException { 969 970 super(FileWrapper.toFile(dir), analyzer, create); 971 this.dir=dir; 972 dir.getResourceProvider().lock(dir); 973 974 } 975 976 /** 977 * 978 * @see org.apache.lucene.index.IndexWriter#close() 979 */ 980 public synchronized void close() throws IOException { 981 super.close(); 982 dir.getResourceProvider().unlock(dir); 983 } 984 985 } 986 987 private Resource _createSpellDirectory(String id) { 988 Resource indexDir=collectionDir.getRealResource(id+"_"+(_getMax(true)+1)+"_spell"); 989 //print.out("create:"+indexDir); 990 indexDir.mkdirs(); 991 return indexDir; 992 } 993 994 private Resource _getSpellDirectory(String id) { 995 Resource indexDir=collectionDir.getRealResource(id+"_"+_getMax(false)+"_spell"); 996 //print.out("get:"+indexDir); 997 return indexDir; 998 } 999 1000 private long _getMax(boolean delete) { 1001 Resource[] children = collectionDir.listResources(new SpellDirFilter()); 1002 long max=0, nbr; 1003 String name; 1004 for(int i=0;i<children.length;i++) { 1005 name=children[i].getName(); 1006 name=name.substring(0,name.length()-6); 1007 nbr=Caster.toLongValue(name.substring(name.lastIndexOf('_')+1),0); 1008 if(delete){ 1009 try { 1010 children[i].remove(true); 1011 continue; 1012 } 1013 catch (Throwable t) {} 1014 } 1015 if(nbr>max)max=nbr; 1016 } 1017 return max; 1018 } 1019 1020 private void info(String doc) { 1021 if(log==null) return; 1022 log.info("Collection:"+getName(), "indexing "+doc); 1023 } 1024 1025 public class SpellDirFilter implements ResourceNameFilter { 1026 1027 /** 1028 * filter all names with the following pattern [<name>_<count>_spell] 1029 * 1030 * @see railo.commons.io.res.filter.ResourceNameFilter#accept(railo.commons.io.res.Resource, java.lang.String) 1031 */ 1032 public boolean accept(Resource parent, String name) { 1033 return name.endsWith("_spell"); 1034 } 1035 1036 } 1037 }