package railo.runtime.search.lucene2;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.FSDirectory;

import railo.commons.io.SystemUtil;
import railo.commons.io.log.LogAndSource;
import railo.commons.io.res.Resource;
import railo.commons.io.res.ResourcesImpl;
import railo.commons.io.res.filter.DirectoryResourceFilter;
import railo.commons.io.res.filter.ResourceFilter;
import railo.commons.io.res.filter.ResourceNameFilter;
import railo.commons.io.res.util.FileWrapper;
import railo.commons.io.res.util.ResourceUtil;
import railo.commons.lang.SerializableObject;
import railo.commons.lang.StringUtil;
import railo.runtime.op.Caster;
import railo.runtime.search.AddionalAttrs;
import railo.runtime.search.IndexResult;
import railo.runtime.search.IndexResultImpl;
import railo.runtime.search.SearchCollectionSupport;
import railo.runtime.search.SearchData;
import railo.runtime.search.SearchEngineSupport;
import railo.runtime.search.SearchException;
import railo.runtime.search.SearchIndex;
import railo.runtime.search.SearchResulItem;
import railo.runtime.search.SearchResulItemImpl;
import railo.runtime.search.SuggestionItem;
import railo.runtime.search.lucene2.docs.CustomDocument;
import railo.runtime.search.lucene2.highlight.Highlight;
import railo.runtime.search.lucene2.net.WebCrawler;
import railo.runtime.search.lucene2.query.Literal;
import railo.runtime.search.lucene2.query.Op;
import railo.runtime.type.QueryColumn;
import railo.runtime.type.Struct;
import railo.runtime.type.StructImpl;
import railo.runtime.type.dt.DateTime;
import railo.runtime.type.util.ListUtil;

/**
 * Search collection stored as Lucene indexes on disk.
 *
 * Every index id of the collection gets its own sub directory below the
 * collection directory. When spell checking is enabled, an additional
 * directory named <code>&lt;id&gt;_&lt;number&gt;_spell</code> holds the
 * spell checker index for that id.
 *
 * All methods that rewrite an index synchronize on one static lock object,
 * so index writes are serialized across every instance of this class.
 */
public final class LuceneSearchCollection extends SearchCollectionSupport {

	private static final long serialVersionUID = 3430238280421965781L;

	/** suffix of every spell checker index directory */
	private static final String SPELL_SUFFIX="_spell";

	/** static lock guarding all index writes of all collections */
	private static final SerializableObject token=new SerializableObject();

	private Resource collectionDir;
	private boolean spellcheck;
	private LogAndSource log;

	/**
	 * @param searchEngine engine owning this collection, also the source of the logger
	 * @param name name of the collection
	 * @param path parent directory; the collection directory is resolved below it
	 * @param language language of the collection
	 * @param lastUpdate
	 * @param created
	 * @param spellcheck whether a spell checker index is maintained after each index write
	 */
	public LuceneSearchCollection(SearchEngineSupport searchEngine, String name, Resource path, String language,
			DateTime lastUpdate, DateTime created,boolean spellcheck) {
		super(searchEngine, name, path, language, lastUpdate,created);
		this.spellcheck=spellcheck;
		collectionDir=getPath().getRealResource(StringUtil.toIdentityVariableName(getName()));
		log=searchEngine.getLogger();
	}

	/**
	 * Same as the full constructor with spell checking enabled.
	 */
	public LuceneSearchCollection(SearchEngineSupport searchEngine, String name, Resource path, String language,
			DateTime lastUpdate, DateTime created) {
		this(searchEngine, name, path, language, lastUpdate, created, true);
	}

	/**
	 * Creates the collection directory when it does not exist yet.
	 */
	@Override
	protected void _create() throws SearchException {
		try {
			if(!collectionDir.exists())collectionDir.createDirectory(true);
		}
		catch (IOException e) {
			// NOTE(review): a failed directory creation is silently ignored here,
			// kept as is because callers may rely on this being best effort
		}
	}

	/**
	 * Optimizes every index directory of the collection.
	 * Every opened writer is closed, even when closing one of them fails;
	 * the first close failure is thrown once all writers were handled.
	 */
	@Override
	protected void _optimize() throws SearchException {
		IndexWriter[] writers=_getWriters(false);
		SearchException failure=null;
		for(int i=0;i<writers.length;i++) {
			// optimizeEL never throws and accepts null entries
			optimizeEL(writers[i]);
			try {
				close(writers[i]);
			}
			catch(SearchException se) {
				// keep going, the remaining writers still hold their index open
				if(failure==null)failure=se;
			}
		}
		if(failure!=null) throw failure;
	}

	/**
	 * Not supported, always throws.
	 */
	@Override
	protected void _map(Resource path) throws SearchException {
		throw new SearchException("mapping of existing Collection for file ["+path+"] not supported");
	}

	/**
	 * Does nothing.
	 */
	@Override
	protected void _repair() throws SearchException {
	}

	/**
	 * Replaces the index with the given id by an index of a single file.
	 * @param id index id
	 * @param title not used by this implementation
	 * @param res file to index, its name is stored as url of the document
	 * @param language must match the collection language when not null
	 * @return result built from comparing the document count before and after
	 * @throws SearchException
	 */
	@Override
	protected IndexResult _indexFile(String id, String title, Resource res,String language) throws SearchException {
		info(res.getAbsolutePath());
		_checkLanguage(language);
		int before=getDocumentCount(id);
		IndexWriter writer=null;
		synchronized(token){
			try {
				writer = _getWriter(id,true);
				_index(writer,res,res.getName());
				writer.optimize();
			}
			catch (Exception e) {
				throw new SearchException(e);
			}
			finally {
				close(writer);
			}
			indexSpellCheck(id);
		}
		if(getDocumentCount(id)==before) return new IndexResultImpl(0,0,1);
		return new IndexResultImpl(0,1,0);
	}

	/**
	 * Replaces the index with the given id by an index of all matching files below a directory.
	 * @param id index id
	 * @param title not used by this implementation
	 * @param dir directory to index
	 * @param extensions passed to the file filter
	 * @param recurse passed to the file filter
	 * @param language must match the collection language when not null
	 * @return result carrying the number of indexed files
	 * @throws SearchException
	 */
	@Override
	protected IndexResult _indexPath(String id, String title, Resource dir,String[] extensions, boolean recurse, String language) throws SearchException {
		info(dir.getAbsolutePath());
		_checkLanguage(language);
		int doccount=0;
		IndexWriter writer=null;
		synchronized(token){
			try {
				writer = _getWriter(id,true);
				doccount=_list(0,writer,dir,new LuceneExtensionFileFilter(extensions,recurse),"");
				writer.optimize();
			}
			catch (IOException e) {
				throw new SearchException(e);
			}
			finally {
				close(writer);
			}
			indexSpellCheck(id);
		}
		return new IndexResultImpl(0,0,doccount);
	}

	/**
	 * Optimizes the given writer, ignoring null and every failure.
	 */
	private void optimizeEL(IndexWriter writer) {
		if(writer==null)return;
		try {
			writer.optimize();
		}
		catch (Throwable t) {
			// best effort by design
		}
	}

	/**
	 * Rebuilds the spell checker index of the given id from the "contents"
	 * field of its index. Does nothing when spell checking is disabled.
	 */
	private void indexSpellCheck(String id) throws SearchException {
		if(!spellcheck) return;

		IndexReader reader=null;
		FSDirectory spellDir=null;

		Resource dir = _createSpellDirectory(id);
		try {
			File spellFile = FileWrapper.toFile(dir);
			// NOTE(review): spellDir is never closed - confirm whether that is intended
			spellDir = FSDirectory.getDirectory(spellFile);
			reader = _getReader(id,false);
			Dictionary dictionary = new LuceneDictionary(reader,"contents");

			SpellChecker spellChecker = new SpellChecker(spellDir);
			spellChecker.indexDictionary(dictionary);
		}
		catch(IOException ioe) {
			throw new SearchException(ioe);
		}
		finally {
			flushEL(reader);
			closeEL(reader);
		}
	}

	/**
	 * Closes the writer when not null, an IOException is wrapped into a SearchException.
	 */
	private void close(IndexWriter writer) throws SearchException {
		if(writer!=null){
			try {
				writer.close();
			} catch (IOException e) {
				throw new SearchException(e);
			}
		}
	}

	/**
	 * Closes the reader when not null, an IOException is wrapped into a SearchException.
	 */
	private static void close(IndexReader reader) throws SearchException {
		if(reader!=null){
			try {
				reader.close();
			} catch (IOException e) {
				throw new SearchException(e);
			}
		}
	}

	/**
	 * Closes the searcher when not null, an IOException is wrapped into a SearchException.
	 */
	private static void close(Searcher searcher) throws SearchException {
		if(searcher!=null){
			try {
				searcher.close();
			} catch (IOException e) {
				throw new SearchException(e);
			}
		}
	}

	/**
	 * Flushes the reader when not null, every failure is ignored.
	 */
	private static void flushEL(IndexReader reader) {
		if(reader!=null){
			try {
				reader.flush();
			} catch (Throwable t) {
				// best effort by design
			}
		}
	}

	/**
	 * Closes the reader when not null, every failure is ignored.
	 */
	private static void closeEL(IndexReader reader) {
		if(reader!=null){
			try {
				reader.close();
			} catch (Throwable t) {
				// best effort by design
			}
		}
	}

	/**
	 * Indexes a URL with a fixed timeout of 50000.
	 */
	@Override
	protected IndexResult _indexURL(String id, String title, URL url,String[] extensions, boolean recurse, String language)throws SearchException {
		return _indexURL(id, title, url, extensions, recurse, language,50000L);
	}

	/**
	 * Replaces the index with the given id by the result of crawling the given URL.
	 * @param id index id
	 * @param title not used by this implementation
	 * @param url start URL handed to the crawler
	 * @param extensions handed to the crawler
	 * @param recurse handed to the crawler
	 * @param language must match the collection language when not null
	 * @param timeout handed to the crawler
	 * @return result built from comparing the document count before and after
	 * @throws SearchException
	 */
	public IndexResult _indexURL(String id, String title, URL url,String[] extensions, boolean recurse, String language, long timeout)throws SearchException {
		_checkLanguage(language);
		info(url.toExternalForm());
		int before=getDocumentCount(id);
		IndexWriter writer=null;
		synchronized(token){
			try {
				writer = _getWriter(id,true);
				new WebCrawler(log).parse(writer, url, extensions, recurse,timeout);
				writer.optimize();
			}
			catch (Exception e) {
				throw new SearchException(e);
			}
			finally {
				close(writer);
			}
			indexSpellCheck(id);
		}
		if(getDocumentCount(id)==before) return new IndexResultImpl(0,0,1);
		return new IndexResultImpl(0,1,0);
	}

	/**
	 * Removes all documents whose "key" field matches a value of the given column.
	 * The index is rewritten from scratch with the remaining documents.
	 * @param id index id
	 * @param keyColumn column holding the keys to remove
	 * @return result carrying the number of removed documents
	 * @throws SearchException
	 */
	protected IndexResult _deleteCustom(String id,QueryColumn keyColumn) throws SearchException {

		int countBefore=0;
		int countAfter=0;

		Map<String,Document> docs=new HashMap<String,Document>();
		Set<String> keys=toSet(keyColumn);
		IndexWriter writer=null;

		synchronized(token){
			try {
				IndexReader reader=null;
				try {
					reader=_getReader(id,false);
					countBefore=reader.maxDoc();
					for(int i=0;i<countBefore;i++) {
						Document doc=reader.document(i);
						String key=doc.getField("key").stringValue();
						if(!keys.contains(key))
							docs.put(key,doc);
					}
				}
				catch(Exception e) {
					// NOTE(review): any failure while reading (including a document
					// without "key" field) is ignored, documents not read so far are
					// dropped by the rewrite below - confirm this is acceptable
				}
				finally {
					close(reader);
				}
				countAfter=docs.size();

				// create=true, the index is rebuilt with the kept documents only
				writer = _getWriter(id,true);
				for(Document kept:docs.values()) {
					writer.addDocument(kept);
				}
				optimizeEL(writer);
			}
			catch (IOException e) {
				throw new SearchException(e);
			}
			finally {
				close(writer);
			}
			indexSpellCheck(id);
		}
		int removes=countBefore-countAfter;
		return new IndexResultImpl(removes,0,0);
	}

	/**
	 * Collects all values of the column as strings, values that cannot be
	 * casted end up as null.
	 */
	private Set<String> toSet(QueryColumn column) {
		Set<String> set=new HashSet<String>();
		Iterator it = column.valueIterator();
		while(it.hasNext()){
			set.add(Caster.toString(it.next(),null));
		}
		return set;
	}

	/**
	 * Adds or replaces documents built from query columns. Existing documents
	 * are read first, merged by key with the new ones, then the index is
	 * rewritten from scratch.
	 * @param id index id
	 * @param title column or constant value
	 * @param keyColumn column holding the document keys, rows without key are skipped
	 * @param bodyColumns columns concatenated (separated by a blank) to the document body
	 * @param language must match the collection language when not null
	 * @param urlpath column or constant value
	 * @param custom1 column or constant value
	 * @param custom2 column or constant value
	 * @param custom3 column or constant value
	 * @param custom4 column or constant value
	 * @return result built from the number of new keys and the number of rows
	 * @throws SearchException
	 */
	protected IndexResult _indexCustom(String id, Object title, QueryColumn keyColumn, QueryColumn[] bodyColumns, String language,
			Object urlpath,Object custom1,Object custom2,Object custom3,Object custom4) throws SearchException {
		_checkLanguage(language);

		int countExisting=0;
		int countAdd=keyColumn.size();
		int countNew=0;

		Map<String,Document> docs=new HashMap<String,Document>();
		IndexWriter writer=null;
		synchronized(token){
			try {
				// read the existing documents
				IndexReader reader=null;
				try {
					reader=_getReader(id,false);
					int max=reader.maxDoc();
					for(int i=0;i<max;i++) {
						Document existing=reader.document(i);
						docs.put(existing.getField("key").stringValue(),existing);
					}
				}
				catch(Exception e) {
					// NOTE(review): covers the "no index yet" case, but also hides
					// every other read failure - existing documents not read so far
					// are dropped by the rewrite below
				}
				finally {
					close(reader);
				}

				countExisting=docs.size();
				writer = _getWriter(id,true);
				int rows = keyColumn.size();
				// query rows are addressed starting at 1
				for(int row=1;row<=rows;row++) {
					String key=Caster.toString(keyColumn.get(row,null),null);
					if(key==null) continue;

					StringBuilder body=new StringBuilder();
					for(int y=0;y<bodyColumns.length;y++) {
						Object tmp=bodyColumns[y].get(row,null);
						if(tmp!=null){
							body.append(tmp.toString());
							body.append(' ');
						}
					}

					String t=getRow(title,row);
					String url=getRow(urlpath,row);
					String c1=getRow(custom1,row);
					String c2=getRow(custom2,row);
					String c3=getRow(custom3,row);
					String c4=getRow(custom4,row);

					docs.put(key,CustomDocument.getDocument(t,key,body.toString(),url,c1,c2,c3,c4));
				}
				countNew=docs.size();
				for(Document doc:docs.values()) {
					writer.addDocument(doc);
				}
				optimizeEL(writer);
			}
			catch(IOException ioe) {
				throw new SearchException(ioe);
			}
			finally {
				close(writer);
			}
			indexSpellCheck(id);
		}
		int inserts=countNew-countExisting;
		return new IndexResultImpl(0,inserts,countAdd-inserts);
	}

	/**
	 * @return the value of the given row when column is a QueryColumn,
	 * the column itself as string when it is any other non null value, otherwise null
	 */
	private String getRow(Object column, int row) {
		if(column instanceof QueryColumn){
			return Caster.toString(((QueryColumn)column).get(row,null),null);
		}
		if(column!=null) return Caster.toString(column,null);
		return null;
	}

	/**
	 * Removes everything below the collection directory.
	 * @return result carrying the document count before removal
	 */
	@Override
	protected IndexResult _purge() throws SearchException {
		SearchIndex[] indexes=getIndexes();
		int count=0;
		for(int i=0;i<indexes.length;i++) {
			count+=getDocumentCount(indexes[i].getId());
		}
		ResourceUtil.removeChildrenEL(collectionDir);
		return new IndexResultImpl(count,0,0);
	}

	/**
	 * Removes the collection directory itself.
	 * @return result carrying the document count before removal
	 */
	@Override
	protected IndexResult _delete() throws SearchException {
		SearchIndex[] indexes=getIndexes();
		int count=0;
		for(int i=0;i<indexes.length;i++) {
			count+=getDocumentCount(indexes[i].getId());
		}
		ResourceUtil.removeEL(collectionDir, true);
		return new IndexResultImpl(count,0,0);
	}

	/**
	 * Removes the directory of a single index.
	 * @return result carrying the document count before removal
	 */
	@Override
	protected IndexResult _deleteIndex(String id) throws SearchException {
		int count=getDocumentCount(id);
		ResourceUtil.removeEL(_getIndexDirectory(id,true), true);
		return new IndexResultImpl(count,0,0);
	}

	/**
	 * Searches all indexes of the collection matching the given categories.
	 *
	 * The criteria "*" returns every document without involving a Lucene query;
	 * the same applies when the criteria parser produces no operation.
	 * Start row and max rows are taken from the current AddionalAttrs.
	 * When spell checking is enabled and data is given, suggestions are
	 * registered on data.
	 *
	 * @param data receives searched record counts and suggestions, may be null
	 * @param criteria search criteria
	 * @param language language used to select the analyzer
	 * @param type only SEARCH_TYPE_SIMPLE is supported
	 * @param categoryTree category tree an index must start with
	 * @param category categories of which an index must have at least one, null or empty matches all
	 * @return matching items
	 * @throws SearchException
	 */
	@Override
	public SearchResulItem[] _search(SearchData data, String criteria, String language,short type,
			String categoryTree, String[] category) throws SearchException {
		try {

			if(type!=SEARCH_TYPE_SIMPLE) throw new SearchException("search type explicit not supported");
			Analyzer analyzer = SearchUtil.getAnalyzer(language);
			Query query=null;
			Op op=null;
			Object highlighter=null;
			railo.runtime.search.lucene2.query.QueryParser queryParser=new railo.runtime.search.lucene2.query.QueryParser();
			AddionalAttrs aa = AddionalAttrs.getAddionlAttrs();
			aa.setHasRowHandling(true);
			int startrow=aa.getStartrow();
			int maxrows=aa.getMaxrows();

			if(!criteria.equals("*")) {
				// FUTURE take this data from calling parameters
				op=queryParser.parseOp(criteria);
				if(op==null) {
					// nothing to search for: fall back to "all documents". The bare "*"
					// must not be handed to the Lucene parser, the match all branch
					// below handles query==null
					criteria="*";
				}
				else {
					criteria=op.toString();
					try {
						query = new QueryParser("contents",analyzer ).parse(criteria);
						highlighter = Highlight.createHighlighter(query,aa.getContextHighlightBegin(),aa.getContextHighlightEnd());
					}
					catch (ParseException e) {
						throw new SearchException(e);
					}
				}
			}

			Resource[] files = _getIndexDirectories();
			if(files==null) return new SearchResulItem[0];

			ArrayList<SearchResulItem> list=new ArrayList<SearchResulItem>();
			ArrayList<String> spellCheckIndex=spellcheck?new ArrayList<String>():null;
			int count=0;

			outer:for(int i=0;i<files.length;i++) {
				if(removeCorrupt(files[i]))continue;
				String strFile=files[i].toString();
				String id=files[i].getName();
				SearchIndex index = indexes.get(id);
				if(index==null)continue;

				String ct=index.getCategoryTree();
				String c=ListUtil.arrayToList(index.getCategories(), ",");

				// check categories
				if(!matchCategoryTree(ct,categoryTree))continue;
				if(!matchCategories(index.getCategories(),category))continue;

				if(data!=null) data.addRecordsSearched(_countDocs(strFile));

				// reader and searcher are opened per index, so they are closed per index
				IndexReader reader = null;
				Searcher searcher = null;
				try {
					reader = _getReader(id,false);
					if(query==null && "*".equals(criteria)) {
						int len=reader.numDocs();
						for(int y=0;y<len;y++) {
							if(startrow>++count)continue;
							if(maxrows>-1 && list.size()>=maxrows) break outer;
							Document doc = reader.document(y);
							list.add(createSearchResulItem(highlighter,analyzer,doc,id,1,ct,c,aa.getContextPassages(),aa.getContextBytes()));
						}
					}
					else {
						if(spellcheck)spellCheckIndex.add(id);
						searcher = new IndexSearcher(reader);
						Hits hits = searcher.search(query);
						int len=hits.length();
						for (int y=0; y<len; y++) {
							if(startrow>++count)continue;
							if(maxrows>-1 && list.size()>=maxrows) break outer;
							Document doc = hits.doc(y);
							list.add(createSearchResulItem(highlighter,analyzer,doc,id,hits.score(y),ct,c,aa.getContextPassages(),aa.getContextBytes()));
						}
					}
				}
				finally {
					// nested, so a failing close of one does not leak the other
					try {
						close(searcher);
					}
					finally {
						close(reader);
					}
				}
			}

			// spellcheck
			if(spellcheck && data!=null) {
				if(data.getSuggestionMax()>=list.size()) {

					Map suggestions = data.getSuggestion();
					Literal[] literals = queryParser.getLiteralSearchedTerms();
					String[] strLiterals = queryParser.getStringSearchedTerms();
					boolean setSuggestionQuery=false;

					Iterator<String> it = spellCheckIndex.iterator();
					while(it.hasNext()) {
						SpellChecker sc = getSpellChecker(it.next());
						for(int i=0;i<strLiterals.length;i++) {
							String[] arr = sc.suggestSimilar(strLiterals[i], 1000);
							if(arr.length>0){
								literals[i].set("<suggestion>"+arr[0]+"</suggestion>");
								setSuggestionQuery=true;

								// one SuggestionItem per searched term, merged over all indexes
								SuggestionItem item=(SuggestionItem) suggestions.get(strLiterals[i]);
								if(item==null)suggestions.put(strLiterals[i],new SuggestionItem(arr));
								else item.add(arr);
							}
						}
					}
					if(setSuggestionQuery)data.setSuggestionQuery(op.toString());
				}
			}

			return list.toArray(new SearchResulItem[list.size()]);
		}
		catch (IOException e) { throw new SearchException(e); }
	}

	/**
	 * @return a spell checker working on the current spell directory of the given id
	 */
	private SpellChecker getSpellChecker(String id) throws IOException {
		FSDirectory siDir = FSDirectory.getDirectory(FileWrapper.toFile(_getSpellDirectory(id)));
		return new SpellChecker(siDir);
	}

	/**
	 * Removes the resource when ResourceUtil.isEmptyFile reports true for it.
	 * @return true when the resource was considered corrupt
	 */
	private boolean removeCorrupt(Resource dir) {
		if(ResourceUtil.isEmptyFile(dir)) {
			ResourceUtil.removeEL(dir, true);
			return true;
		}
		return false;
	}

	/**
	 * Builds a result item from the stored fields of a document. The context
	 * summary is only created when maxNumFragments is greater than 0.
	 */
	private static SearchResulItem createSearchResulItem(Object highlighter,Analyzer a,Document doc, String name, float score, String ct, String c,int maxNumFragments, int maxLength) {
		String contextSummary="";
		if(maxNumFragments>0)
			contextSummary=Highlight.createContextSummary(highlighter,a,doc.get("contents"),maxNumFragments,maxLength,doc.get("summary"));
		String summary = doc.get("summary");

		return new SearchResulItemImpl(
				name,
				doc.get("title"),
				score,
				doc.get("key"),
				doc.get("url"),
				summary,contextSummary,
				ct,c,
				doc.get("custom1"),
				doc.get("custom2"),
				doc.get("custom3"),
				doc.get("custom4"),
				doc.get("mime-type"),
				doc.get("author"),
				doc.get("size"));
	}

	/**
	 * @return true when no search categories are given or at least one of
	 * them equals one of the index categories
	 */
	private boolean matchCategories(String[] categoryIndex, String[] categorySearch) {
		if(categorySearch==null ||categorySearch.length==0) return true;
		for(int s=0;s<categorySearch.length;s++) {
			String search=categorySearch[s];
			for(int i=0;i<categoryIndex.length;i++) {
				if(search.equals(categoryIndex[i]))return true;
			}
		}
		return false;
	}

	/**
	 * @return true when the category tree of the index starts with the searched one.
	 * Neither argument is checked for null.
	 */
	private boolean matchCategoryTree(String categoryTreeIndex, String categoryTreeSearch) {
		return categoryTreeIndex.startsWith(categoryTreeSearch);
	}

	/**
	 * Walks a resource recursively and indexes every file found. A failure
	 * while indexing a single file is ignored and the file is not counted.
	 * @param doccount number of documents indexed so far
	 * @param writer
	 * @param res file or directory
	 * @param filter applied when listing directories, may be null
	 * @param url url stored for res, extended by "/" and the name for children
	 * @return doccount increased by the number of files indexed
	 */
	private int _list(int doccount,IndexWriter writer, Resource res,ResourceFilter filter,String url) {

		if (!res.isReadable()) return doccount;

		if (res.exists() && res.isDirectory()) {
			Resource[] files = (filter==null)?res.listResources():res.listResources(filter);
			if (files != null) {
				for (int i = 0; i < files.length; i++) {
					if(removeCorrupt(files[i])){
						continue;
					}
					doccount=_list(doccount,writer, files[i],filter,url+"/"+files[i].getName());
				}
			}
		}
		else {
			try {
				info(res.getAbsolutePath());
				_index(writer,res,url);
				doccount++;
			}
			catch (Exception e) {
				// one unreadable file must not stop the whole directory
			}
		}
		return doccount;
	}

	/**
	 * Indexes a single file, a file that does not exist is skipped.
	 * @param writer
	 * @param file
	 * @param url
	 * @throws IOException
	 */
	private void _index(IndexWriter writer, Resource file,String url) throws IOException {
		if(!file.exists()) return;
		writer.addDocument(DocumentUtil.toDocument(file,url,SystemUtil.getCharset()));
	}

	/**
	 * @param id
	 * @param createIfNotExists create the directory when missing
	 * @return returns the Index Directory
	 */
	private Resource _getIndexDirectory(String id, boolean createIfNotExists) {
		Resource indexDir=collectionDir.getRealResource(id);
		if(createIfNotExists && !indexDir.exists())indexDir.mkdirs();
		return indexDir;
	}

	/**
	 * Opens a writer on the directory of the given id, the directory is
	 * created when missing.
	 * @param id
	 * @param create passed to the IndexWriter constructor
	 * @return returns the Writer
	 * @throws SearchException
	 * @throws IOException
	 */
	private IndexWriter _getWriter(String id,boolean create) throws SearchException, IOException {
		// FUTURE support for none file -> Directory Object
		Resource dir = _getIndexDirectory(id,true);
		return new IndexWriter(FileWrapper.toFile(dir), SearchUtil.getAnalyzer(getLanguage()), create);
	}

	/**
	 * @param id index id, or an absolute path when absolute is true
	 * @param absolute
	 * @return reader on the index
	 * @throws IOException when there is no index
	 */
	private IndexReader _getReader(String id,boolean absolute) throws IOException {
		return _getReader(_getFile(id, absolute));
	}

	private IndexReader _getReader(File file) throws IOException {
		if(!IndexReader.indexExists(file))throw new IOException("there is no index in ["+file+"]");
		return IndexReader.open(file);
	}

	/**
	 * Resolves the id to a file, either as absolute path of the file resource
	 * provider or as index directory of this collection (created when missing).
	 */
	private File _getFile(String id,boolean absolute) throws IOException {
		Resource res = absolute?ResourcesImpl.getFileResourceProvider().getResource(id):_getIndexDirectory(id,true);
		res.getResourceProvider().read(res);
		return FileWrapper.toFile(res);
	}

	/**
	 * @return returns all directories inside the collection directory
	 * (as returned by listResources, callers check for null)
	 */
	private Resource[] _getIndexDirectories() {
		return collectionDir.listResources(new DirectoryResourceFilter());
	}

	/**
	 * @return returns a writer for every directory inside the collection directory,
	 * entries are null where opening the writer failed with an IOException
	 * @throws SearchException
	 */
	private IndexWriter[] _getWriters(boolean create) throws SearchException {
		Resource[] files = _getIndexDirectories();
		if(files==null) return new IndexWriter[0];

		IndexWriter[] writers=new IndexWriter[files.length];
		for(int i=0;i<files.length;i++) {
			try {
				writers[i]=_getWriter(files[i].getName(),create);
			}
			catch (IOException e) {
				// entry stays null, users of the array are null safe
			}
		}
		return writers;
	}

	/**
	 * @param col absolute path of an index directory
	 * @return number of documents in that index, 0 on any failure
	 */
	private int _countDocs(String col) {
		// FUTURE add support for none file resources
		int totalDocs;
		IndexReader reader=null;
		try {
			reader=_getReader(col,true);
			totalDocs = reader.numDocs();
		}
		catch(Exception e) {
			return 0;
		}
		finally {
			closeEL(reader);
		}
		return totalDocs;
	}

	/**
	 * @deprecated see SearchUtil.getAnalyzer(String language);
	 * @param language
	 * @return returns language matching Analyzer
	 * @throws SearchException
	 */
	public static Analyzer _getAnalyzer(String language) throws SearchException {
		return SearchUtil.getAnalyzer(language);
	}

	/**
	 * check given language against collection language, null is always accepted
	 * @param language
	 * @throws SearchException when the languages differ
	 */
	private void _checkLanguage(String language) throws SearchException {
		if(language!=null && !language.trim().equalsIgnoreCase(getLanguage())) {
			throw new SearchException("collection Language and Index Language must be of same type, but collection language is of type ["+getLanguage()+"] and index language is of type ["+language+"]");
		}
	}

	/**
	 * @return number of documents of the given index, 0 when the index
	 * directory does not exist or reading fails
	 */
	@Override
	public int getDocumentCount(String id) {
		try {
			if(!_getIndexDirectory(id,false).exists()) return 0;
			IndexReader r=null;
			int num=0;
			try {
				r = _getReader(id,false);
				num=r.numDocs();
			}
			finally {
				close(r);
			}
			return num;
		}
		catch (Exception e) {
			// counted as "no documents"
		}
		return 0;
	}

	/**
	 * @return sum of the document counts of all indexes
	 */
	@Override
	public int getDocumentCount() {
		int count=0;
		SearchIndex[] _indexes = getIndexes();
		for(int i=0;i<_indexes.length;i++) {
			count+=getDocumentCount(_indexes[i].getId());
		}
		return count;
	}

	/**
	 * @return real size of the collection directory divided by 1024
	 */
	@Override
	public long getSize() {
		return ResourceUtil.getRealSize(collectionDir)/1024;
	}

	/**
	 * @return struct with the keys "categories" and "categorytrees", each a
	 * struct mapping a name to the number of indexes using it
	 */
	public Object getCategoryInfo() {
		Struct categories=new StructImpl();
		Struct categorytrees=new StructImpl();
		Struct info=new StructImpl();
		info.setEL("categories", categories);
		info.setEL("categorytrees", categorytrees);

		Iterator it = indexes.keySet().iterator();
		while(it.hasNext()) {
			SearchIndex index=indexes.get(it.next());

			// category tree
			String catTree = index.getCategoryTree();
			Double tmp=(Double) categorytrees.get(catTree,null);
			if(tmp==null) categorytrees.setEL(catTree,Caster.toDouble(1));
			else categorytrees.setEL(catTree,Caster.toDouble(tmp.doubleValue()+1));

			// categories
			String[] cats = index.getCategories();
			for(int i=0;i<cats.length;i++) {
				tmp=(Double) categories.get(cats[i],null);
				if(tmp==null) categories.setEL(cats[i],Caster.toDouble(1));
				else categories.setEL(cats[i],Caster.toDouble(tmp.doubleValue()+1));
			}
		}
		return info;
	}

	/**
	 * IndexWriter that locks the resource on creation and unlocks it on close.
	 * Not used inside this class.
	 */
	class ResourceIndexWriter extends IndexWriter {

		private Resource dir;

		public ResourceIndexWriter(Resource dir, Analyzer analyzer, boolean create) throws IOException {
			super(FileWrapper.toFile(dir), analyzer, create);
			this.dir=dir;
			dir.getResourceProvider().lock(dir);
		}

		@Override
		public synchronized void close() throws IOException {
			super.close();
			dir.getResourceProvider().unlock(dir);
		}
	}

	/**
	 * Removes the old spell directories of the given id and creates the next one.
	 * The number in the name only grows when an old directory could not be removed.
	 */
	private Resource _createSpellDirectory(String id) {
		Resource indexDir=collectionDir.getRealResource(id+"_"+(_getMax(id,true)+1)+SPELL_SUFFIX);
		indexDir.mkdirs();
		return indexDir;
	}

	/**
	 * @return the spell directory of the given id with the highest number
	 * (number 0 when there is none), the directory is not created
	 */
	private Resource _getSpellDirectory(String id) {
		return collectionDir.getRealResource(id+"_"+_getMax(id,false)+SPELL_SUFFIX);
	}

	/**
	 * Detects the highest number used by a spell directory of the given id.
	 * Only directories named <code>&lt;id&gt;_&lt;digits&gt;_spell</code> are
	 * looked at, spell directories of other ids are never touched.
	 * @param id index id
	 * @param delete remove the directories found; a directory that was removed
	 * does not count for the result
	 * @return highest number found, 0 when there is none
	 */
	private long _getMax(String id, boolean delete) {
		long max=0;
		Resource[] children = collectionDir.listResources(new SpellDirFilter());
		// null when the collection directory cannot be listed
		if(children==null) return max;

		String prefix=id+"_";
		for(int i=0;i<children.length;i++) {
			String name=children[i].getName();
			if(!isSpellDirectoryOf(name,prefix)) continue;

			long nbr=Caster.toLongValue(name.substring(prefix.length(),name.length()-SPELL_SUFFIX.length()),0);
			if(delete){
				try {
					children[i].remove(true);
					continue;
				}
				catch (Throwable t) {
					// still in use: keep its number reserved so the next
					// directory gets a different name
				}
			}
			if(nbr>max)max=nbr;
		}
		return max;
	}

	/**
	 * @param name directory name
	 * @param prefix index id followed by "_"
	 * @return true when name is the prefix, followed by digits only, followed by the spell suffix
	 */
	private static boolean isSpellDirectoryOf(String name, String prefix) {
		int start=prefix.length();
		int end=name.length()-SPELL_SUFFIX.length();
		if(end<=start || !name.startsWith(prefix) || !name.endsWith(SPELL_SUFFIX)) return false;
		for(int i=start;i<end;i++) {
			char ch=name.charAt(i);
			if(ch<'0' || ch>'9') return false;
		}
		return true;
	}

	/**
	 * Writes an info entry to the log, when there is a log.
	 */
	private void info(String doc) {
		if(log==null) return;
		log.info("Collection:"+getName(), "indexing "+doc);
	}

	/**
	 * Accepts every name ending with "_spell".
	 */
	public class SpellDirFilter implements ResourceNameFilter {

		@Override
		public boolean accept(Resource parent, String name) {
			return name.endsWith(SPELL_SUFFIX);
		}

	}
}