001/** 002 * 003 * Copyright (c) 2014, the Railo Company Ltd. All rights reserved. 004 * 005 * This library is free software; you can redistribute it and/or 006 * modify it under the terms of the GNU Lesser General Public 007 * License as published by the Free Software Foundation; either 008 * version 2.1 of the License, or (at your option) any later version. 009 * 010 * This library is distributed in the hope that it will be useful, 011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013 * Lesser General Public License for more details. 014 * 015 * You should have received a copy of the GNU Lesser General Public 016 * License along with this library. If not, see <http://www.gnu.org/licenses/>. 017 * 018 **/ 019package lucee.runtime.search.lucene2; 020 021import java.io.File; 022import java.io.IOException; 023import java.net.URL; 024import java.util.ArrayList; 025import java.util.HashMap; 026import java.util.HashSet; 027import java.util.Iterator; 028import java.util.Map; 029import java.util.Map.Entry; 030import java.util.Set; 031 032import lucee.commons.io.SystemUtil; 033import lucee.commons.io.log.Log; 034import lucee.commons.io.res.Resource; 035import lucee.commons.io.res.ResourcesImpl; 036import lucee.commons.io.res.filter.DirectoryResourceFilter; 037import lucee.commons.io.res.filter.ResourceFilter; 038import lucee.commons.io.res.filter.ResourceNameFilter; 039import lucee.commons.io.res.util.FileWrapper; 040import lucee.commons.io.res.util.ResourceUtil; 041import lucee.commons.lang.ExceptionUtil; 042import lucee.commons.lang.SerializableObject; 043import lucee.commons.lang.StringUtil; 044import lucee.runtime.config.ConfigImpl; 045import lucee.runtime.op.Caster; 046import lucee.runtime.search.AddionalAttrs; 047import lucee.runtime.search.IndexResult; 048import lucee.runtime.search.IndexResultImpl; 049import lucee.runtime.search.SearchCollectionSupport; 050import lucee.runtime.search.SearchData; 051import lucee.runtime.search.SearchEngineSupport; 052import lucee.runtime.search.SearchException; 053import lucee.runtime.search.SearchIndex; 054import lucee.runtime.search.SearchResulItem; 055import lucee.runtime.search.SearchResulItemImpl; 056import lucee.runtime.search.SuggestionItem; 057import lucee.runtime.search.lucene2.docs.CustomDocument; 058import lucee.runtime.search.lucene2.highlight.Highlight; 059import lucee.runtime.search.lucene2.net.WebCrawler; 060import lucee.runtime.search.lucene2.query.Literal; 061import lucee.runtime.search.lucene2.query.Op; 062import lucee.runtime.type.QueryColumn; 063import lucee.runtime.type.Struct; 064import lucee.runtime.type.StructImpl; 065import lucee.runtime.type.dt.DateTime; 066import lucee.runtime.type.util.ListUtil; 067 068import org.apache.lucene.analysis.Analyzer; 069import org.apache.lucene.document.Document; 070import org.apache.lucene.index.IndexReader; 071import org.apache.lucene.index.IndexWriter; 072import org.apache.lucene.queryParser.ParseException; 073import org.apache.lucene.queryParser.QueryParser; 074import org.apache.lucene.search.Hits; 075import org.apache.lucene.search.IndexSearcher; 076import org.apache.lucene.search.Query; 077import org.apache.lucene.search.Searcher; 078import org.apache.lucene.search.spell.Dictionary; 079import org.apache.lucene.search.spell.LuceneDictionary; 080import org.apache.lucene.search.spell.SpellChecker; 081import org.apache.lucene.store.FSDirectory; 082 083/** 084 * 085 */ 086public final class LuceneSearchCollection extends SearchCollectionSupport { 087 088 private static final long serialVersionUID = 3430238280421965781L; 089 090 private Resource collectionDir; 091 private boolean spellcheck; 092 private Log log; 093 private static final SerializableObject token=new SerializableObject(); 094 095 096 /** 097 * @param searchEngine 098 * @param name 099 * @param path 100 * @param language 101 * @param lastUpdate 102 * @param created 103 */ 104 public LuceneSearchCollection(SearchEngineSupport searchEngine, String name, Resource path, String language, //int count, 105 DateTime lastUpdate, DateTime created,boolean spellcheck) { 106 super(searchEngine, name, path, language, lastUpdate,created); 107 this.spellcheck=spellcheck; 108 collectionDir=getPath().getRealResource(StringUtil.toIdentityVariableName(getName())); 109 110 log=((ConfigImpl)searchEngine.getConfig()).getLog("search"); 111 } 112 113 public LuceneSearchCollection(SearchEngineSupport searchEngine, String name, Resource path, String language, //int count, 114 DateTime lastUpdate, DateTime created) { 115 this(searchEngine, name, path, language, lastUpdate, created, true); 116 } 117 118 @Override 119 protected void _create() throws SearchException { 120 try { 121 if(!collectionDir.exists())collectionDir.createDirectory(true); 122 } 123 catch (IOException e) {} 124 } 125 126 @Override 127 protected void _optimize() throws SearchException { 128 IndexWriter[] writers=_getWriters(false); 129 for(int i=0;i<writers.length;i++) { 130 try { 131 optimizeEL(writers[i]); 132 } 133 finally { 134 close(writers[i]); 135 } 136 } 137 } 138 139 @Override 140 protected void _map(Resource path) throws SearchException { 141 throw new SearchException("mapping of existing Collection for file ["+path+"] not supported"); 142 } 143 144 @Override 145 protected void _repair() throws SearchException { 146 //throw new SearchException("repair of existing Collection not supported"); 147 } 148 149 @Override 150 protected IndexResult _indexFile(String id, String title, Resource res,String language) throws SearchException { 151 info(res.getAbsolutePath()); 152 _checkLanguage(language); 153 int before=getDocumentCount(id); 154 IndexWriter writer=null; 155 synchronized(token){ 156 try { 157 writer = _getWriter(id,true); 158 _index(writer,res,res.getName()); 159 writer.optimize(); 160 } 161 catch (Exception e) { 162 throw new SearchException(e); 163 } 164 finally { 165 close(writer); 166 } 167 indexSpellCheck(id); 168 } 169 if(getDocumentCount(id)==before) return new IndexResultImpl(0,0,1); 170 return new IndexResultImpl(0,1,0); 171 } 172 173 174 175 @Override 176 protected IndexResult _indexPath(String id, String title, Resource dir,String[] extensions, boolean recurse, String language) throws SearchException { 177 info(dir.getAbsolutePath()); 178 _checkLanguage(language); 179 int doccount=0; 180 IndexWriter writer=null; 181 synchronized(token){ 182 try { 183 writer = _getWriter(id,true); 184 doccount=_list(0,writer,dir,new LuceneExtensionFileFilter(extensions,recurse),""); 185 //optimizeEL(writer); 186 writer.optimize(); 187 } 188 catch (IOException e) { 189 throw new SearchException(e); 190 } 191 finally { 192 close(writer); 193 } 194 indexSpellCheck(id); 195 } 196 197 198 199 return new IndexResultImpl(0,0,doccount); 200 } 201 202 private void optimizeEL(IndexWriter writer) { 203 if(writer==null)return; 204 try { 205 writer.optimize(); 206 } 207 catch (Throwable t) { 208 ExceptionUtil.rethrowIfNecessary(t); 209 //print.printST(t); 210 } 211 } 212 213 private void indexSpellCheck(String id) throws SearchException { 214 if(!spellcheck) return; 215 216 IndexReader reader=null; 217 FSDirectory spellDir=null; 218 219 Resource dir = _createSpellDirectory(id); 220 try { 221 File spellFile = FileWrapper.toFile(dir); 222 spellDir = FSDirectory.getDirectory(spellFile); 223 reader = _getReader(id,false); 224 Dictionary dictionary = new LuceneDictionary(reader,"contents"); 225 226 SpellChecker spellChecker = new SpellChecker(spellDir); 227 spellChecker.indexDictionary(dictionary); 228 229 } 230 catch(IOException ioe) { 231 throw new SearchException(ioe); 232 } 233 finally { 234 flushEL(reader); 235 closeEL(reader); 236 } 237 } 238 239 private void close(IndexWriter writer) throws SearchException { 240 if(writer!=null){ 241 //print.out("w-close"); 242 try { 243 writer.close(); 244 } catch (IOException e) { 245 throw new SearchException(e); 246 } 247 } 248 } 249 250 private static void close(IndexReader reader) throws SearchException { 251 if(reader!=null){ 252 try { 253 reader.close(); 254 } catch (IOException e) { 255 throw new SearchException(e); 256 } 257 } 258 } 259 260 private static void close(Searcher searcher) throws SearchException { 261 if(searcher!=null){ 262 try { 263 searcher.close(); 264 } catch (IOException e) { 265 throw new SearchException(e); 266 } 267 } 268 } 269 270 private static void flushEL(IndexReader reader) { 271 //print.out("r-closeEL"); 272 if(reader!=null){ 273 try { 274 reader.flush(); 275 } catch (Throwable t) { 276 ExceptionUtil.rethrowIfNecessary(t); 277 //throw new SearchException(t); 278 } 279 } 280 } 281 private static void closeEL(IndexReader reader) { 282 //print.out("r-closeEL"); 283 if(reader!=null){ 284 try { 285 reader.close(); 286 } catch (Throwable t) { 287 ExceptionUtil.rethrowIfNecessary(t); 288 //throw new SearchException(t); 289 } 290 } 291 } 292 293 @Override 294 protected IndexResult _indexURL(String id, String title, URL url,String[] extensions, boolean recurse, String language)throws SearchException { 295 //timeout=ThreadLocalPageContext.getConfig().getRequestTimeout().getMillis(); 296 return _indexURL(id, title, url, extensions, recurse, language,50000L); 297 } 298 public IndexResult _indexURL(String id, String title, URL url,String[] extensions, boolean recurse, String language, long timeout)throws SearchException { 299 _checkLanguage(language); 300 info(url.toExternalForm()); 301 int before=getDocumentCount(id); 302 IndexWriter writer=null; 303 synchronized(token){ 304 try { 305 writer = _getWriter(id,true); 306 new WebCrawler(log).parse(writer, url, extensions, recurse,timeout); 307 308 writer.optimize(); 309 } 310 catch (Exception e) { 311 throw new SearchException(e); 312 } 313 finally { 314 close(writer); 315 } 316 indexSpellCheck(id); 317 } 318 if(getDocumentCount(id)==before) return new IndexResultImpl(0,0,1); 319 return new IndexResultImpl(0,1,0); 320 //throw new SearchException("url indexing not supported"); 321 322 } 323 324 /** 325 * @param id 326 * @param title 327 * @param keyColumn 328 * @param bodyColumns 329 * @param language 330 * @param custom1 331 * @param custom2 332 * @param custom3 333 * @param custom4 334 * @return 335 * @throws SearchException 336 */ 337 protected IndexResult _deleteCustom(String id,QueryColumn keyColumn) throws SearchException { 338 339 int countBefore=0; 340 int countAfter=0; 341 342 Map<String,Document> docs=new HashMap<String,Document>(); 343 344 Set<String> keys=toSet(keyColumn); 345 IndexWriter writer=null; 346 String key; 347 IndexReader reader=null; 348 Document doc; 349 350 synchronized(token){ 351 try { 352 try { 353 reader=_getReader(id,false); 354 countBefore=reader.maxDoc(); 355 for(int i=0;i<countBefore;i++) { 356 doc=reader.document(i); 357 key=doc.getField("key").stringValue(); 358 if(!keys.contains(key)) 359 docs.put(key,doc); 360 } 361 } 362 catch(Exception e) {} 363 finally { 364 close(reader); 365 } 366 countAfter=docs.size(); 367 368 369 writer = _getWriter(id,true); 370 Iterator<Entry<String, Document>> it = docs.entrySet().iterator(); 371 while(it.hasNext()) { 372 writer.addDocument(it.next().getValue()); 373 } 374 optimizeEL(writer); 375 376 } catch (IOException e) { 377 throw new SearchException(e); 378 } 379 finally { 380 close(writer); 381 } 382 indexSpellCheck(id); 383 } 384 int removes=countBefore-countAfter; 385 386 return new IndexResultImpl(removes,0,0); 387 } 388 389 private Set<String> toSet(QueryColumn column) { 390 Set<String> set=new HashSet<String>(); 391 Iterator it = column.valueIterator(); 392 while(it.hasNext()){ 393 set.add(Caster.toString(it.next(),null)); 394 } 395 return set; 396 } 397 398 /** 399 * @param id 400 * @param title 401 * @param keyColumn 402 * @param bodyColumns 403 * @param language 404 * @param custom1 405 * @param custom2 406 * @param custom3 407 * @param custom4 408 * @return 409 * @throws SearchException 410 */ 411 protected IndexResult _indexCustom(String id, Object title, QueryColumn keyColumn, QueryColumn[] bodyColumns, String language, 412 Object urlpath,Object custom1,Object custom2,Object custom3,Object custom4) throws SearchException { 413 _checkLanguage(language); 414 String t; 415 String url; 416 String c1; 417 String c2; 418 String c3; 419 String c4; 420 421 int countExisting=0; 422 int countAdd=keyColumn.size(); 423 int countNew=0; 424 425 Map<String,Document> docs=new HashMap<String,Document>(); 426 IndexWriter writer=null; 427 synchronized(token){ 428 try { 429 // read existing reader 430 IndexReader reader=null; 431 try { 432 reader=_getReader(id,false); 433 int len=reader.maxDoc(); 434 Document doc; 435 for(int i=0;i<len;i++) { 436 doc=reader.document(i); 437 docs.put(doc.getField("key").stringValue(),doc); 438 } 439 } 440 catch(Exception e) {} 441 finally { 442 close(reader); 443 } 444 445 countExisting=docs.size(); 446 writer = _getWriter(id,true); 447 int len = keyColumn.size(); 448 String key; 449 for(int i=1;i<=len;i++) { 450 key=Caster.toString(keyColumn.get(i,null),null); 451 if(key==null) continue; 452 453 StringBuilder body=new StringBuilder(); 454 for(int y=0;y<bodyColumns.length;y++) { 455 Object tmp=bodyColumns[y].get(i,null); 456 if(tmp!=null){ 457 body.append(tmp.toString()); 458 body.append(' '); 459 } 460 } 461 //t=(title==null)?null:Caster.toString(title.get(i,null),null); 462 //url=(urlpath==null)?null:Caster.toString(urlpath.get(i,null),null); 463 464 t=getRow(title,i); 465 url=getRow(urlpath,i); 466 c1=getRow(custom1,i); 467 c2=getRow(custom2,i); 468 c3=getRow(custom3,i); 469 c4=getRow(custom4,i); 470 471 docs.put(key,CustomDocument.getDocument(t,key,body.toString(),url,c1,c2,c3,c4)); 472 } 473 countNew=docs.size(); 474 Iterator<Entry<String, Document>> it = docs.entrySet().iterator(); 475 Entry<String, Document> entry; 476 Document doc; 477 while(it.hasNext()) { 478 entry = it.next(); 479 doc = entry.getValue(); 480 writer.addDocument(doc); 481 } 482 optimizeEL(writer); 483 //writer.optimize(); 484 485 } 486 catch(IOException ioe) { 487 throw new SearchException(ioe); 488 } 489 finally { 490 close(writer); 491 } 492 indexSpellCheck(id); 493 } 494 int inserts=countNew-countExisting; 495 496 return new IndexResultImpl(0,inserts,countAdd-inserts); 497 } 498 499 private String getRow(Object column, int row) { 500 if(column instanceof QueryColumn){ 501 return Caster.toString(((QueryColumn)column).get(row,null),null); 502 } 503 if(column!=null) return Caster.toString(column,null); 504 return null; 505 } 506 507 @Override 508 protected IndexResult _purge() throws SearchException { 509 SearchIndex[] indexes=getIndexes(); 510 int count=0; 511 for(int i=0;i<indexes.length;i++) { 512 count+=getDocumentCount(indexes[i].getId()); 513 } 514 ResourceUtil.removeChildrenEL(collectionDir); 515 return new IndexResultImpl(count,0,0); 516 } 517 518 @Override 519 protected IndexResult _delete() throws SearchException { 520 SearchIndex[] indexes=getIndexes(); 521 int count=0; 522 for(int i=0;i<indexes.length;i++) { 523 count+=getDocumentCount(indexes[i].getId()); 524 } 525 ResourceUtil.removeEL(collectionDir, true); 526 return new IndexResultImpl(count,0,0); 527 } 528 529 @Override 530 protected IndexResult _deleteIndex(String id) throws SearchException { 531 int count=getDocumentCount(id); 532 ResourceUtil.removeEL(_getIndexDirectory(id,true), true); 533 return new IndexResultImpl(count,0,0); 534 } 535 536 @Override 537 public SearchResulItem[] _search(SearchData data, String criteria, String language,short type, 538 String categoryTree, String[] category) throws SearchException { 539 try { 540 541 if(type!=SEARCH_TYPE_SIMPLE) throw new SearchException("search type explicit not supported"); 542 Analyzer analyzer = SearchUtil.getAnalyzer(language); 543 Query query=null; 544 Op op=null; 545 Object highlighter=null; 546 lucee.runtime.search.lucene2.query.QueryParser queryParser=new lucee.runtime.search.lucene2.query.QueryParser(); 547 AddionalAttrs aa = AddionalAttrs.getAddionlAttrs(); 548 aa.setHasRowHandling(true); 549 int startrow=aa.getStartrow(); 550 int maxrows=aa.getMaxrows(); 551 552 553 if(!criteria.equals("*")) { 554 // FUTURE take this data from calling parameters 555 op=queryParser.parseOp(criteria); 556 if(op==null) criteria="*"; 557 else criteria=op.toString(); 558 try { 559 560 query = new QueryParser("contents",analyzer ).parse(criteria); 561 highlighter = Highlight.createHighlighter(query,aa.getContextHighlightBegin(),aa.getContextHighlightEnd()); 562 563 564 } 565 catch (ParseException e) { 566 throw new SearchException(e); 567 } 568 } 569 570 Resource[] files = _getIndexDirectories(); 571 572 if(files==null) return new SearchResulItem[0]; 573 ArrayList<SearchResulItem> list=new ArrayList<SearchResulItem>(); 574 String ct,c; 575 576 ArrayList<String> spellCheckIndex=spellcheck?new ArrayList<String>():null; 577 578 int count=0; 579 IndexReader reader = null; 580 Searcher searcher = null; 581 try { 582 outer:for(int i=0;i<files.length;i++) { 583 if(removeCorrupt(files[i]))continue; 584 String strFile=files[i].toString(); 585 SearchIndex si = indexes.get(files[i].getName()); 586 587 if(si==null)continue; 588 ct=si.getCategoryTree(); 589 c=ListUtil.arrayToList(si.getCategories(), ","); 590 591 // check category tree 592 if(!matchCategoryTree(ct,categoryTree))continue; 593 if(!matchCategories(si.getCategories(),category))continue; 594 595 Document doc; 596 String id=files[i].getName(); 597 data.addRecordsSearched(_countDocs(strFile)); 598 599 reader = _getReader(id,false); 600 if(query==null && "*".equals(criteria)) { 601 int len=reader.numDocs(); 602 for(int y=0;y<len;y++) { 603 if(startrow>++count)continue; 604 if(maxrows>-1 && list.size()>=maxrows) break outer; 605 doc = reader.document(y); 606 list.add(createSearchResulItem(highlighter,analyzer,doc,id,1,ct,c,aa.getContextPassages(),aa.getContextBytes())); 607 } 608 } 609 else { 610 if(spellcheck)spellCheckIndex.add(id); 611 // search 612 searcher = new IndexSearcher(reader); 613 Hits hits = searcher.search(query); 614 int len=hits.length(); 615 for (int y=0; y<len; y++) { 616 if(startrow>++count)continue; 617 if(maxrows>-1 && list.size()>=maxrows) break outer; 618 //list.add(new SearchResulItemHits(hits,y,highlighter,analyzer,id,ct,c,aa.getContextPassages(),aa.getContextBytes())); 619 doc = hits.doc(y); 620 list.add(createSearchResulItem(highlighter,analyzer,doc,id,hits.score(y),ct,c,aa.getContextPassages(),aa.getContextBytes())); 621 } 622 623 } 624 625 } 626 } 627 finally { 628 close(reader); 629 close(searcher); 630 } 631 632 // spellcheck 633 //SearchData data=ThreadLocalSearchData.get(); 634 if(spellcheck && data!=null) { 635 if(data.getSuggestionMax()>=list.size()) { 636 637 Map suggestions = data.getSuggestion(); 638 Iterator it = spellCheckIndex.iterator(); 639 String id; 640 Literal[] literals = queryParser.getLiteralSearchedTerms(); 641 String[] strLiterals = queryParser.getStringSearchedTerms(); 642 boolean setSuggestionQuery=false; 643 while(it.hasNext()) { 644 id=(String) it.next(); 645 // add to set to remove duplicate values 646 SuggestionItem si; 647 SpellChecker sc = getSpellChecker(id); 648 for(int i=0;i<strLiterals.length;i++) { 649 String[] arr = sc.suggestSimilar(strLiterals[i], 1000); 650 if(arr.length>0){ 651 literals[i].set("<suggestion>"+arr[0]+"</suggestion>"); 652 setSuggestionQuery=true; 653 654 si=(SuggestionItem) suggestions.get(strLiterals[i]); 655 if(si==null)suggestions.put(strLiterals[i],new SuggestionItem(arr)); 656 else si.add(arr); 657 } 658 } 659 } 660 if(setSuggestionQuery)data.setSuggestionQuery(op.toString()); 661 } 662 } 663 664 return list.toArray(new SearchResulItem[list.size()]); 665 } 666 catch (IOException e) { throw new SearchException(e); } 667 668 } 669 670 private SpellChecker getSpellChecker(String id) throws IOException { 671 FSDirectory siDir = FSDirectory.getDirectory(FileWrapper.toFile(_getSpellDirectory(id))); 672 SpellChecker spellChecker = new SpellChecker(siDir); 673 return spellChecker; 674 } 675 676 private boolean removeCorrupt(Resource dir) { 677 if(ResourceUtil.isEmptyFile(dir)) { 678 ResourceUtil.removeEL(dir, true); 679 return true; 680 } 681 return false; 682 } 683 684 private static SearchResulItem createSearchResulItem(Object highlighter,Analyzer a,Document doc, String name, float score, String ct, String c,int maxNumFragments, int maxLength) { 685 String contextSummary=""; 686 if(maxNumFragments>0) 687 contextSummary=Highlight.createContextSummary(highlighter,a,doc.get("contents"),maxNumFragments,maxLength,doc.get("summary")); 688 String summary = doc.get("summary"); 689 690 return new SearchResulItemImpl( 691 name, 692 doc.get("title"), 693 score, 694 doc.get("key"), 695 doc.get("url"), 696 summary,contextSummary, 697 ct,c, 698 doc.get("custom1"), 699 doc.get("custom2"), 700 doc.get("custom3"), 701 doc.get("custom4"), 702 doc.get("mime-type"), 703 doc.get("author"), 704 doc.get("size")); 705 706 } 707 708 private boolean matchCategories(String[] categoryIndex, String[] categorySearch) { 709 if(categorySearch==null ||categorySearch.length==0) return true; 710 String search; 711 for(int s=0;s<categorySearch.length;s++) { 712 search=categorySearch[s]; 713 for(int i=0;i<categoryIndex.length;i++) { 714 if(search.equals(categoryIndex[i]))return true; 715 } 716 } 717 return false; 718 } 719 720 private boolean matchCategoryTree(String categoryTreeIndex, String categoryTreeSearch) { 721 //if(StringUtil.isEmpty(categoryTreeIndex) || categoryTreeIndex.equals("/")) return true; 722 //if(StringUtil.isEmpty(categoryTreeSearch) || categoryTreeSearch.equals("/")) return true; 723 return categoryTreeIndex.startsWith(categoryTreeSearch); 724 } 725 726 /** 727 * list a directory and call every file 728 * @param writer 729 * @param res 730 * @param filter 731 * @param url 732 * @throws IOException 733 * @throws InterruptedException 734 */ 735 private int _list(int doccount,IndexWriter writer, Resource res,ResourceFilter filter,String url) { 736 737 if (res.isReadable()) { 738 if (res.exists() && res.isDirectory()) { 739 Resource[] files = (filter==null)?res.listResources():res.listResources(filter); 740 if (files != null) { 741 for (int i = 0; i < files.length; i++) { 742 if(removeCorrupt(files[i])){ 743 continue; 744 } 745 doccount=_list(doccount,writer, files[i],filter,url+"/"+files[i].getName()); 746 } 747 } 748 } 749 else { 750 try { 751 info(res.getAbsolutePath()); 752 _index(writer,res,url); 753 doccount++; 754 } catch (Exception e) {} 755 } 756 } 757 return doccount; 758 } 759 760 /** 761 * index a single file 762 * @param writer 763 * @param file 764 * @param url 765 * @throws IOException 766 * @throws InterruptedException 767 */ 768 private void _index(IndexWriter writer, Resource file,String url) throws IOException { 769 if(!file.exists()) return; 770 writer.addDocument(DocumentUtil.toDocument(file,url,SystemUtil.getCharset().name())); 771 } 772 773 774 775 776 777 /** 778 * @param id 779 * @return returns the Index Directory 780 */ 781 private Resource _getIndexDirectory(String id, boolean createIfNotExists) { 782 Resource indexDir=collectionDir.getRealResource(id); 783 if(createIfNotExists && !indexDir.exists())indexDir.mkdirs(); 784 return indexDir; 785 } 786 787 /** 788 * get writer to id 789 * @param id 790 * @return returns the Writer 791 * @throws IOException 792 * @throws SearchException 793 * @throws IOException 794 */ 795 private IndexWriter _getWriter(String id,boolean create) throws SearchException, IOException { 796 // FUTURE support for none file -> Directory Object 797 Resource dir = _getIndexDirectory(id,true); 798 return new IndexWriter(FileWrapper.toFile(dir), SearchUtil.getAnalyzer(getLanguage()), create); 799 //return new ResourceIndexWriter(dir, SearchUtil.getAnalyzer(getLanguage()), create); 800 /*try { 801 return new ResourceIndexWriter(dir, SearchUtil.getAnalyzer(getLanguage()), true); 802 } catch (IOException e) { 803 ResourceUtil.removeChildrenEL(dir); 804 dir.getResourceProvider().unlock(dir); 805 return new ResourceIndexWriter(dir, SearchUtil.getAnalyzer(getLanguage()),true); 806 }*/ 807 } 808 809 private IndexReader _getReader(String id,boolean absolute) throws IOException { 810 return _getReader(_getFile(id, absolute)); 811 } 812 813 private IndexReader _getReader(File file) throws IOException { 814 if(!IndexReader.indexExists(file))throw new IOException("there is no index in ["+file+"]"); 815 return IndexReader.open(file); 816 } 817 818 private File _getFile(String id,boolean absolute) throws IOException { 819 Resource res = absolute?ResourcesImpl.getFileResourceProvider().getResource(id):_getIndexDirectory(id,true); 820 res.getResourceProvider().read(res); 821 return FileWrapper.toFile(res); 822 } 823 824 /** 825 * @return returns all existing IndexWriter 826 */ 827 private Resource[] _getIndexDirectories() { 828 Resource[] files = collectionDir.listResources(new DirectoryResourceFilter()); 829 830 return files; 831 } 832 833 /** 834 * @return returns all existing IndexWriter 835 * @throws SearchException 836 */ 837 private IndexWriter[] _getWriters(boolean create) throws SearchException { 838 Resource[] files = _getIndexDirectories(); 839 if(files==null) return new IndexWriter[0]; 840 841 IndexWriter[] writers=new IndexWriter[files.length]; 842 for(int i=0;i<files.length;i++) { 843 try { 844 writers[i]=_getWriter(files[i].getName(),create); 845 } catch (IOException e) {} 846 } 847 return writers; 848 } 849 850 851 private int _countDocs(String col) { 852 // FUTURE add support for none file resources 853 int totalDocs; 854 IndexReader reader=null; 855 try { 856 reader=_getReader(col,true); 857 totalDocs = reader.numDocs(); 858 } 859 catch(Exception e) { 860 return 0; 861 } 862 finally { 863 closeEL(reader); 864 } 865 return totalDocs; 866 } 867 868 /** 869 * @deprecated see SearchUtil.getAnalyzer(String language); 870 * @param language 871 * @return returns language matching Analyzer 872 * @throws SearchException 873 */ 874 public static Analyzer _getAnalyzer(String language) throws SearchException { 875 return SearchUtil.getAnalyzer(language); 876 } 877 878 /** 879 * check given language against collection language 880 * @param language 881 * @throws SearchException 882 */ 883 private void _checkLanguage(String language) throws SearchException { 884 885 if(language!=null && !language.trim().equalsIgnoreCase(getLanguage())) { 886 throw new SearchException("collection Language and Index Language must be of same type, but collection language is of type ["+getLanguage()+"] and index language is of type ["+language+"]"); 887 } 888 } 889 890 @Override 891 public int getDocumentCount(String id) { 892 try { 893 if(!_getIndexDirectory(id,false).exists()) return 0; 894 IndexReader r=null; 895 int num=0; 896 try { 897 r = _getReader(id,false); 898 num=r.numDocs(); 899 } 900 finally { 901 close(r); 902 } 903 return num; 904 } 905 catch (Exception e) {} 906 return 0; 907 } 908 909 @Override 910 public int getDocumentCount() { 911 int count=0; 912 SearchIndex[] _indexes = getIndexes(); 913 for(int i=0;i<_indexes.length;i++) { 914 count+=getDocumentCount(_indexes[i].getId()); 915 } 916 917 return count; 918 } 919 920 @Override 921 public long getSize() { 922 return ResourceUtil.getRealSize(collectionDir)/1024; 923 } 924 925 public Object getCategoryInfo() { 926 Struct categories=new StructImpl(); 927 Struct categorytrees=new StructImpl(); 928 Struct info=new StructImpl(); 929 info.setEL("categories", categories); 930 info.setEL("categorytrees", categorytrees); 931 932 Iterator it = indexes.keySet().iterator(); 933 String[] cats; 934 String catTree; 935 Double tmp; 936 937 while(it.hasNext()) { 938 SearchIndex index=indexes.get(it.next()); 939 940 // category tree 941 catTree = index.getCategoryTree(); 942 tmp=(Double) categorytrees.get(catTree,null); 943 if(tmp==null) categorytrees.setEL(catTree,Caster.toDouble(1)); 944 else categorytrees.setEL(catTree,Caster.toDouble(tmp.doubleValue()+1)); 945 946 // categories 947 cats = index.getCategories(); 948 for(int i=0;i<cats.length;i++) { 949 tmp=(Double) categories.get(cats[i],null); 950 if(tmp==null) categories.setEL(cats[i],Caster.toDouble(1)); 951 else categories.setEL(cats[i],Caster.toDouble(tmp.doubleValue()+1)); 952 } 953 } 954 return info; 955 } 956 957 class ResourceIndexWriter extends IndexWriter { 958 959 private Resource dir; 960 961 public ResourceIndexWriter(Resource dir, Analyzer analyzer, boolean create) throws IOException { 962 963 super(FileWrapper.toFile(dir), analyzer, create); 964 this.dir=dir; 965 dir.getResourceProvider().lock(dir); 966 967 } 968 969 @Override 970 public synchronized void close() throws IOException { 971 super.close(); 972 dir.getResourceProvider().unlock(dir); 973 } 974 975 } 976 977 private Resource _createSpellDirectory(String id) { 978 Resource indexDir=collectionDir.getRealResource(id+"_"+(_getMax(true)+1)+"_spell"); 979 //print.out("create:"+indexDir); 980 indexDir.mkdirs(); 981 return indexDir; 982 } 983 984 private Resource _getSpellDirectory(String id) { 985 Resource indexDir=collectionDir.getRealResource(id+"_"+_getMax(false)+"_spell"); 986 //print.out("get:"+indexDir); 987 return indexDir; 988 } 989 990 private long _getMax(boolean delete) { 991 Resource[] children = collectionDir.listResources(new SpellDirFilter()); 992 long max=0, nbr; 993 String name; 994 for(int i=0;i<children.length;i++) { 995 name=children[i].getName(); 996 name=name.substring(0,name.length()-6); 997 nbr=Caster.toLongValue(name.substring(name.lastIndexOf('_')+1),0); 998 if(delete){ 999 try { 1000 children[i].remove(true); 1001 continue; 1002 } 1003 catch (Throwable t) { 1004 ExceptionUtil.rethrowIfNecessary(t); 1005 } 1006 } 1007 if(nbr>max)max=nbr; 1008 } 1009 return max; 1010 } 1011 1012 private void info(String doc) { 1013 if(log==null) return; 1014 log.log(Log.LEVEL_INFO,"Collection:"+getName(), "indexing "+doc); 1015 } 1016 1017 public class SpellDirFilter implements ResourceNameFilter { 1018 1019 @Override 1020 public boolean accept(Resource parent, String name) { 1021 return name.endsWith("_spell"); 1022 } 1023 1024 } 1025}