001/** 002 * 003 * Copyright (c) 2014, the Railo Company Ltd. All rights reserved. 004 * 005 * This library is free software; you can redistribute it and/or 006 * modify it under the terms of the GNU Lesser General Public 007 * License as published by the Free Software Foundation; either 008 * version 2.1 of the License, or (at your option) any later version. 009 * 010 * This library is distributed in the hope that it will be useful, 011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013 * Lesser General Public License for more details. 014 * 015 * You should have received a copy of the GNU Lesser General Public 016 * License along with this library. If not, see <http://www.gnu.org/licenses/>. 017 * 018 **/ 019package lucee.runtime.search.lucene2.query; 020 021import java.util.ArrayList; 022import java.util.Iterator; 023import java.util.List; 024 025import lucee.commons.lang.ParserString; 026 027 028/** 029 * The simple query is the default query type and is appropriate for the vast majority of searches. 030 * When entering text on a search form, you perform a simple query by entering a word or comma-delimited strings, 031 * with optional wildcard characters. 032 * Verity treats each comma as a logical OR. If you omit the commas, Verity treats the expression as a phrase. 033 */ 034public final class QueryParser { 035 private static final String OR="or"; 036 private static final String AND="and"; 037 private static final String NOT="not"; 038 private static final char QUOTER='"'; 039 040 private static final String STAR = "*"; 041 private List list=new ArrayList(); 042 043 044 /** 045 * parse given string query 046 * @param criteria 047 * @return matching Query 048 */ 049 public String parse(String criteria) { 050 Op op = parseOp(criteria); 051 if(op==null) return STAR; 052 return op.toString(); 053 } 054 public Op parseOp(String criteria) { 055 if(criteria.length()>0) { 056 char first=criteria.charAt(0); 057 // start with operator 058 while(first=='*' || first=='~' || first=='?') { 059 criteria=criteria.substring(1); 060 if(criteria.length()==0) break; 061 first=criteria.charAt(0); 062 } 063 } 064 065 // make never foud query if quey is empty 066 if(criteria.length()==0) { 067 return null; 068 } 069 070 //StringBuffer str=new StringBuffer(); 071 ParserString ps=new ParserString(criteria); 072 Op op=null; 073 while(!ps.isAfterLast()) { 074 if(op==null)op=orOp(ps); 075 else op=new Concator(op,orOp(ps)); 076 } 077 return op; 078 } 079 080 081 082 private Op orOp(ParserString ps) { 083 Op op=andOp(ps); 084 ps.removeSpace(); 085 086 // OR 087 while(ps.isValidIndex() && (ps.forwardIfCurrent(OR) || ps.forwardIfCurrent(','))) { 088 ps.removeSpace(); 089 if(ps.isAfterLast()) op=new Concator(op,new Literal("OR") ); 090 else op=new Or(op,andOp(ps)); 091 } 092 return op; 093 } 094 095 private Op andOp(ParserString ps) { 096 Op op = notOp(ps); 097 ps.removeSpace(); 098 099 // AND 100 while(ps.isValidIndex() && ps.forwardIfCurrent(AND)) { 101 ps.removeSpace(); 102 if(ps.isAfterLast()) op=new Concator(op,new Literal("AND") ); 103 else op=new And(op,notOp(ps)); 104 } 105 return op; 106 } 107 108 private Op notOp(ParserString ps) { 109 Op op = spaceOp(ps); 110 ps.removeSpace(); 111 112 // NOT 113 while(ps.isValidIndex() && ps.forwardIfCurrent(NOT)) { 114 ps.removeSpace(); 115 if(ps.isAfterLast()) op=new Concator(op,new Literal("NOT") ); 116 else { 117 Op r; 118 op=new Not(op,r=clip(ps)); 119 this.list.remove(r); 120 } 121 } 122 return op; 123 } 124 125 private Op spaceOp(ParserString ps) { 126 Op op = clip(ps); 127 //ps.removeSpace(); 128 129 // Concat 130 while(ps.isValidIndex() && isSpace(ps.getCurrent()) && !(ps.isCurrentIgnoreSpace(OR) || ps.isCurrentIgnoreSpace(',') || ps.isCurrentIgnoreSpace(AND) || ps.isCurrentIgnoreSpace(NOT))) { 131 ps.removeSpace(); 132 op=new Concator(op,clip(ps)); 133 } 134 return op; 135 } 136 137 private Op clip(ParserString ps) { 138 // () 139 if(ps.isValidIndex() && ps.forwardIfCurrent('(')) { 140 Op op=orOp(ps); 141 ps.removeSpace(); 142 ps.forwardIfCurrent(')'); 143 ps.removeSpace(); 144 return op; 145 } 146 return literal(ps); 147 } 148 149 private Op literal(ParserString ps) { 150 ps.removeSpace(); 151 152 if(ps.isCurrent(QUOTER)) return quotedLiteral(ps); 153 return notQuotedLiteral(ps); 154 } 155 156 157 private Op quotedLiteral(ParserString ps) { 158 StringBuffer str=new StringBuffer(); 159 ps.next(); 160 char c; 161 while(!ps.isAfterLast()) { 162 c=ps.getCurrent(); 163 if(c==QUOTER) { 164 ps.next(); 165 if(ps.isCurrent(QUOTER)) str.append(QUOTER); 166 else break; 167 } 168 else { 169 str.append(c); 170 } 171 ps.next(); 172 } 173 174 return register(new Literal(str.toString())); 175 } 176 177 private Op notQuotedLiteral(ParserString ps) { 178 179 StringBuffer str=new StringBuffer(); 180 ps.removeSpace(); 181 182 char c; 183 184 while(!ps.isAfterLast()) { 185 c=ps.getCurrent(); 186 if(isSpace(c) || c==',') break; 187 str.append(c); 188 ps.next(); 189 } 190 return register(new Literal(str.toString())); 191 192 } 193 194 private boolean isSpace(char c) { 195 return c==' ' || c=='\t' || c=='\n' || c=='\b'; 196 } 197 198 199 200 /*public static void main(String[] args) { 201 QueryParser qp = new QueryParser(); 202 203 qp.parseOp("aaa zzz not bbb and ccc"); 204 print.out(qp.getLiteralSearchedTerms()); 205 if(true) return; 206 print.out(qp.parse("\"abc\"")); 207 print.out(qp.parse("abc")); 208 print.out(qp.parse("abc def")); 209 print.out(qp.parse("abc def")); 210 print.out(qp.parse("abc and def")); 211 print.out(qp.parse("\"\"\"abc\"\"\"")); 212 print.out(qp.parse("\"abc\" susi or peter")); 213 print.out(qp.parse("abc susi or peter")); 214 print.out(qp.parse("abc or susi or peter")); 215 print.out(qp.parse("*abc susi and peter or \"abc\"* , xxx,yy*")); 216 print.out(qp.parse("xxx,y\"y*")); 217 print.out(qp.parse("xxx y\"y*")); 218 print.out(qp.parse("")); 219 print.out(qp.parse("per or")); 220 print.out(qp.parse("per and")); 221 print.out(qp.parse("per not")); 222 print.out(qp.parse("andi per not susi")); 223 print.out(qp.parse("\"kinderhort test\"")); 224 }*/ 225 public Literal register(Literal literal) { 226 list.add(literal); 227 return literal; 228 } 229 230 public Literal[] getLiteralSearchedTerms() { 231 return (Literal[]) list.toArray(new Literal[list.size()]); 232 } 233 234 public String[] getStringSearchedTerms() { 235 Iterator it = list.iterator(); 236 String[] rtn=new String[list.size()]; 237 int i=0; 238 while(it.hasNext()) { 239 rtn[i++]=it.next().toString(); 240 } 241 242 return rtn; 243 } 244 245}