001/** 002 * 003 * Copyright (c) 2014, the Railo Company Ltd. All rights reserved. 004 * 005 * This library is free software; you can redistribute it and/or 006 * modify it under the terms of the GNU Lesser General Public 007 * License as published by the Free Software Foundation; either 008 * version 2.1 of the License, or (at your option) any later version. 009 * 010 * This library is distributed in the hope that it will be useful, 011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013 * Lesser General Public License for more details. 014 * 015 * You should have received a copy of the GNU Lesser General Public 016 * License along with this library. If not, see <http://www.gnu.org/licenses/>. 017 * 018 **/ 019package lucee.runtime.regex; 020 021import java.util.Map; 022 023import lucee.commons.collection.MapFactory; 024import lucee.runtime.op.Constants; 025import lucee.runtime.type.Array; 026import lucee.runtime.type.ArrayImpl; 027import lucee.runtime.type.Struct; 028import lucee.runtime.type.StructImpl; 029 030import org.apache.oro.text.regex.MalformedPatternException; 031import org.apache.oro.text.regex.MatchResult; 032import org.apache.oro.text.regex.Pattern; 033import org.apache.oro.text.regex.PatternMatcherInput; 034import org.apache.oro.text.regex.Perl5Compiler; 035import org.apache.oro.text.regex.Perl5Matcher; 036import org.apache.oro.text.regex.Perl5Substitution; 037import org.apache.oro.text.regex.Util; 038 039/** 040 * 041 */ 042public final class Perl5Util { 043 044 private static Map<String,Pattern> patterns=MapFactory.<String,Pattern>getConcurrentMap(); 045 046 /** 047 * return index of the first occurence of the pattern in input text 048 * @param strPattern pattern to search 049 * @param strInput text to search pattern 050 * @param offset 051 * @param caseSensitive 052 * @return position of the first occurence 053 * @throws MalformedPatternException 054 */ 055 public static int indexOf(String strPattern, String strInput, int offset, boolean caseSensitive) throws MalformedPatternException { 056 //Perl5Compiler compiler = new Perl5Compiler(); 057 PatternMatcherInput input = new PatternMatcherInput(strInput); 058 Perl5Matcher matcher = new Perl5Matcher(); 059 060 int compileOptions=caseSensitive ? 0 : Perl5Compiler.CASE_INSENSITIVE_MASK; 061 compileOptions+=Perl5Compiler.SINGLELINE_MASK; 062 if(offset < 1) offset = 1; 063 064 Pattern pattern = getPattern(strPattern,compileOptions); 065 //Pattern pattern = compiler.compile(strPattern,compileOptions); 066 067 068 if(offset <= strInput.length()) input.setCurrentOffset(offset - 1); 069 070 if(offset <= strInput.length() && matcher.contains(input, pattern)) { 071 return matcher.getMatch().beginOffset(0) + 1; 072 } 073 return 0; 074 } 075 076 077 /** 078 * find occurence of a pattern in a string (same like indexOf), but dont return first ocurence , it return 079 * struct with all information 080 * @param strPattern 081 * @param strInput 082 * @param offset 083 * @param caseSensitive 084 * @return 085 * @throws MalformedPatternException 086 */ 087 public static Struct find(String strPattern, String strInput, int offset, boolean caseSensitive) throws MalformedPatternException { 088 089 Perl5Matcher matcher = new Perl5Matcher(); 090 PatternMatcherInput input = new PatternMatcherInput(strInput); 091 092 093 int compileOptions=caseSensitive ? 0 : Perl5Compiler.CASE_INSENSITIVE_MASK; 094 compileOptions+=Perl5Compiler.SINGLELINE_MASK; 095 if(offset < 1) offset = 1; 096 097 098 Pattern pattern = getPattern(strPattern,compileOptions); 099 100 101 if(offset <= strInput.length()) input.setCurrentOffset(offset - 1); 102 103 if(offset <= strInput.length() && matcher.contains(input, pattern)) { 104 MatchResult result = matcher.getMatch(); 105 106 int groupCount = result.groups(); 107 Array posArray = new ArrayImpl(); 108 Array lenArray = new ArrayImpl(); 109 for(int i = 0; i < groupCount; i++) { 110 int off = result.beginOffset(i); 111 posArray.appendEL(Integer.valueOf(off + 1)); 112 lenArray.appendEL(Integer.valueOf(result.endOffset(i) - off)); 113 } 114 Struct struct = new StructImpl(); 115 struct.setEL("pos", posArray); 116 struct.setEL("len", lenArray); 117 return struct; 118 119 } 120 Array posArray = new ArrayImpl(); 121 Array lenArray = new ArrayImpl(); 122 posArray.appendEL(Constants.INTEGER_0); 123 lenArray.appendEL(Constants.INTEGER_0); 124 125 Struct struct = new StructImpl(); 126 struct.setEL("pos", posArray); 127 struct.setEL("len", lenArray); 128 return struct; 129 } 130 131 132 public static Array match(String strPattern, String strInput, int offset, boolean caseSensitive) throws MalformedPatternException { 133 134 Perl5Matcher matcher = new Perl5Matcher(); 135 PatternMatcherInput input = new PatternMatcherInput(strInput); 136 137 138 int compileOptions=caseSensitive ? 0 : Perl5Compiler.CASE_INSENSITIVE_MASK; 139 compileOptions+=Perl5Compiler.SINGLELINE_MASK; 140 if(offset < 1) offset = 1; 141 142 143 Pattern pattern = getPattern(strPattern,compileOptions); 144 145 146 Array rtn = new ArrayImpl(); 147 MatchResult result; 148 while(matcher.contains(input, pattern)) { 149 result = matcher.getMatch(); 150 rtn.appendEL(result.toString()); 151 /* 152 System.out.println("Match: " + result.toString()); 153 System.out.println("Length: " + result.length()); 154 groups = result.groups(); 155 System.out.println("Groups: " + groups); 156 System.out.println("Begin offset: " + result.beginOffset(0)); 157 System.out.println("End offset: " + result.endOffset(0)); 158 System.out.println("Saved Groups: "); 159 160 // Start at 1 because we just printed out group 0 161 for(int group = 1; group < groups; group++) { 162 System.out.println(group + ": " + result.group(group)); 163 System.out.println("Begin: " + result.begin(group)); 164 System.out.println("End: " + result.end(group)); 165 }*/ 166 } 167 return rtn; 168 } 169 170 171 private static Pattern getPattern(String strPattern, int type) throws MalformedPatternException { 172 Object o=patterns.get(strPattern+type); 173 if(o==null) { 174 Pattern pattern = new Perl5Compiler().compile(strPattern, type); 175 patterns.put(strPattern+type,pattern); 176 return pattern; 177 } 178 return (Pattern) o; 179 180 } 181 182 /** 183 * replace the first/all occurence of given pattern 184 * @param strInput text to search pattern 185 * @param strPattern pattern to search 186 * @param replacement text to replace with pattern 187 * @param caseSensitive 188 * @param replaceAll do replace all or only one 189 * @return transformed text 190 * @throws MalformedPatternException 191 */ 192 public static String replace(String strInput, String strPattern, String replacement, boolean caseSensitive, boolean replaceAll) throws MalformedPatternException { 193 return _replace(strInput,strPattern,escape(replacement),caseSensitive,replaceAll); 194 } 195 private static String _replace(String strInput, String strPattern, String replacement, boolean caseSensitive, boolean replaceAll) throws MalformedPatternException { 196 Pattern pattern = getPattern(strPattern, caseSensitive?16:17); 197 return Util.substitute(new Perl5Matcher(), pattern, new Perl5Substitution(replacement), strInput, replaceAll ? -1 : 1); 198 } 199 200 201 private static String escape(String replacement) throws MalformedPatternException { 202 replacement = _replace(replacement, "\\\\", "\\\\\\\\", false, true); 203 replacement=_escape(replacement); 204 replacement = _replace(replacement, "\\\\\\\\(\\d)", "\\$$1", false, true); 205 return replacement; 206 } 207 private static String _escape(String str) { 208 StringBuffer sb=new StringBuffer(); 209 int len=str.length(); 210 char c; 211 212 for(int i=0;i<len;i++) { 213 c=str.charAt(i); 214 215 if('+'==c) sb.append("\\+"); 216 else if('?'==c) sb.append("\\?"); 217 else if('$'==c) sb.append("\\$"); 218 else if('^'==c) sb.append("\\^"); 219 else if('\\'==c) { 220 if(i+1<len) { 221 char n=str.charAt(i+1); 222 if('\\'==n) { 223 if(i+2<len) { 224 char nn=str.charAt(i+2); 225 char x=0; 226 if('U'==nn) x='U'; 227 else if('L'==nn) x='L'; 228 else if('u'==nn) x='u'; 229 else if('l'==nn) x='l'; 230 else if('E'==nn) x='E'; 231 //else if('d'==nn) x='d'; 232 if(x!=0) { 233 sb.append("\\"+x); 234 i+=2; 235 continue; 236 } 237 } 238 } 239 } 240 sb.append(c); 241 } 242 else sb.append(c); 243 } 244 return sb.toString(); 245 } 246 247 public static void main(String[] args) throws MalformedPatternException { 248 find( 249 "<function[^>]*>.*?</function>", 250 "<function name=\"susi2\" some=\"1\">\n<argument name=\"susi\">\naaa</function>", 251 1,false); 252 } 253}