001/**
002 *
003 * Copyright (c) 2014, the Railo Company Ltd. All rights reserved.
004 *
005 * This library is free software; you can redistribute it and/or
006 * modify it under the terms of the GNU Lesser General Public
007 * License as published by the Free Software Foundation; either 
008 * version 2.1 of the License, or (at your option) any later version.
009 * 
010 * This library is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013 * Lesser General Public License for more details.
014 * 
015 * You should have received a copy of the GNU Lesser General Public 
016 * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
017 * 
018 **/
019package lucee.commons.lang;
020
021
022
023
/**
 * <p>Provides HTML and XML entity utilities.</p>
 *
 * <p>Characters are mapped to named entities through a set of sparse lookup
 * tables. Each table covers one contiguous range of character codes: the slot
 * for a character is {@code table[code - offset]}, and a {@code null} slot
 * means that the character has no named entity.</p>
 */
public final class HTMLEntities {

    /** Entity set of HTML 2.0: only {@code "}, {@code &}, {@code <} and {@code >}. */
    public static final short HTMLV20 = 1;
    /**
     * Entity set of HTML 3.2: the HTML 2.0 set plus the ISO 8859-1 entities.
     * This constant used to share the value of {@link #HTMLV20}, which made the
     * HTML 3.2 table unreachable; it now has its own value. The values of
     * {@link #HTMLV20} and {@link #HTMLV40} are unchanged.
     */
    public static final short HTMLV32 = 3;
    /** Entity set of HTML 4.0: the HTML 3.2 set plus symbols, Greek letters and special characters. */
    public static final short HTMLV40 = 2;

    // Basic (markup significant characters)
    private static final int OFFSET_BASIC = 34;
    private static final String[] BASIC_ARRAY = new String[63 - OFFSET_BASIC];
    static {
        BASIC_ARRAY[34 - OFFSET_BASIC] = "quot";
        BASIC_ARRAY[38 - OFFSET_BASIC] = "amp";
        BASIC_ARRAY[60 - OFFSET_BASIC] = "lt";
        BASIC_ARRAY[62 - OFFSET_BASIC] = "gt";
    }

    // HTML 3.2 (ISO 8859-1 characters)
    private static final int OFFSET_ISO8859_1 = 160;
    private static final String[] ISO8859_1_ARRAY = new String[256 - OFFSET_ISO8859_1];
    static {
        ISO8859_1_ARRAY[160 - OFFSET_ISO8859_1] = "nbsp";
        ISO8859_1_ARRAY[161 - OFFSET_ISO8859_1] = "iexcl";
        ISO8859_1_ARRAY[162 - OFFSET_ISO8859_1] = "cent";
        ISO8859_1_ARRAY[163 - OFFSET_ISO8859_1] = "pound";
        ISO8859_1_ARRAY[164 - OFFSET_ISO8859_1] = "curren";
        ISO8859_1_ARRAY[165 - OFFSET_ISO8859_1] = "yen";
        ISO8859_1_ARRAY[166 - OFFSET_ISO8859_1] = "brvbar";
        ISO8859_1_ARRAY[167 - OFFSET_ISO8859_1] = "sect";
        ISO8859_1_ARRAY[168 - OFFSET_ISO8859_1] = "uml";
        ISO8859_1_ARRAY[169 - OFFSET_ISO8859_1] = "copy";
        ISO8859_1_ARRAY[170 - OFFSET_ISO8859_1] = "ordf";
        ISO8859_1_ARRAY[171 - OFFSET_ISO8859_1] = "laquo";
        ISO8859_1_ARRAY[172 - OFFSET_ISO8859_1] = "not";
        ISO8859_1_ARRAY[173 - OFFSET_ISO8859_1] = "shy";
        ISO8859_1_ARRAY[174 - OFFSET_ISO8859_1] = "reg";
        ISO8859_1_ARRAY[175 - OFFSET_ISO8859_1] = "macr";
        ISO8859_1_ARRAY[176 - OFFSET_ISO8859_1] = "deg";
        ISO8859_1_ARRAY[177 - OFFSET_ISO8859_1] = "plusmn";
        ISO8859_1_ARRAY[178 - OFFSET_ISO8859_1] = "sup2";
        ISO8859_1_ARRAY[179 - OFFSET_ISO8859_1] = "sup3";
        ISO8859_1_ARRAY[180 - OFFSET_ISO8859_1] = "acute";
        ISO8859_1_ARRAY[181 - OFFSET_ISO8859_1] = "micro";
        ISO8859_1_ARRAY[182 - OFFSET_ISO8859_1] = "para";
        ISO8859_1_ARRAY[183 - OFFSET_ISO8859_1] = "middot";
        ISO8859_1_ARRAY[184 - OFFSET_ISO8859_1] = "cedil";
        ISO8859_1_ARRAY[185 - OFFSET_ISO8859_1] = "sup1";
        ISO8859_1_ARRAY[186 - OFFSET_ISO8859_1] = "ordm";
        ISO8859_1_ARRAY[187 - OFFSET_ISO8859_1] = "raquo";
        ISO8859_1_ARRAY[188 - OFFSET_ISO8859_1] = "frac14";
        ISO8859_1_ARRAY[189 - OFFSET_ISO8859_1] = "frac12";
        ISO8859_1_ARRAY[190 - OFFSET_ISO8859_1] = "frac34";
        ISO8859_1_ARRAY[191 - OFFSET_ISO8859_1] = "iquest";
        ISO8859_1_ARRAY[192 - OFFSET_ISO8859_1] = "Agrave";
        ISO8859_1_ARRAY[193 - OFFSET_ISO8859_1] = "Aacute";
        ISO8859_1_ARRAY[194 - OFFSET_ISO8859_1] = "Acirc";
        ISO8859_1_ARRAY[195 - OFFSET_ISO8859_1] = "Atilde";
        ISO8859_1_ARRAY[196 - OFFSET_ISO8859_1] = "Auml";
        ISO8859_1_ARRAY[197 - OFFSET_ISO8859_1] = "Aring";
        ISO8859_1_ARRAY[198 - OFFSET_ISO8859_1] = "AElig";
        ISO8859_1_ARRAY[199 - OFFSET_ISO8859_1] = "Ccedil";
        ISO8859_1_ARRAY[200 - OFFSET_ISO8859_1] = "Egrave";
        ISO8859_1_ARRAY[201 - OFFSET_ISO8859_1] = "Eacute";
        ISO8859_1_ARRAY[202 - OFFSET_ISO8859_1] = "Ecirc";
        ISO8859_1_ARRAY[203 - OFFSET_ISO8859_1] = "Euml";
        ISO8859_1_ARRAY[204 - OFFSET_ISO8859_1] = "Igrave";
        ISO8859_1_ARRAY[205 - OFFSET_ISO8859_1] = "Iacute";
        ISO8859_1_ARRAY[206 - OFFSET_ISO8859_1] = "Icirc";
        ISO8859_1_ARRAY[207 - OFFSET_ISO8859_1] = "Iuml";
        ISO8859_1_ARRAY[208 - OFFSET_ISO8859_1] = "ETH";
        ISO8859_1_ARRAY[209 - OFFSET_ISO8859_1] = "Ntilde";
        ISO8859_1_ARRAY[210 - OFFSET_ISO8859_1] = "Ograve";
        ISO8859_1_ARRAY[211 - OFFSET_ISO8859_1] = "Oacute";
        ISO8859_1_ARRAY[212 - OFFSET_ISO8859_1] = "Ocirc";
        ISO8859_1_ARRAY[213 - OFFSET_ISO8859_1] = "Otilde";
        ISO8859_1_ARRAY[214 - OFFSET_ISO8859_1] = "Ouml";
        ISO8859_1_ARRAY[215 - OFFSET_ISO8859_1] = "times";
        ISO8859_1_ARRAY[216 - OFFSET_ISO8859_1] = "Oslash";
        ISO8859_1_ARRAY[217 - OFFSET_ISO8859_1] = "Ugrave";
        ISO8859_1_ARRAY[218 - OFFSET_ISO8859_1] = "Uacute";
        ISO8859_1_ARRAY[219 - OFFSET_ISO8859_1] = "Ucirc";
        ISO8859_1_ARRAY[220 - OFFSET_ISO8859_1] = "Uuml";
        ISO8859_1_ARRAY[221 - OFFSET_ISO8859_1] = "Yacute";
        ISO8859_1_ARRAY[222 - OFFSET_ISO8859_1] = "THORN";
        ISO8859_1_ARRAY[223 - OFFSET_ISO8859_1] = "szlig";
        ISO8859_1_ARRAY[224 - OFFSET_ISO8859_1] = "agrave";
        ISO8859_1_ARRAY[225 - OFFSET_ISO8859_1] = "aacute";
        ISO8859_1_ARRAY[226 - OFFSET_ISO8859_1] = "acirc";
        ISO8859_1_ARRAY[227 - OFFSET_ISO8859_1] = "atilde";
        ISO8859_1_ARRAY[228 - OFFSET_ISO8859_1] = "auml";
        ISO8859_1_ARRAY[229 - OFFSET_ISO8859_1] = "aring";
        ISO8859_1_ARRAY[230 - OFFSET_ISO8859_1] = "aelig";
        ISO8859_1_ARRAY[231 - OFFSET_ISO8859_1] = "ccedil";
        ISO8859_1_ARRAY[232 - OFFSET_ISO8859_1] = "egrave";
        ISO8859_1_ARRAY[233 - OFFSET_ISO8859_1] = "eacute";
        ISO8859_1_ARRAY[234 - OFFSET_ISO8859_1] = "ecirc";
        ISO8859_1_ARRAY[235 - OFFSET_ISO8859_1] = "euml";
        ISO8859_1_ARRAY[236 - OFFSET_ISO8859_1] = "igrave";
        ISO8859_1_ARRAY[237 - OFFSET_ISO8859_1] = "iacute";
        ISO8859_1_ARRAY[238 - OFFSET_ISO8859_1] = "icirc";
        ISO8859_1_ARRAY[239 - OFFSET_ISO8859_1] = "iuml";
        ISO8859_1_ARRAY[240 - OFFSET_ISO8859_1] = "eth";
        ISO8859_1_ARRAY[241 - OFFSET_ISO8859_1] = "ntilde";
        ISO8859_1_ARRAY[242 - OFFSET_ISO8859_1] = "ograve";
        ISO8859_1_ARRAY[243 - OFFSET_ISO8859_1] = "oacute";
        ISO8859_1_ARRAY[244 - OFFSET_ISO8859_1] = "ocirc";
        ISO8859_1_ARRAY[245 - OFFSET_ISO8859_1] = "otilde";
        ISO8859_1_ARRAY[246 - OFFSET_ISO8859_1] = "ouml";
        ISO8859_1_ARRAY[247 - OFFSET_ISO8859_1] = "divide";
        ISO8859_1_ARRAY[248 - OFFSET_ISO8859_1] = "oslash";
        ISO8859_1_ARRAY[249 - OFFSET_ISO8859_1] = "ugrave";
        ISO8859_1_ARRAY[250 - OFFSET_ISO8859_1] = "uacute";
        ISO8859_1_ARRAY[251 - OFFSET_ISO8859_1] = "ucirc";
        ISO8859_1_ARRAY[252 - OFFSET_ISO8859_1] = "uuml";
        ISO8859_1_ARRAY[253 - OFFSET_ISO8859_1] = "yacute";
        ISO8859_1_ARRAY[254 - OFFSET_ISO8859_1] = "thorn";
        ISO8859_1_ARRAY[255 - OFFSET_ISO8859_1] = "yuml";
    }

    // HTML 4.0 (1): Latin Extended-B
    private static final int OFFSET_HTML40_1 = 402;
    private static final String[] HTML40_1_ARRAY = new String[403 - OFFSET_HTML40_1];
    static {
        HTML40_1_ARRAY[402 - OFFSET_HTML40_1] = "fnof";
    }

    // HTML 4.0 (2): Greek
    private static final int OFFSET_HTML40_2 = 913;
    private static final String[] HTML40_2_ARRAY = new String[983 - OFFSET_HTML40_2];
    static {
        HTML40_2_ARRAY[913 - OFFSET_HTML40_2] = "Alpha";
        HTML40_2_ARRAY[914 - OFFSET_HTML40_2] = "Beta";
        HTML40_2_ARRAY[915 - OFFSET_HTML40_2] = "Gamma";
        HTML40_2_ARRAY[916 - OFFSET_HTML40_2] = "Delta";
        HTML40_2_ARRAY[917 - OFFSET_HTML40_2] = "Epsilon";
        HTML40_2_ARRAY[918 - OFFSET_HTML40_2] = "Zeta";
        HTML40_2_ARRAY[919 - OFFSET_HTML40_2] = "Eta";
        HTML40_2_ARRAY[920 - OFFSET_HTML40_2] = "Theta";
        HTML40_2_ARRAY[921 - OFFSET_HTML40_2] = "Iota";
        HTML40_2_ARRAY[922 - OFFSET_HTML40_2] = "Kappa";
        HTML40_2_ARRAY[923 - OFFSET_HTML40_2] = "Lambda";
        HTML40_2_ARRAY[924 - OFFSET_HTML40_2] = "Mu";
        HTML40_2_ARRAY[925 - OFFSET_HTML40_2] = "Nu";
        HTML40_2_ARRAY[926 - OFFSET_HTML40_2] = "Xi";
        HTML40_2_ARRAY[927 - OFFSET_HTML40_2] = "Omicron";
        HTML40_2_ARRAY[928 - OFFSET_HTML40_2] = "Pi";
        HTML40_2_ARRAY[929 - OFFSET_HTML40_2] = "Rho";
        HTML40_2_ARRAY[931 - OFFSET_HTML40_2] = "Sigma";
        HTML40_2_ARRAY[932 - OFFSET_HTML40_2] = "Tau";
        HTML40_2_ARRAY[933 - OFFSET_HTML40_2] = "Upsilon";
        HTML40_2_ARRAY[934 - OFFSET_HTML40_2] = "Phi";
        HTML40_2_ARRAY[935 - OFFSET_HTML40_2] = "Chi";
        HTML40_2_ARRAY[936 - OFFSET_HTML40_2] = "Psi";
        HTML40_2_ARRAY[937 - OFFSET_HTML40_2] = "Omega";
        HTML40_2_ARRAY[945 - OFFSET_HTML40_2] = "alpha";
        HTML40_2_ARRAY[946 - OFFSET_HTML40_2] = "beta";
        HTML40_2_ARRAY[947 - OFFSET_HTML40_2] = "gamma";
        HTML40_2_ARRAY[948 - OFFSET_HTML40_2] = "delta";
        HTML40_2_ARRAY[949 - OFFSET_HTML40_2] = "epsilon";
        HTML40_2_ARRAY[950 - OFFSET_HTML40_2] = "zeta";
        HTML40_2_ARRAY[951 - OFFSET_HTML40_2] = "eta";
        HTML40_2_ARRAY[952 - OFFSET_HTML40_2] = "theta";
        HTML40_2_ARRAY[953 - OFFSET_HTML40_2] = "iota";
        HTML40_2_ARRAY[954 - OFFSET_HTML40_2] = "kappa";
        HTML40_2_ARRAY[955 - OFFSET_HTML40_2] = "lambda";
        HTML40_2_ARRAY[956 - OFFSET_HTML40_2] = "mu";
        HTML40_2_ARRAY[957 - OFFSET_HTML40_2] = "nu";
        HTML40_2_ARRAY[958 - OFFSET_HTML40_2] = "xi";
        HTML40_2_ARRAY[959 - OFFSET_HTML40_2] = "omicron";
        HTML40_2_ARRAY[960 - OFFSET_HTML40_2] = "pi";
        HTML40_2_ARRAY[961 - OFFSET_HTML40_2] = "rho";
        HTML40_2_ARRAY[962 - OFFSET_HTML40_2] = "sigmaf";
        HTML40_2_ARRAY[963 - OFFSET_HTML40_2] = "sigma";
        HTML40_2_ARRAY[964 - OFFSET_HTML40_2] = "tau";
        HTML40_2_ARRAY[965 - OFFSET_HTML40_2] = "upsilon";
        HTML40_2_ARRAY[966 - OFFSET_HTML40_2] = "phi";
        HTML40_2_ARRAY[967 - OFFSET_HTML40_2] = "chi";
        HTML40_2_ARRAY[968 - OFFSET_HTML40_2] = "psi";
        HTML40_2_ARRAY[969 - OFFSET_HTML40_2] = "omega";
        HTML40_2_ARRAY[977 - OFFSET_HTML40_2] = "thetasym";
        HTML40_2_ARRAY[978 - OFFSET_HTML40_2] = "upsih";
        HTML40_2_ARRAY[982 - OFFSET_HTML40_2] = "piv";
    }

    // HTML 4.0 (3): Latin Extended-A
    private static final int OFFSET_HTML40_3 = 338;
    private static final String[] HTML40_3_ARRAY = new String[377 - OFFSET_HTML40_3];
    static {
        // These entries must go into HTML40_3_ARRAY. Writing them into
        // HTML40_2_ARRAY (as was done before) clobbers Alpha, Beta, Omicron,
        // Pi and eta and leaves this table empty.
        HTML40_3_ARRAY[338 - OFFSET_HTML40_3] = "OElig";
        HTML40_3_ARRAY[339 - OFFSET_HTML40_3] = "oelig";
        HTML40_3_ARRAY[352 - OFFSET_HTML40_3] = "Scaron";
        HTML40_3_ARRAY[353 - OFFSET_HTML40_3] = "scaron";
        HTML40_3_ARRAY[376 - OFFSET_HTML40_3] = "Yuml";
    }

    // HTML 4.0 (4): spacing modifier letters
    private static final int OFFSET_HTML40_4 = 710;
    private static final String[] HTML40_4_ARRAY = new String[733 - OFFSET_HTML40_4];
    static {
        HTML40_4_ARRAY[710 - OFFSET_HTML40_4] = "circ";
        HTML40_4_ARRAY[732 - OFFSET_HTML40_4] = "tilde";
    }

    // HTML 4.0 (5): punctuation, letterlike symbols, arrows, math and misc symbols
    private static final int OFFSET_HTML40_5 = 8194;
    private static final String[] HTML40_5_ARRAY = new String[9831 - OFFSET_HTML40_5];
    static {
        HTML40_5_ARRAY[8194 - OFFSET_HTML40_5] = "ensp";
        HTML40_5_ARRAY[8195 - OFFSET_HTML40_5] = "emsp";
        HTML40_5_ARRAY[8201 - OFFSET_HTML40_5] = "thinsp";
        HTML40_5_ARRAY[8204 - OFFSET_HTML40_5] = "zwnj";
        HTML40_5_ARRAY[8205 - OFFSET_HTML40_5] = "zwj";
        HTML40_5_ARRAY[8206 - OFFSET_HTML40_5] = "lrm";
        HTML40_5_ARRAY[8207 - OFFSET_HTML40_5] = "rlm";
        HTML40_5_ARRAY[8211 - OFFSET_HTML40_5] = "ndash";
        HTML40_5_ARRAY[8212 - OFFSET_HTML40_5] = "mdash";
        HTML40_5_ARRAY[8216 - OFFSET_HTML40_5] = "lsquo";
        HTML40_5_ARRAY[8217 - OFFSET_HTML40_5] = "rsquo";
        HTML40_5_ARRAY[8218 - OFFSET_HTML40_5] = "sbquo";
        HTML40_5_ARRAY[8220 - OFFSET_HTML40_5] = "ldquo";
        HTML40_5_ARRAY[8221 - OFFSET_HTML40_5] = "rdquo";
        HTML40_5_ARRAY[8222 - OFFSET_HTML40_5] = "bdquo";
        HTML40_5_ARRAY[8224 - OFFSET_HTML40_5] = "dagger";
        HTML40_5_ARRAY[8225 - OFFSET_HTML40_5] = "Dagger";
        HTML40_5_ARRAY[8226 - OFFSET_HTML40_5] = "bull";
        HTML40_5_ARRAY[8230 - OFFSET_HTML40_5] = "hellip";
        HTML40_5_ARRAY[8240 - OFFSET_HTML40_5] = "permil";
        HTML40_5_ARRAY[8242 - OFFSET_HTML40_5] = "prime";
        HTML40_5_ARRAY[8243 - OFFSET_HTML40_5] = "Prime";
        HTML40_5_ARRAY[8249 - OFFSET_HTML40_5] = "lsaquo";
        HTML40_5_ARRAY[8250 - OFFSET_HTML40_5] = "rsaquo";
        HTML40_5_ARRAY[8254 - OFFSET_HTML40_5] = "oline";
        HTML40_5_ARRAY[8260 - OFFSET_HTML40_5] = "frasl";
        HTML40_5_ARRAY[8364 - OFFSET_HTML40_5] = "euro";
        HTML40_5_ARRAY[8465 - OFFSET_HTML40_5] = "image";
        HTML40_5_ARRAY[8472 - OFFSET_HTML40_5] = "weierp";
        HTML40_5_ARRAY[8476 - OFFSET_HTML40_5] = "real";
        HTML40_5_ARRAY[8482 - OFFSET_HTML40_5] = "trade";
        HTML40_5_ARRAY[8501 - OFFSET_HTML40_5] = "alefsym";
        HTML40_5_ARRAY[8592 - OFFSET_HTML40_5] = "larr";
        HTML40_5_ARRAY[8593 - OFFSET_HTML40_5] = "uarr";
        HTML40_5_ARRAY[8594 - OFFSET_HTML40_5] = "rarr";
        HTML40_5_ARRAY[8595 - OFFSET_HTML40_5] = "darr";
        HTML40_5_ARRAY[8596 - OFFSET_HTML40_5] = "harr";
        HTML40_5_ARRAY[8629 - OFFSET_HTML40_5] = "crarr";
        HTML40_5_ARRAY[8656 - OFFSET_HTML40_5] = "lArr";
        HTML40_5_ARRAY[8657 - OFFSET_HTML40_5] = "uArr";
        HTML40_5_ARRAY[8658 - OFFSET_HTML40_5] = "rArr";
        HTML40_5_ARRAY[8659 - OFFSET_HTML40_5] = "dArr";
        HTML40_5_ARRAY[8660 - OFFSET_HTML40_5] = "hArr";
        HTML40_5_ARRAY[8704 - OFFSET_HTML40_5] = "forall";
        HTML40_5_ARRAY[8706 - OFFSET_HTML40_5] = "part";
        HTML40_5_ARRAY[8707 - OFFSET_HTML40_5] = "exist";
        HTML40_5_ARRAY[8709 - OFFSET_HTML40_5] = "empty";
        HTML40_5_ARRAY[8711 - OFFSET_HTML40_5] = "nabla";
        HTML40_5_ARRAY[8712 - OFFSET_HTML40_5] = "isin";
        HTML40_5_ARRAY[8713 - OFFSET_HTML40_5] = "notin";
        HTML40_5_ARRAY[8715 - OFFSET_HTML40_5] = "ni";
        HTML40_5_ARRAY[8719 - OFFSET_HTML40_5] = "prod";
        HTML40_5_ARRAY[8721 - OFFSET_HTML40_5] = "sum";
        HTML40_5_ARRAY[8722 - OFFSET_HTML40_5] = "minus";
        HTML40_5_ARRAY[8727 - OFFSET_HTML40_5] = "lowast";
        HTML40_5_ARRAY[8730 - OFFSET_HTML40_5] = "radic";
        HTML40_5_ARRAY[8733 - OFFSET_HTML40_5] = "prop";
        HTML40_5_ARRAY[8734 - OFFSET_HTML40_5] = "infin";
        HTML40_5_ARRAY[8736 - OFFSET_HTML40_5] = "ang";
        HTML40_5_ARRAY[8743 - OFFSET_HTML40_5] = "and";
        HTML40_5_ARRAY[8744 - OFFSET_HTML40_5] = "or";
        HTML40_5_ARRAY[8745 - OFFSET_HTML40_5] = "cap";
        HTML40_5_ARRAY[8746 - OFFSET_HTML40_5] = "cup";
        HTML40_5_ARRAY[8747 - OFFSET_HTML40_5] = "int";
        HTML40_5_ARRAY[8756 - OFFSET_HTML40_5] = "there4";
        HTML40_5_ARRAY[8764 - OFFSET_HTML40_5] = "sim";
        HTML40_5_ARRAY[8773 - OFFSET_HTML40_5] = "cong";
        HTML40_5_ARRAY[8776 - OFFSET_HTML40_5] = "asymp";
        HTML40_5_ARRAY[8800 - OFFSET_HTML40_5] = "ne";
        HTML40_5_ARRAY[8801 - OFFSET_HTML40_5] = "equiv";
        HTML40_5_ARRAY[8804 - OFFSET_HTML40_5] = "le";
        HTML40_5_ARRAY[8805 - OFFSET_HTML40_5] = "ge";
        HTML40_5_ARRAY[8834 - OFFSET_HTML40_5] = "sub";
        HTML40_5_ARRAY[8835 - OFFSET_HTML40_5] = "sup";
        HTML40_5_ARRAY[8838 - OFFSET_HTML40_5] = "sube";
        HTML40_5_ARRAY[8839 - OFFSET_HTML40_5] = "supe";
        HTML40_5_ARRAY[8853 - OFFSET_HTML40_5] = "oplus";
        HTML40_5_ARRAY[8855 - OFFSET_HTML40_5] = "otimes";
        HTML40_5_ARRAY[8869 - OFFSET_HTML40_5] = "perp";
        HTML40_5_ARRAY[8901 - OFFSET_HTML40_5] = "sdot";
        HTML40_5_ARRAY[8968 - OFFSET_HTML40_5] = "lceil";
        HTML40_5_ARRAY[8969 - OFFSET_HTML40_5] = "rceil";
        HTML40_5_ARRAY[8970 - OFFSET_HTML40_5] = "lfloor";
        HTML40_5_ARRAY[8971 - OFFSET_HTML40_5] = "rfloor";
        HTML40_5_ARRAY[9001 - OFFSET_HTML40_5] = "lang";
        HTML40_5_ARRAY[9002 - OFFSET_HTML40_5] = "rang";
        HTML40_5_ARRAY[9674 - OFFSET_HTML40_5] = "loz";
        HTML40_5_ARRAY[9824 - OFFSET_HTML40_5] = "spades";
        HTML40_5_ARRAY[9827 - OFFSET_HTML40_5] = "clubs";
        HTML40_5_ARRAY[9829 - OFFSET_HTML40_5] = "hearts";
        HTML40_5_ARRAY[9830 - OFFSET_HTML40_5] = "diams";
    }

    // HTML 2.0: tables and their offsets (parallel arrays)
    private static final String[][] HTML20_DATA = {
        BASIC_ARRAY
    };
    private static final int[] HTML20_OFFSET = {
        OFFSET_BASIC
    };

    // HTML 3.2: tables and their offsets (parallel arrays)
    private static final String[][] HTML32_DATA = {
        BASIC_ARRAY,
        ISO8859_1_ARRAY
    };
    private static final int[] HTML32_OFFSET = {
        OFFSET_BASIC,
        OFFSET_ISO8859_1
    };

    // HTML 4.0: tables and their offsets (parallel arrays)
    private static final String[][] HTML40_DATA = {
        BASIC_ARRAY,
        ISO8859_1_ARRAY,
        HTML40_1_ARRAY,
        HTML40_2_ARRAY,
        HTML40_3_ARRAY,
        HTML40_4_ARRAY,
        HTML40_5_ARRAY
    };
    private static final int[] HTML40_OFFSET = {
        OFFSET_BASIC,
        OFFSET_ISO8859_1,
        OFFSET_HTML40_1,
        OFFSET_HTML40_2,
        OFFSET_HTML40_3,
        OFFSET_HTML40_4,
        OFFSET_HTML40_5
    };

    private static final char CR = (char) 13;

    /**
     * Escapes the characters of a string that have a named HTML 4.0 entity.
     * Equivalent to {@code escapeHTML(str, HTMLV40)}.
     *
     * @param str html code to escape, must not be {@code null}
     * @return escaped html code
     */
    public static String escapeHTML(String str) {
        return escapeHTML(str, HTMLV40);
    }

    /**
     * Escapes the characters of a string that have a named entity in the
     * given HTML version. Carriage return characters are removed from the
     * result; every other character without an entity is copied unchanged.
     *
     * @param str html code to escape, must not be {@code null}
     * @param version HTML version selecting the entity set: {@link #HTMLV20},
     *        {@link #HTMLV32} or {@link #HTMLV40}; any other value is treated
     *        as {@link #HTMLV40}
     * @return escaped html code
     */
    public static String escapeHTML(String str, short version) {
        String[][] data;
        int[] offset;
        StringBuilder rtn = new StringBuilder(str.length());
        char[] chars = str.toCharArray();

        if (version == HTMLV20) {
            data = HTML20_DATA;
            offset = HTML20_OFFSET;
        }
        else if (version == HTMLV32) {
            data = HTML32_DATA;
            offset = HTML32_OFFSET;
        }
        else {
            data = HTML40_DATA;
            offset = HTML40_OFFSET;
        }

        outer: for (int i = 0; i < chars.length; i++) {
            char c = chars[i];
            if (c == CR) continue; // for compatibility to ACF
            for (int y = 0; y < offset.length; y++) {
                // is the character inside the code range covered by table y?
                if (c >= offset[y] && c < data[y].length + offset[y]) {
                    String replacement = data[y][c - offset[y]];
                    if (replacement != null) {
                        rtn.append('&');
                        rtn.append(replacement);
                        rtn.append(';');
                        continue outer;
                    }
                }
            }
            rtn.append(c);
        }
        return rtn.toString();
    }

    /**
     * Replaces the entity references ({@code &name;}, {@code &#NNN;} and
     * {@code &#xHHH;}) of a string with the characters they stand for.
     * References that are not recognized are copied unchanged.
     *
     * @param str html code to unescape, must not be {@code null}
     * @return unescaped html code
     */
    public static String unescapeHTML(String str) {
        StringBuilder rtn = new StringBuilder(str.length());
        int copied = 0; // index of the first character not yet written to rtn
        int amp = str.indexOf('&');
        while (amp != -1) {
            int semi = str.indexOf(';', amp);
            // no terminator left: the rest of the input is plain text
            if (semi == -1) break;

            // An entity never contains '&', so when another '&' sits between
            // amp and semi only the last one can start the reference
            // (e.g. "AT&T &amp; co").
            int inner = str.lastIndexOf('&', semi - 1);
            if (inner > amp) amp = inner;

            rtn.append(str, copied, amp);
            if (amp + 1 < semi) {
                rtn.append(unescapeHTMLEntity(str.substring(amp + 1, semi)));
            }
            else {
                rtn.append("&;");
            }
            copied = semi + 1;
            amp = str.indexOf('&', copied);
        }
        // The tail is copied from the separately tracked position; reusing the
        // result of the failed ';' search here would append the input twice.
        rtn.append(str, copied, str.length());
        return rtn.toString();
    }

    /**
     * Resolves a single entity reference.
     *
     * @param str the text between {@code &} and {@code ;}, not empty
     * @return the referenced character(s), or the original reference
     *         ({@code "&" + str + ";"}) when it cannot be resolved
     */
    private static String unescapeHTMLEntity(String str) {
        String literal = "&" + str + ";";

        // Number Entity (decimal "#NNN" or hexadecimal "#xHHH")
        if (str.startsWith("#")) {
            if (str.length() == 1) return literal;
            try {
                boolean hex = str.length() > 2 && (str.charAt(1) == 'x' || str.charAt(1) == 'X');
                int codePoint = hex ? Integer.parseInt(str.substring(2), 16) : Integer.parseInt(str.substring(1));
                // a plain (char) cast would silently truncate values outside of the BMP
                if (!Character.isValidCodePoint(codePoint)) return literal;
                return new String(Character.toChars(codePoint));
            }
            catch (NumberFormatException nfe) {
                return literal;
            }
        }

        // String Entity: entity names are case sensitive ("Eacute" vs
        // "eacute"), so an exact match always wins; the first case-insensitive
        // match is only kept as a fallback for input like "&AMP;".
        String[][] ranges = HTML40_DATA;
        int[] offset = HTML40_OFFSET;
        String fallback = null;
        for (int i = 0; i < ranges.length; i++) {
            String[] range = ranges[i];
            for (int y = 0; y < range.length; y++) {
                String el = range[y];
                if (el == null) continue;
                if (el.equals(str)) {
                    return String.valueOf((char) (y + offset[i]));
                }
                if (fallback == null && el.equalsIgnoreCase(str)) {
                    fallback = String.valueOf((char) (y + offset[i]));
                }
            }
        }
        return fallback != null ? fallback : literal;
    }
}