/**
 *
 * Copyright (c) 2014, the Railo Company Ltd. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
 *
 **/
package lucee.commons.lang;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import lucee.commons.io.CharsetUtil;
import lucee.commons.net.HTTPUtil;
import lucee.transformer.util.CFMLString;

/**
 * HTML utility that collects the URLs referenced by a fixed set of
 * tag/attribute pairs (see {@link #TAGS}) from an HTML string.
 */
public final class HTMLUtil {

    /**
     * Tags that are scanned, each with the attribute(s) whose value is
     * treated as a URL.
     */
    private static final Tag[] TAGS = new Tag[] {
        new Tag("a", "href"),
        new Tag("link", "href"),
        new Tag("form", "action"),
        new Tag("applet", "code"),
        new Tag("script", "src"),
        new Tag("body", "background"),
        new Tag("frame", "src"),
        new Tag("bgsound", "src"),
        new Tag("img", "src"),

        new Tag("embed", new String[] {"src", "pluginspace"}),
        new Tag("object", new String[] {"data", "classid", "codebase", "usemap"})
    };

    /**
     * Returns all URLs found in an HTML string.
     *
     * @param html HTML string to search for URLs
     * @param url  absolute URL that relative references are resolved against
     * @return the URLs found, in document order; values that cannot be
     *         turned into a {@link URL} are left out
     */
    public List<URL> getURLS(String html, URL url) {
        List<URL> urls = new ArrayList<URL>();
        CFMLString cfml = new CFMLString(html, CharsetUtil.UTF8);

        while (!cfml.isAfterLast()) {
            if (!cfml.forwardIfCurrent('<')) {
                cfml.next();
                continue;
            }
            // A tag is only recognised when its name is followed by a single
            // space. NOTE(review): whether the comparison ignores case
            // depends on CFMLString - confirm.
            for (Tag candidate : TAGS) {
                if (cfml.forwardIfCurrent(candidate.tag + " ")) {
                    getSingleUrl(urls, cfml, candidate, url);
                    // The tag has been consumed; do not try the remaining
                    // tag names against the text that follows it.
                    break;
                }
            }
        }
        return urls;
    }

    /**
     * Scans one tag, starting right after the tag name, up to and including
     * its closing {@code >}, and records the value of every attribute listed
     * for that tag.
     *
     * @param urls all URLs found so far; new ones are appended
     * @param cfml CFMLString object containing the plain HTML, positioned
     *             inside the tag
     * @param tag  the tag currently being scanned
     * @param url  absolute URL that relative values are resolved against
     */
    private void getSingleUrl(List<URL> urls, CFMLString cfml, Tag tag, URL url) {
        char quote = 0; // 0 means the current value is not quoted
        boolean inside = false; // true while reading an attribute value
        StringBuilder value = new StringBuilder();

        while (!cfml.isAfterLast()) {
            if (inside) {
                if (quote != 0 && cfml.forwardIfCurrent(quote)) {
                    // closing quote reached
                    inside = false;
                    add(urls, url, value.toString());
                }
                else if (quote == 0 && isUnquotedValueEnd(cfml)) {
                    // The terminator is deliberately NOT consumed: when it is
                    // '>' the branch below must see it to stop at the end of
                    // the tag instead of scanning the following text.
                    inside = false;
                    add(urls, url, value.toString());
                }
                else {
                    value.append(cfml.getCurrent());
                    cfml.next();
                }
            }
            else if (cfml.forwardIfCurrent('>')) {
                break;
            }
            else {
                boolean matchedName = false;
                for (String attribute : tag.attributes) {
                    if (!cfml.forwardIfCurrent(attribute)) {
                        continue;
                    }
                    matchedName = true;
                    cfml.removeSpace();
                    if (cfml.isCurrent('=')) {
                        cfml.next();
                        cfml.removeSpace();
                        if (cfml.isAfterLast()) {
                            // input ends right after '=': there is no value
                            return;
                        }
                        inside = true;
                        value = new StringBuilder();
                        quote = cfml.getCurrent();
                        if (quote == '"' || quote == '\'') {
                            cfml.next(); // skip the opening quote
                        }
                        else {
                            quote = 0;
                        }
                    }
                    // Position already moved past the attribute name; restart
                    // the outer loop from here so that a '>' or another
                    // attribute name directly following is not skipped.
                    break;
                }
                if (!matchedName) {
                    cfml.next();
                }
            }
        }
    }

    /**
     * Tells whether the current position ends an unquoted attribute value:
     * whitespace, {@code >} or {@code />}.
     */
    private static boolean isUnquotedValueEnd(CFMLString cfml) {
        return cfml.isCurrent(' ')
            || cfml.isCurrent('\t')
            || cfml.isCurrent('\n')
            || cfml.isCurrent('\r')
            || cfml.isCurrent('>')
            || cfml.isCurrent("/>");
    }

    /**
     * Converts an attribute value to a URL and appends it to the list.
     * Values starting with {@code http://}, {@code https://},
     * {@code news://}, {@code gopher://} or {@code javascript:} are passed to
     * {@code HTTPUtil.toURL(value, true)}; every other value is resolved
     * against {@code baseURL}. Values that cannot be parsed are skipped.
     *
     * @param list    list the URL is appended to
     * @param baseURL context used to resolve relative values
     * @param value   raw attribute value; surrounding whitespace is trimmed
     */
    private void add(List<URL> list, URL baseURL, String value) {
        value = value.trim();
        // Locale.ROOT keeps the scheme check independent of the default
        // locale (e.g. Turkish dotless i in "JAVASCRIPT:").
        String lcValue = value.toLowerCase(Locale.ROOT);
        try {
            if (lcValue.startsWith("http://")
                    || lcValue.startsWith("https://")
                    || lcValue.startsWith("news://")
                    || lcValue.startsWith("gopher://")
                    || lcValue.startsWith("javascript:")) {
                list.add(HTTPUtil.toURL(value, true));
            }
            else {
                list.add(new URL(baseURL, value));
            }
        }
        catch (MalformedURLException ignored) {
            // best effort: a value that is not a valid URL is left out
        }
    }

    /**
     * A tag name together with the attributes of that tag whose values are
     * URLs. Names are stored in lower case.
     */
    private static final class Tag {
        private final String tag;
        private final String[] attributes;

        private Tag(String tag, String[] attributes) {
            this.tag = tag.toLowerCase(Locale.ROOT);
            this.attributes = new String[attributes.length];
            for (int i = 0; i < attributes.length; i++) {
                this.attributes[i] = attributes[i].toLowerCase(Locale.ROOT);
            }
        }

        private Tag(String tag, String attribute1) {
            this.tag = tag.toLowerCase(Locale.ROOT);
            this.attributes = new String[] {attribute1.toLowerCase(Locale.ROOT)};
        }
    }
}