001 package railo.commons.lang; 002 003 import java.net.MalformedURLException; 004 import java.net.URL; 005 import java.util.ArrayList; 006 import java.util.List; 007 008 import railo.commons.net.HTTPUtil; 009 import railo.transformer.util.CFMLString; 010 011 /** 012 * HTML Util class 013 * 014 */ 015 public final class HTMLUtil { 016 017 private final Tag[] tags=new Tag[]{ 018 new Tag("a","href"), 019 new Tag("link","href"), 020 new Tag("form","action"), 021 new Tag("applet","code"), 022 new Tag("script","src"), 023 new Tag("body","background"), 024 new Tag("frame","src"), 025 new Tag("bgsound","src"), 026 new Tag("img","src"), 027 028 new Tag("embed",new String[]{"src","pluginspace"}), 029 new Tag("object",new String[]{"data","classid","codebase","usemap"}) 030 031 }; 032 033 034 /** 035 * returns all urls in a html String 036 * @param html HTML String to search urls 037 * @param url Absolute URL path to set 038 * @return urls found in html String 039 */ 040 public List getURLS(String html, URL url) { 041 042 ArrayList urls=new ArrayList(); 043 CFMLString cfml=new CFMLString(html,"UTF-8"); 044 while(!cfml.isAfterLast()) { 045 if(cfml.forwardIfCurrent('<')) { 046 for(int i=0;i<tags.length;i++) { 047 if(cfml.forwardIfCurrent(tags[i].tag+" ")) { 048 getSingleUrl(urls,cfml,tags[i],url); 049 } 050 } 051 } 052 else { 053 cfml.next(); 054 } 055 056 } 057 return urls; 058 } 059 060 /** 061 * transform a single tag 062 * @param urls all urls founded 063 * @param cfml CFMl String Object containing plain HTML 064 * @param tag current tag totransform 065 * @param url absolute URL to Set at tag attribute 066 */ 067 private void getSingleUrl(List urls,CFMLString cfml, Tag tag,URL url) { 068 char quote=0; 069 boolean inside=false; 070 StringBuilder value=new StringBuilder(); 071 072 while(!cfml.isAfterLast()) { 073 if(inside) { 074 if(quote!=0 && cfml.forwardIfCurrent(quote)) { 075 inside=false; 076 077 add(urls,url,value.toString()); 078 } 079 else if(quote==0 && (cfml.isCurrent(' ')||cfml.isCurrent("/>")||cfml.isCurrent('>')||cfml.isCurrent('\t')||cfml.isCurrent('\n'))) { 080 inside=false; 081 try { 082 urls.add(new URL(url,value.toString())); 083 } catch (MalformedURLException e) {} 084 cfml.next(); 085 } 086 else { 087 value.append(cfml.getCurrent()); 088 cfml.next(); 089 } 090 } 091 else if(cfml.forwardIfCurrent('>')) { 092 break; 093 } 094 else { 095 096 for(int i=0;i<tag.attributes.length;i++) { 097 if(cfml.forwardIfCurrent(tag.attributes[i])) { 098 cfml.removeSpace(); 099 // = 100 if(cfml.isCurrent('=')) { 101 inside=true; 102 cfml.next(); 103 cfml.removeSpace(); 104 105 quote=cfml.getCurrent(); 106 value=new StringBuilder(); 107 if(quote!='"' && quote!='\'')quote=0; 108 else { 109 cfml.next(); 110 } 111 } 112 } 113 } 114 if(!inside) { 115 cfml.next(); 116 } 117 } 118 } 119 } 120 121 private void add(List list,URL baseURL,String value) { 122 value=value.trim(); 123 String lcValue=value.toLowerCase(); 124 try { 125 if(lcValue.startsWith("http://") || lcValue.startsWith("news://") || lcValue.startsWith("goopher://") || lcValue.startsWith("javascript:")) 126 list.add(HTTPUtil.toURL(value)); 127 else { 128 129 130 list.add(new URL(baseURL,value.toString())); 131 } 132 } 133 catch(MalformedURLException mue) {} 134 //print.err(list.get(list.size()-1)); 135 } 136 137 private class Tag { 138 private String tag; 139 private String[] attributes; 140 private Tag(String tag,String[] attributes) { 141 this.tag=tag.toLowerCase(); 142 this.attributes=new String[attributes.length]; 143 for(int i=0;i<attributes.length;i++) { 144 this.attributes[i]=attributes[i].toLowerCase(); 145 } 146 147 } 148 private Tag(String tag,String attribute1) { 149 this.tag=tag.toLowerCase(); 150 this.attributes=new String[]{attribute1.toLowerCase()}; 151 } 152 153 } 154 }