001    package railo.commons.lang;
002    
003    import java.net.MalformedURLException;
004    import java.net.URL;
005    import java.util.ArrayList;
006    import java.util.List;
007    
008    import railo.commons.net.HTTPUtil;
009    import railo.transformer.util.CFMLString;
010    
011    /**
012     * HTML Util class
013     *
014     */
015    public final class HTMLUtil {
016            
017            private final Tag[] tags=new Tag[]{
018                            new Tag("a","href"),
019                            new Tag("link","href"),
020                            new Tag("form","action"),
021                            new Tag("applet","code"),
022                            new Tag("script","src"),
023                            new Tag("body","background"),
024                            new Tag("frame","src"),
025                            new Tag("bgsound","src"),
026                            new Tag("img","src"),
027                            
028                            new Tag("embed",new String[]{"src","pluginspace"}),
029                            new Tag("object",new String[]{"data","classid","codebase","usemap"})
030                            
031            };
032            
033            
034            /**
035             * returns all urls in a html String
036             * @param html HTML String to search urls
037             * @param url Absolute URL path to set
038             * @return urls found in html String
039             */
040            public List getURLS(String html, URL url) {
041                    
042                ArrayList urls=new ArrayList();
043                    CFMLString cfml=new CFMLString(html,"UTF-8");
044                    while(!cfml.isAfterLast()) {
045                            if(cfml.forwardIfCurrent('<')) {
046                                    for(int i=0;i<tags.length;i++) {
047                                            if(cfml.forwardIfCurrent(tags[i].tag+" ")) {
048                                                    getSingleUrl(urls,cfml,tags[i],url);
049                                            }
050                                    }
051                            }
052                            else {
053                                    cfml.next();
054                            }
055                            
056                    }
057                    return urls;
058            }
059            
060            /**
061             * transform a single tag
062             * @param urls all urls founded
063             * @param cfml CFMl String Object containing plain HTML
064             * @param tag current tag totransform
065             * @param url absolute URL to Set at tag attribute
066             */
067            private void getSingleUrl(List urls,CFMLString cfml, Tag tag,URL url) {
068                    char quote=0;
069                    boolean inside=false;
070                    StringBuilder value=new StringBuilder();
071                    
072                    while(!cfml.isAfterLast()) {
073                            if(inside) {
074                                    if(quote!=0 && cfml.forwardIfCurrent(quote)) {
075                                            inside=false;
076                                            
077                                            add(urls,url,value.toString());
078                                    }
079                                    else if(quote==0 && (cfml.isCurrent(' ')||cfml.isCurrent("/>")||cfml.isCurrent('>')||cfml.isCurrent('\t')||cfml.isCurrent('\n'))) {
080                                            inside=false;
081                                            try {
082                                                    urls.add(new URL(url,value.toString()));
083                        } catch (MalformedURLException e) {}
084                                            cfml.next();
085                                    } 
086                                    else {
087                                            value.append(cfml.getCurrent());
088                                            cfml.next();
089                                    }
090                            }
091                            else if(cfml.forwardIfCurrent('>')) {
092                                    break;
093                            }
094                            else {
095                                    
096                                    for(int i=0;i<tag.attributes.length;i++) {
097                                            if(cfml.forwardIfCurrent(tag.attributes[i])) {
098                                                    cfml.removeSpace();
099                                                    // =
100                                                    if(cfml.isCurrent('=')) {
101                                                            inside=true;
102                                                            cfml.next();
103                                                            cfml.removeSpace();
104                                                            
105                                                            quote=cfml.getCurrent();
106                                                            value=new StringBuilder();
107                                                            if(quote!='"' && quote!='\'')quote=0;
108                                                            else {
109                                                                    cfml.next();
110                                                            }
111                                                    }
112                                            }
113                                    }
114                                    if(!inside) {
115                                            cfml.next();
116                                    }
117                            }
118                    }
119            }
120    
121        private void add(List list,URL baseURL,String value) {
122                    value=value.trim();
123                    String lcValue=value.toLowerCase();
124                    try {
125                            if(lcValue.startsWith("http://") || lcValue.startsWith("news://") || lcValue.startsWith("goopher://") || lcValue.startsWith("javascript:"))
126                                    list.add(HTTPUtil.toURL(value));
127                            else {
128                                    
129                                    
130                                    list.add(new URL(baseURL,value.toString()));
131                            }
132                    }
133                    catch(MalformedURLException mue) {}
134                    //print.err(list.get(list.size()-1));
135            }
136    
137            private class Tag {
138                    private String tag;
139                    private String[] attributes;
140                    private Tag(String tag,String[] attributes) {
141                            this.tag=tag.toLowerCase();
142                            this.attributes=new String[attributes.length];
143                            for(int i=0;i<attributes.length;i++) {
144                                    this.attributes[i]=attributes[i].toLowerCase();
145                            }
146                            
147                    }
148                    private Tag(String tag,String attribute1) {
149                            this.tag=tag.toLowerCase();
150                            this.attributes=new String[]{attribute1.toLowerCase()};
151                    }
152            
153            }
154    }