001    package railo.runtime.text.feed;
002    
003    import java.io.IOException;
004    import java.io.InputStream;
005    
006    import org.xml.sax.Attributes;
007    import org.xml.sax.InputSource;
008    import org.xml.sax.Locator;
009    import org.xml.sax.SAXException;
010    import org.xml.sax.XMLReader;
011    import org.xml.sax.ext.Locator2;
012    import org.xml.sax.helpers.DefaultHandler;
013    import org.xml.sax.helpers.XMLReaderFactory;
014    
015    import railo.commons.io.IOUtil;
016    import railo.commons.io.SystemUtil;
017    import railo.commons.io.res.Resource;
018    import railo.commons.lang.StringUtil;
019    import railo.runtime.exp.DatabaseException;
020    import railo.runtime.exp.PageException;
021    import railo.runtime.type.Collection;
022    import railo.runtime.type.Collection.Key;
023    import railo.runtime.type.KeyImpl;
024    import railo.runtime.type.Query;
025    import railo.runtime.type.QueryImpl;
026    import railo.runtime.type.Struct;
027    import railo.runtime.type.StructImpl;
028    
029    public final class RSSHandler extends DefaultHandler {
030            
031            public final static String DEFAULT_SAX_PARSER="org.apache.xerces.parsers.SAXParser";
032    
033            private static final Key RSSLINK = KeyImpl.intern("RSSLINK");
034            private static final Key CONTENT = KeyImpl.intern("CONTENT");
035    
036            private static final Key LINK = KeyImpl.intern("LINK");
037            private static final Key DESCRIPTION = KeyImpl.intern("DESCRIPTION");
038            
039            private static Collection.Key[] COLUMNS=new Collection.Key[]{
040                    KeyImpl.intern("AUTHOREMAIL"),
041                    KeyImpl.intern("AUTHORNAME"),
042                    KeyImpl.intern("AUTHORURI"),
043                    KeyImpl.intern("CATEGORYLABEL"),
044                    KeyImpl.intern("CATEGORYSCHEME"),
045                    KeyImpl.intern("CATEGORYTERM"),
046                    KeyImpl.intern("COMMENTS"),
047                    CONTENT,
048                    KeyImpl.intern("CONTENTMODE"),
049                    KeyImpl.intern("CONTENTSRC"),
050                    KeyImpl.intern("CONTENTTYPE"),
051                    KeyImpl.intern("CONTRIBUTOREMAIL"),
052                    KeyImpl.intern("CONTRIBUTORNAME"),
053                    KeyImpl.intern("CONTRIBUTORURI"),
054                    KeyImpl.intern("CREATEDDATE"),
055                    KeyImpl.intern("EXPIRATIONDATE"),
056                    KeyImpl.intern("ID"),
057                    KeyImpl.intern("IDPERMALINK"),
058                    KeyImpl.intern("LINKHREF"),
059                    KeyImpl.intern("LINKHREFLANG"),
060                    KeyImpl.intern("LINKLENGTH"),
061                    KeyImpl.intern("LINKREL"),
062                    KeyImpl.intern("LINKTITLE"),
063                    KeyImpl.intern("LINKTYPE"),
064                    KeyImpl.intern("PUBLISHEDDATE"),
065                    KeyImpl.intern("RIGHTS"),
066                    RSSLINK,
067                    KeyImpl.intern("SOURCE"),
068                    KeyImpl.intern("SOURCEURL"),
069                    KeyImpl.intern("SUMMARY"),
070                    KeyImpl.intern("SUMMARYMODE"),
071                    KeyImpl.intern("SUMMARYSRC"),
072                    KeyImpl.intern("SUMMARYTYPE"),
073                    KeyImpl.intern("TITLE"),
074                    KeyImpl.intern("TITLETYPE"),
075                    KeyImpl.intern("UPDATEDDATE"),
076                    KeyImpl.intern("URI"),
077                    KeyImpl.intern("XMLBASE")
078            };
079            
080            
081            private XMLReader xmlReader;
082    
083            private String lcInside;
084            private StringBuffer content=new StringBuffer();
085    
086            private boolean insideImage;
087            private boolean insideItem;
088    
089            private Struct image;
090            private Struct properties;
091            private Query items;
092    
093            private Collection.Key inside;
094            
095            /**
096             * Constructor of the class
097             * @param res
098             * @throws IOException
099             * @throws SAXException 
100             * @throws DatabaseException 
101             */
102            public RSSHandler(Resource res) throws IOException, SAXException, DatabaseException {
103                    InputStream is=null;
104                    try {
105                            InputSource source=new InputSource(is=res.getInputStream());
106                            source.setSystemId(res.getPath());
107                            init(DEFAULT_SAX_PARSER,source);
108                    } 
109                    finally {
110                            IOUtil.closeEL(is);
111                    }
112            }
113    
114            /**
115             * Constructor of the class
116             * @param stream
117             * @throws IOException
118             * @throws SAXException 
119             * @throws DatabaseException 
120             */
121            public RSSHandler(InputStream stream) throws IOException, SAXException, DatabaseException {
122                    InputSource is=new InputSource(IOUtil.getReader(stream, SystemUtil.getCharset()));
123                    init(DEFAULT_SAX_PARSER,is);
124            }
125            
126            private void init(String saxParser,InputSource is) throws SAXException, IOException, DatabaseException  {
127                    properties=new StructImpl();
128                    items=new QueryImpl(COLUMNS,0,"query");
129                    xmlReader=XMLReaderFactory.createXMLReader(saxParser);
130                    xmlReader.setContentHandler(this);
131                    xmlReader.setErrorHandler(this);
132                    
133                    //xmlReader.setEntityResolver(new TagLibEntityResolver());
134                    xmlReader.parse(is);
135                    
136                    //properties.setEL("encoding",is.getEncoding());
137                    
138        }
139            
140            public void setDocumentLocator(Locator locator) { 
141                      if (locator instanceof Locator2) {
142                        Locator2 locator2 = (Locator2) locator;
143                        properties.setEL("encoding", locator2.getEncoding());
144                      } 
145                    }
146    
147            /**
148             * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
149             */
150            public void startElement(String uri, String name, String qName, Attributes atts) {
151                    inside = KeyImpl.getInstance(qName);
152                    lcInside=qName.toLowerCase();
153                    if(lcInside.equals("image"))            insideImage=true;
154                    else if(qName.equals("item"))   {
155                            items.addRow();
156                            insideItem=true;
157                    }
158                    else if(lcInside.equals("rss"))         {
159                            String version = atts.getValue("version");
160                            if(!StringUtil.isEmpty(version))
161                                    properties.setEL("version", "rss_"+version);
162                    }
163                    
164                    /* / cloud
165                    else if(!insideItem && lcInside.equals("cloud"))        {
166                            
167                            
168                            
169                            String url = atts.getValue("url");
170                            if(!StringUtil.isEmpty(url))items.setAtEL("LINKHREF", items.getRowCount(), url);
171                            String length = atts.getValue("length");
172                            if(!StringUtil.isEmpty(length))items.setAtEL("LINKLENGTH", items.getRowCount(), length);
173                            String type = atts.getValue("type");
174                            if(!StringUtil.isEmpty(type))items.setAtEL("LINKTYPE", items.getRowCount(), type);
175                    }*/
176                    
177                    
178                    // enclosure
179                    else if(insideItem && lcInside.equals("enclosure"))     {
180                            String url = atts.getValue("url");
181                            if(!StringUtil.isEmpty(url))items.setAtEL("LINKHREF", items.getRowCount(), url);
182                            String length = atts.getValue("length");
183                            if(!StringUtil.isEmpty(length))items.setAtEL("LINKLENGTH", items.getRowCount(), length);
184                            String type = atts.getValue("type");
185                            if(!StringUtil.isEmpty(type))items.setAtEL("LINKTYPE", items.getRowCount(), type);
186                    }
187                    
188                    else if(atts.getLength()>0) {
189                            int len=atts.getLength();
190                            Struct sct=new StructImpl();
191                            for(int i=0;i<len;i++) {
192                                    sct.setEL(atts.getQName(i), atts.getValue(i));
193                            }
194                            properties.setEL(inside, sct);
195                    }
196                    
197                    //<enclosure url="http://www.scripting.com/mp3s/weatherReportDicksPicsVol7.mp3" length="6182912" type="audio/mpeg"/>
198            }
199        
200            /**
201             * Geerbte Methode von org.xml.sax.ContentHandler, 
202             * wird bei durchparsen des XML, beim auftreten eines End-Tag aufgerufen.
203             *  
204             * @see org.xml.sax.ContentHandler#endElement(String, String, String)
205             */
206            public void endElement(String uri, String name, String qName) {
207                    setContent(content.toString().trim());
208                    content=new StringBuffer();
209                    inside=null;
210                    lcInside="";
211                    
212                    if(qName.equals("image")) insideImage=false;
213                    if(qName.equals("item")) insideItem=false;
214            }
215            
216            
217        /** 
218         * Geerbte Methode von org.xml.sax.ContentHandler, 
219             * wird bei durchparsen des XML, zum einlesen des Content eines Body Element aufgerufen.
220             * 
221             * @see org.xml.sax.ContentHandler#characters(char[], int, int)
222             */
223            public void characters (char ch[], int start, int length)       {
224                    content.append(new String(ch,start,length));
225            }
226            
227            private void setContent(String value)   {
228                    if(StringUtil.isEmpty(lcInside)) return;
229                    
230                    if(insideImage) {
231                            if(image==null){
232                                    image=new StructImpl();
233                                    properties.setEL("image",image);
234                            }
235                            image.setEL(inside,value);
236                    }
237                    else if(insideItem)     {
238                            try {
239                                    items.setAt(toItemColumn(inside), items.getRowCount(), value);
240                            } catch (PageException e) {
241                                    //print.err(inside);
242                            }
243                            
244                    }
245                    else {
246                            if(!(StringUtil.isEmpty(value,true) && properties.containsKey(inside)))
247                                    properties.setEL(inside,value);
248                    }       
249        }
250    
251            private Collection.Key toItemColumn(Collection.Key key) {
252                    if(key.equalsIgnoreCase(LINK))                  return RSSLINK;
253                    else if(key.equalsIgnoreCase(DESCRIPTION))return CONTENT;
254                    return key;
255            }
256    
257            /**
258             * @return the properties
259             */
260            public Struct getProperties() {
261                    return properties;
262            }
263    
264            /**
265             * @return the items
266             */
267            public Query getItems() {
268                    return items;
269            }
270            
271            
272            /*public static void main(String[] args) throws IOException, SAXException {
273                    ResourceProvider frp = ResourcesImpl.getFileResourceProvider();
274                    Resource res = frp.getResource("/Users/mic/Projects/Railo/webroot/jm/feed/092.xml");
275                    RSSHandler rss=new RSSHandler(res);
276                    print.out(rss.getProperties());
277                    print.out(rss.getItems());
278                    
279            }*/
280    }