railo.runtime.search.lucene2.html
Class HTMLParser

java.lang.Object
  extended by org.xml.sax.helpers.DefaultHandler
      extended by org.ccil.cowan.tagsoup.Parser
          extended by railo.runtime.search.lucene2.html.HTMLParser
All Implemented Interfaces:
org.ccil.cowan.tagsoup.ScanHandler, ContentHandler, DTDHandler, EntityResolver, ErrorHandler, LexicalHandler, XMLReader

public final class HTMLParser
extends org.ccil.cowan.tagsoup.Parser


Field Summary
 
Fields inherited from class org.ccil.cowan.tagsoup.Parser
autoDetectorProperty, bogonsEmptyFeature, CDATAElementsFeature, defaultAttributesFeature, externalGeneralEntitiesFeature, externalParameterEntitiesFeature, ignorableWhitespaceFeature, ignoreBogonsFeature, isStandaloneFeature, lexicalHandlerParameterEntitiesFeature, lexicalHandlerProperty, namespacePrefixesFeature, namespacesFeature, resolveDTDURIsFeature, restartElementsFeature, rootBogonsFeature, scannerProperty, schemaProperty, stringInterningFeature, translateColonsFeature, unicodeNormalizationCheckingFeature, useAttributes2Feature, useEntityResolver2Feature, useLocator2Feature, validationFeature, XML11Feature, xmlnsURIsFeature
 
Constructor Summary
HTMLParser()
           
 
Method Summary
 void characters(char[] ch, int start, int length)
          Method inherited from org.xml.sax.ContentHandler; called while parsing the XML to read the content of a body element.
 void endElement(String uri, String name, String qName)
           
 String getAuthor()
           
 String getCharset()
           
 String getContent()
           
 String getCustom1()
           
 String getCustom2()
           
 String getCustom3()
           
 String getCustom4()
           
 String getKeywords()
           
 String getSummary()
           
 String getTitle()
           
 boolean hasAuthor()
           
 boolean hasCustom1()
           
 boolean hasCustom2()
           
 boolean hasCustom3()
           
 boolean hasCustom4()
           
 boolean hasKeywords()
           
 void parse(File file, String charset)
          Parses a concrete file.
 void parse(Reader reader)
           
 void parse(Resource res, String charset)
           
 void startElement(String uri, String name, String qName, Attributes atts)
           
 
Methods inherited from class org.ccil.cowan.tagsoup.Parser
adup, aname, aval, cdsect, cmnt, comment, decl, endCDATA, endDTD, endEntity, entity, eof, etag_basic, etag_cdata, etag, getContentHandler, getDTDHandler, getEntity, getEntityResolver, getErrorHandler, getFeature, getProperty, gi, parse, parse, pcdata, pi, pitarget, setContentHandler, setDTDHandler, setEntityResolver, setErrorHandler, setFeature, setProperty, stagc, stage, startCDATA, startDTD, startEntity
 
Methods inherited from class org.xml.sax.helpers.DefaultHandler
endDocument, endPrefixMapping, error, fatalError, ignorableWhitespace, notationDecl, processingInstruction, resolveEntity, setDocumentLocator, skippedEntity, startDocument, startPrefixMapping, unparsedEntityDecl, warning
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

HTMLParser

public HTMLParser()
Method Detail

parse

public void parse(File file,
                  String charset)
           throws IOException,
                  SAXException
Parses a concrete file.

Parameters:
file -
charset -
Throws:
IOException
SAXException

parse

public void parse(Resource res,
                  String charset)
           throws IOException,
                  SAXException
Throws:
IOException
SAXException

parse

public void parse(Reader reader)
           throws IOException,
                  SAXException
Throws:
IOException
SAXException

startElement

public void startElement(String uri,
                         String name,
                         String qName,
                         Attributes atts)
                  throws SAXException
Specified by:
startElement in interface ContentHandler
Overrides:
startElement in class DefaultHandler
Throws:
SAXException
See Also:
ContentHandler.startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)

endElement

public void endElement(String uri,
                       String name,
                       String qName)
Specified by:
endElement in interface ContentHandler
Overrides:
endElement in class DefaultHandler

characters

public void characters(char[] ch,
                       int start,
                       int length)
Method inherited from org.xml.sax.ContentHandler; called while parsing the XML to read the content of a body element.

Specified by:
characters in interface ContentHandler
Overrides:
characters in class DefaultHandler
See Also:
ContentHandler.characters(char[], int, int)

getContent

public String getContent()
Returns:
Returns the content.

getTitle

public String getTitle()
Returns:
Returns the title.

getCharset

public String getCharset()
Returns:
Returns the charset.

getSummary

public String getSummary()
Returns:
Returns the summary

getKeywords

public String getKeywords()
Returns:
the keywords

hasKeywords

public boolean hasKeywords()
Returns:
true if keywords exist

getAuthor

public String getAuthor()
Returns:
the author

hasAuthor

public boolean hasAuthor()
Returns:
true if an author exists

hasCustom1

public boolean hasCustom1()

hasCustom2

public boolean hasCustom2()

hasCustom3

public boolean hasCustom3()

hasCustom4

public boolean hasCustom4()

getCustom1

public String getCustom1()
Returns:
the custom1

getCustom2

public String getCustom2()
Returns:
the custom2

getCustom3

public String getCustom3()
Returns:
the custom3

getCustom4

public String getCustom4()
Returns:
the custom4


Copyright © 2012 Railo