railo.runtime.search.lucene2.html
Class HTMLParser
java.lang.Object
org.xml.sax.helpers.DefaultHandler
org.ccil.cowan.tagsoup.Parser
railo.runtime.search.lucene2.html.HTMLParser
- All Implemented Interfaces:
- org.ccil.cowan.tagsoup.ScanHandler, ContentHandler, DTDHandler, EntityResolver, ErrorHandler, LexicalHandler, XMLReader
public final class HTMLParser
- extends org.ccil.cowan.tagsoup.Parser
Fields inherited from class org.ccil.cowan.tagsoup.Parser |
autoDetectorProperty, bogonsEmptyFeature, CDATAElementsFeature, defaultAttributesFeature, externalGeneralEntitiesFeature, externalParameterEntitiesFeature, ignorableWhitespaceFeature, ignoreBogonsFeature, isStandaloneFeature, lexicalHandlerParameterEntitiesFeature, lexicalHandlerProperty, namespacePrefixesFeature, namespacesFeature, resolveDTDURIsFeature, restartElementsFeature, rootBogonsFeature, scannerProperty, schemaProperty, stringInterningFeature, translateColonsFeature, unicodeNormalizationCheckingFeature, useAttributes2Feature, useEntityResolver2Feature, useLocator2Feature, validationFeature, XML11Feature, xmlnsURIsFeature |
Method Summary |
void |
characters(char[] ch,
int start,
int length)
Method inherited from org.xml.sax.ContentHandler;
called while the XML is being parsed, to read the content of a body element. |
void |
endElement(String uri,
String name,
String qName)
|
String |
getAuthor()
|
String |
getCharset()
|
String |
getContent()
|
String |
getCustom1()
|
String |
getCustom2()
|
String |
getCustom3()
|
String |
getCustom4()
|
String |
getKeywords()
|
String |
getSummary()
|
String |
getTitle()
|
boolean |
hasAuthor()
|
boolean |
hasCustom1()
|
boolean |
hasCustom2()
|
boolean |
hasCustom3()
|
boolean |
hasCustom4()
|
boolean |
hasKeywords()
|
void |
parse(File file,
String charset)
parse a concrete file |
void |
parse(Reader reader)
|
void |
parse(Resource res,
String charset)
|
void |
startElement(String uri,
String name,
String qName,
Attributes atts)
|
Methods inherited from class org.ccil.cowan.tagsoup.Parser |
adup, aname, aval, cdsect, cmnt, comment, decl, endCDATA, endDTD, endEntity, entity, eof, etag_basic, etag_cdata, etag, getContentHandler, getDTDHandler, getEntity, getEntityResolver, getErrorHandler, getFeature, getProperty, gi, parse, parse, pcdata, pi, pitarget, setContentHandler, setDTDHandler, setEntityResolver, setErrorHandler, setFeature, setProperty, stagc, stage, startCDATA, startDTD, startEntity |
Methods inherited from class org.xml.sax.helpers.DefaultHandler |
endDocument, endPrefixMapping, error, fatalError, ignorableWhitespace, notationDecl, processingInstruction, resolveEntity, setDocumentLocator, skippedEntity, startDocument, startPrefixMapping, unparsedEntityDecl, warning |
HTMLParser
public HTMLParser()
parse
public void parse(File file,
String charset)
throws IOException,
SAXException
- parse a concrete file
- Parameters:
file
- charset
-
- Throws:
IOException
SAXException
parse
public void parse(Resource res,
String charset)
throws IOException,
SAXException
- Throws:
IOException
SAXException
parse
public void parse(Reader reader)
throws IOException,
SAXException
- Throws:
IOException
SAXException
startElement
public void startElement(String uri,
String name,
String qName,
Attributes atts)
throws SAXException
- Specified by:
startElement
in interface ContentHandler
- Overrides:
startElement
in class DefaultHandler
- Throws:
SAXException
- See Also:
ContentHandler.startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
endElement
public void endElement(String uri,
String name,
String qName)
- Specified by:
endElement
in interface ContentHandler
- Overrides:
endElement
in class DefaultHandler
characters
public void characters(char[] ch,
int start,
int length)
- Method inherited from org.xml.sax.ContentHandler;
called while the XML is being parsed, to read the content of a body element.
- Specified by:
characters
in interface ContentHandler
- Overrides:
characters
in class DefaultHandler
- See Also:
ContentHandler.characters(char[], int, int)
getContent
public String getContent()
- Returns:
- Returns the content.
getTitle
public String getTitle()
- Returns:
- Returns the title.
getCharset
public String getCharset()
- Returns:
- Returns the charset.
getSummary
public String getSummary()
- Returns:
- Returns the summary.
getKeywords
public String getKeywords()
- Returns:
- the keywords
hasKeywords
public boolean hasKeywords()
- Returns:
- whether keywords exist
getAuthor
public String getAuthor()
- Returns:
- the author
hasAuthor
public boolean hasAuthor()
- Returns:
- whether an author exists
hasCustom1
public boolean hasCustom1()
hasCustom2
public boolean hasCustom2()
hasCustom3
public boolean hasCustom3()
hasCustom4
public boolean hasCustom4()
getCustom1
public String getCustom1()
- Returns:
- the custom1
getCustom2
public String getCustom2()
- Returns:
- the custom2
getCustom3
public String getCustom3()
- Returns:
- the custom3
getCustom4
public String getCustom4()
- Returns:
- the custom4
Copyright © 2012 Railo