001/**
002 *
003 * Copyright (c) 2014, the Railo Company Ltd. All rights reserved.
004 *
005 * This library is free software; you can redistribute it and/or
006 * modify it under the terms of the GNU Lesser General Public
007 * License as published by the Free Software Foundation; either 
008 * version 2.1 of the License, or (at your option) any later version.
009 * 
010 * This library is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013 * Lesser General Public License for more details.
014 * 
015 * You should have received a copy of the GNU Lesser General Public 
016 * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
017 * 
018 **/
019package lucee.runtime.search.lucene2.analyzer;
020
021import java.io.Reader;
022
023import org.apache.lucene.analysis.Analyzer;
024import org.apache.lucene.analysis.TokenStream;
025import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
026
027
028/**
029 * <p>Analyzer for Portuguese language</p>
030 * <p><a href="PortugueseAnalyzer.java.html"><i>View Source</i></a></p>
031 * <p/>
032 *
033 * @author Andrey Grebnev <a href="mailto:andrey.grebnev@blandware.com">&lt;andrey.grebnev@blandware.com&gt;</a>
034 * @version $Revision: 1.3 $ $Date: 2005/02/24 19:51:22 $
035 */ 
036public final class PortugueseAnalyzer extends Analyzer {
037
038        private static SnowballAnalyzer analyzer;
039
040        private String PORTUGUESE_STOP_WORDS[] = {
041
042                "a", "ainda", "alem", "ambas", "ambos", "antes",
043                "ao", "aonde", "aos", "apos", "aquele", "aqueles",
044                "as", "assim", "com", "como", "contra", "contudo",
045                "cuja", "cujas", "cujo", "cujos", "da", "das", "de",
046                "dela", "dele", "deles", "demais", "depois", "desde",
047                "desta", "deste", "dispoe", "dispoem", "diversa",
048                "diversas", "diversos", "do", "dos", "durante", "e",
049                "ela", "elas", "ele", "eles", "em", "entao", "entre",
050                "essa", "essas", "esse", "esses", "esta", "estas",
051                "este", "estes", "ha", "isso", "isto", "logo", "mais",
052                "mas", "mediante", "menos", "mesma", "mesmas", "mesmo",
053                "mesmos", "na", "nas", "nao", "nas", "nem", "nesse", "neste",
054                "nos", "o", "os", "ou", "outra", "outras", "outro", "outros",
055                "pelas", "pelas", "pelo", "pelos", "perante", "pois", "por",
056                "porque", "portanto", "proprio", "propios", "quais", "qual",
057                "qualquer", "quando", "quanto", "que", "quem", "quer", "se",
058                "seja", "sem", "sendo", "seu", "seus", "sob", "sobre", "sua",
059                "suas", "tal", "tambem", "teu", "teus", "toda", "todas", "todo",
060                "todos", "tua", "tuas", "tudo", "um", "uma", "umas", "uns"};
061
062        /**
063         * Creates new instance of SpanishAnalyzer
064         */
065        public PortugueseAnalyzer() {
066                analyzer = new SnowballAnalyzer("Portuguese", PORTUGUESE_STOP_WORDS);
067        }
068
069        public PortugueseAnalyzer(String stopWords[]) {
070                analyzer = new SnowballAnalyzer("Portuguese", stopWords);
071        }
072
073        @Override
074    public TokenStream tokenStream(String fieldName, Reader reader) {
075                return analyzer.tokenStream(fieldName, reader);
076        }
077}