001/**
002 *
003 * Copyright (c) 2014, the Railo Company Ltd. All rights reserved.
004 *
005 * This library is free software; you can redistribute it and/or
006 * modify it under the terms of the GNU Lesser General Public
007 * License as published by the Free Software Foundation; either 
008 * version 2.1 of the License, or (at your option) any later version.
009 * 
010 * This library is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013 * Lesser General Public License for more details.
014 * 
015 * You should have received a copy of the GNU Lesser General Public 
016 * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
017 * 
018 **/
019package lucee.runtime.search.lucene2.analyzer;
020
021
022import java.io.Reader;
023
024import org.apache.lucene.analysis.Analyzer;
025import org.apache.lucene.analysis.TokenStream;
026import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
027
028
029/**
030 * <p>Analyzer for Spanish language</p>
031 * <p><a href="SpanishAnalyzer.java.html"><i>View Source</i></a></p>
032 * <p/>
033 *
034 * @author Andrey Grebnev <a href="mailto:andrey.grebnev@blandware.com">&lt;andrey.grebnev@blandware.com&gt;</a>
035 * @version $Revision: 1.2 $ $Date: 2005/02/24 19:51:22 $
036 */
037public final class SpanishAnalyzer extends Analyzer {
038
039        private static SnowballAnalyzer analyzer;
040
041        private String SPANISH_STOP_WORDS[] = {
042
043                "un", "una", "unas", "unos", "uno", "sobre", "todo", "tambien", "tras",
044                "otro", "algun", "alguno", "alguna",
045
046                "algunos", "algunas", "ser", "es", "soy", "eres", "somos", "sois", "estoy",
047                "esta", "estamos", "estais",
048
049                "estan", "en", "para", "atras", "porque", "por que", "estado", "estaba",
050                "ante", "antes", "siendo",
051
052                "ambos", "pero", "por", "poder", "puede", "puedo", "podemos", "podeis",
053                "pueden", "fui", "fue", "fuimos",
054
055                "fueron", "hacer", "hago", "hace", "hacemos", "haceis", "hacen", "cada",
056                "fin", "incluso", "primero",
057
058                "desde", "conseguir", "consigo", "consigue", "consigues", "conseguimos",
059                "consiguen", "ir", "voy", "va",
060
061                "vamos", "vais", "van", "vaya", "bueno", "ha", "tener", "tengo", "tiene",
062                "tenemos", "teneis", "tienen",
063
064                "el", "la", "lo", "las", "los", "su", "aqui", "mio", "tuyo", "ellos",
065                "ellas", "nos", "nosotros", "vosotros",
066
067                "vosotras", "si", "dentro", "solo", "solamente", "saber", "sabes", "sabe",
068                "sabemos", "sabeis", "saben",
069
070                "ultimo", "largo", "bastante", "haces", "muchos", "aquellos", "aquellas",
071                "sus", "entonces", "tiempo",
072
073                "verdad", "verdadero", "verdadera", "cierto", "ciertos", "cierta",
074                "ciertas", "intentar", "intento",
075
076                "intenta", "intentas", "intentamos", "intentais", "intentan", "dos", "bajo",
077                "arriba", "encima", "usar",
078
079                "uso", "usas", "usa", "usamos", "usais", "usan", "emplear", "empleo",
080                "empleas", "emplean", "ampleamos",
081
082                "empleais", "valor", "muy", "era", "eras", "eramos", "eran", "modo", "bien",
083                "cual", "cuando", "donde",
084
085                "mientras", "quien", "con", "entre", "sin", "trabajo", "trabajar",
086                "trabajas", "trabaja", "trabajamos",
087
088                "trabajais", "trabajan", "podria", "podrias", "podriamos", "podrian",
089                "podriais", "yo", "aquel", "mi",
090
091                "de", "a", "e", "i", "o", "u"};
092
093        /**
094         * Creates new instance of SpanishAnalyzer
095         */
096        public SpanishAnalyzer() {
097                analyzer = new SnowballAnalyzer("Spanish", SPANISH_STOP_WORDS);
098        }
099
100        public SpanishAnalyzer(String stopWords[]) {
101                analyzer = new SnowballAnalyzer("Spanish", stopWords);
102        }
103
104        @Override
105    public TokenStream tokenStream(String fieldName, Reader reader) {
106                return analyzer.tokenStream(fieldName, reader);
107        }
108}