001/** 002 * 003 * Copyright (c) 2014, the Railo Company Ltd. All rights reserved. 004 * 005 * This library is free software; you can redistribute it and/or 006 * modify it under the terms of the GNU Lesser General Public 007 * License as published by the Free Software Foundation; either 008 * version 2.1 of the License, or (at your option) any later version. 009 * 010 * This library is distributed in the hope that it will be useful, 011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013 * Lesser General Public License for more details. 014 * 015 * You should have received a copy of the GNU Lesser General Public 016 * License along with this library. If not, see <http://www.gnu.org/licenses/>. 017 * 018 **/ 019package lucee.runtime.search.lucene2.analyzer; 020 021 022import java.io.Reader; 023 024import org.apache.lucene.analysis.Analyzer; 025import org.apache.lucene.analysis.TokenStream; 026import org.apache.lucene.analysis.snowball.SnowballAnalyzer; 027 028 029/** 030 * <p>Analyzer for Spanish language</p> 031 * <p><a href="SpanishAnalyzer.java.html"><i>View Source</i></a></p> 032 * <p/> 033 * 034 * @author Andrey Grebnev <a href="mailto:andrey.grebnev@blandware.com"><andrey.grebnev@blandware.com></a> 035 * @version $Revision: 1.2 $ $Date: 2005/02/24 19:51:22 $ 036 */ 037public final class SpanishAnalyzer extends Analyzer { 038 039 private static SnowballAnalyzer analyzer; 040 041 private String SPANISH_STOP_WORDS[] = { 042 043 "un", "una", "unas", "unos", "uno", "sobre", "todo", "tambien", "tras", 044 "otro", "algun", "alguno", "alguna", 045 046 "algunos", "algunas", "ser", "es", "soy", "eres", "somos", "sois", "estoy", 047 "esta", "estamos", "estais", 048 049 "estan", "en", "para", "atras", "porque", "por que", "estado", "estaba", 050 "ante", "antes", "siendo", 051 052 "ambos", "pero", "por", "poder", "puede", "puedo", "podemos", "podeis", 053 "pueden", "fui", "fue", "fuimos", 054 055 "fueron", "hacer", "hago", "hace", "hacemos", "haceis", "hacen", "cada", 056 "fin", "incluso", "primero", 057 058 "desde", "conseguir", "consigo", "consigue", "consigues", "conseguimos", 059 "consiguen", "ir", "voy", "va", 060 061 "vamos", "vais", "van", "vaya", "bueno", "ha", "tener", "tengo", "tiene", 062 "tenemos", "teneis", "tienen", 063 064 "el", "la", "lo", "las", "los", "su", "aqui", "mio", "tuyo", "ellos", 065 "ellas", "nos", "nosotros", "vosotros", 066 067 "vosotras", "si", "dentro", "solo", "solamente", "saber", "sabes", "sabe", 068 "sabemos", "sabeis", "saben", 069 070 "ultimo", "largo", "bastante", "haces", "muchos", "aquellos", "aquellas", 071 "sus", "entonces", "tiempo", 072 073 "verdad", "verdadero", "verdadera", "cierto", "ciertos", "cierta", 074 "ciertas", "intentar", "intento", 075 076 "intenta", "intentas", "intentamos", "intentais", "intentan", "dos", "bajo", 077 "arriba", "encima", "usar", 078 079 "uso", "usas", "usa", "usamos", "usais", "usan", "emplear", "empleo", 080 "empleas", "emplean", "ampleamos", 081 082 "empleais", "valor", "muy", "era", "eras", "eramos", "eran", "modo", "bien", 083 "cual", "cuando", "donde", 084 085 "mientras", "quien", "con", "entre", "sin", "trabajo", "trabajar", 086 "trabajas", "trabaja", "trabajamos", 087 088 "trabajais", "trabajan", "podria", "podrias", "podriamos", "podrian", 089 "podriais", "yo", "aquel", "mi", 090 091 "de", "a", "e", "i", "o", "u"}; 092 093 /** 094 * Creates new instance of SpanishAnalyzer 095 */ 096 public SpanishAnalyzer() { 097 analyzer = new SnowballAnalyzer("Spanish", SPANISH_STOP_WORDS); 098 } 099 100 public SpanishAnalyzer(String stopWords[]) { 101 analyzer = new SnowballAnalyzer("Spanish", stopWords); 102 } 103 104 @Override 105 public TokenStream tokenStream(String fieldName, Reader reader) { 106 return analyzer.tokenStream(fieldName, reader); 107 } 108}