/*
Copyright (C) 2000-2010  Ministere de la culture et de la communication (France), AJLSM
See LICENCE file
 */
package fr.gouv.culture.sdx.search.lucene.analysis.filter;

import java.io.IOException;
import java.util.Set;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

import fr.gouv.culture.sdx.search.lucene.analysis.stemmer.BrazilianStemmer;

/**
 * A filter that stems Brazilian Portuguese words. It supports a set of words
 * that should not be stemmed at all.
 *
 * Based on (copied from) the GermanStemFilter by Gerhard Schwarz.
 *
 * @author    Jo&atilde;o Kramer
 * @author    Gerhard Schwarz
 * @version   $Id: BrazilianStemFilter.java,v 1.1 2007/09/19 15:51:02 malo_pichot Exp $
 */
public final class BrazilianStemFilter extends TokenFilter {

    /**
     * Stemmer applied to the text of every token that is not excluded.
     */
    private final BrazilianStemmer stemmer;

    /**
     * Terms that must be passed through unstemmed, or <code>null</code> when
     * no exclusion set was supplied.
     */
    // MAJ Lucene 2.1.0: a Set is used here instead of the former Hashtable.
    private final Set exclusions;

    /**
     * Builds a BrazilianStemFilter without any exclusion set: every token
     * read from the input stream is handed to the stemmer.
     *
     * @param in  the token stream to read tokens from
     */
    public BrazilianStemFilter(TokenStream in) {
        this(in, null);
    }

    /**
     * Builds a BrazilianStemFilter that uses an exclusion set.
     *
     * @param in              the token stream to read tokens from
     * @param exclusiontable  terms that must not be stemmed; may be
     *                        <code>null</code>, in which case nothing is excluded
     */
    // MAJ Lucene 2.1.0: the exclusions are passed as a Set instead of a Hashtable.
    public BrazilianStemFilter(TokenStream in, Set exclusiontable) {
        super(in);
        this.stemmer = new BrazilianStemmer();
        this.exclusions = exclusiontable;
    }

    /**
     * Reads the next token from the input stream and stems its text unless
     * the term is listed in the exclusion set.
     *
     * When stemming changes the text, a new token is returned that keeps the
     * start offset, end offset, type and position increment of the token it
     * replaces, so that the stemmed term still points at the original text.
     *
     * @return  Returns the next token in the stream, or null at EOS.
     * @throws IOException  if reading from the input stream fails
     */
    public final Token next()
            throws IOException {
        final Token token = input.next();
        if (token == null) {
            return null;
        }

        final String term = token.termText();

        // Excluded terms are passed through untouched.
        if (exclusions != null && exclusions.contains(term)) {
            return token;
        }

        final String stem = stemmer.stem(term);

        // If not stemmed, don't waste time creating a new token.
        if (stem == null || stem.equals(term)) {
            return token;
        }

        // Keep the offsets of the source token: they refer to the original
        // text, not to the (usually shorter) stem.
        final Token stemmed =
                new Token(stem, token.startOffset(), token.endOffset(), token.type());
        // A fresh Token starts with the default position increment; carry over
        // whatever upstream filters set on the source token.
        stemmed.setPositionIncrement(token.getPositionIncrement());
        return stemmed;
    }
}


