/*
SDX: Documentary System in XML.
Copyright (C) 2000, 2001, 2002  Ministere de la culture et de la communication (France), AJLSM

Ministere de la culture et de la communication,
Mission de la recherche et de la technologie
3 rue de Valois, 75042 Paris Cedex 01 (France)
mrt@culture.fr, michel.bottin@culture.fr

AJLSM, 17, rue Vital Carles, 33000 Bordeaux (France)
sevigny@ajlsm.com

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the
Free Software Foundation, Inc.
59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
or connect to:
http://www.fsf.org/copyleft/gpl.html
*/
/*
 * Created by IntelliJ IDEA.
 * User: rpandey
 * Date: 5 nov. 2002
 * Time: 11:50:24
 * To change template for new class use
 * Code Style | Class Templates options (Tools | IDE Options).
 */
package fr.gouv.culture.sdx.search.lucene.analysis;

import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.ru.RussianCharsets;

import java.io.Reader;

/**
 * Analyzer for Russian text that delegates tokenization to Lucene's
 * {@link RussianAnalyzer}.
 *
 * <p>The character set used by the underlying analyzer is selected with the
 * optional <code>charset</code> attribute of the configuration element. The
 * accepted values (compared case-insensitively) are <code>unicodeRussian</code>
 * (the default), <code>CP1251</code> and <code>KOI8</code>. Stop words are
 * handled by the super class through its <code>stopTable</code> field.</p>
 *
 * <p>{@link #configure(Configuration)} must be called before
 * {@link #tokenStream(String, Reader)}.</p>
 */
public class Analyzer_ru extends DefaultAnalyzer {

    /** Identifier returned by {@link #getAnalyzerType()}. */
    protected final static String ANALYZER_TYPE = "Analyzer_ru";

    /** Name of the configuration attribute that selects the character set. */
    private static final String ATTRIBUTE_CHARSET = "charset";

    /** Attribute value selecting {@link RussianCharsets#UnicodeRussian}. */
    private static final String CHARSET_UNICODE_RUSSIAN = "unicodeRussian";

    /** Attribute value selecting {@link RussianCharsets#CP1251}. */
    private static final String CHARSET_CP1251 = "CP1251";

    /** Attribute value selecting {@link RussianCharsets#KOI8}. */
    private static final String CHARSET_KOI8 = "KOI8";

    /** Character set name used when the attribute is absent or there is no configuration. */
    private static final String DEFAULT_CHARSET = CHARSET_UNICODE_RUSSIAN;

    /** The wrapped Lucene analyzer; <code>null</code> until {@link #configure(Configuration)} succeeds. */
    private RussianAnalyzer russianAnalyzer = null;

    /**
     * Charset for Russian letters.
     * Represents encoding for 32 lowercase Russian letters.
     * Predefined charsets can be taken from the {@link RussianCharsets} class.
     */
    private char[] charset;

    /**
     * Returns the type identifier of this analyzer.
     *
     * @return {@link #ANALYZER_TYPE}
     */
    protected String getAnalyzerType() {
        return Analyzer_ru.ANALYZER_TYPE;
    }

    /**
     * Configures this analyzer.
     *
     * <p>The super class is configured first (it handles the stop words), then
     * the character set is resolved and the underlying {@link RussianAnalyzer}
     * is built, with the stop table when one is available.</p>
     *
     * @param configuration the configuration element for this analyzer; a
     *                      <code>null</code> value selects the default charset
     *                      here (it is also passed as-is to the super class)
     * @throws ConfigurationException if the super class rejects the
     *                                configuration or the <code>charset</code>
     *                                attribute holds an unsupported value
     */
    public void configure(Configuration configuration) throws ConfigurationException {

        // The super class will handle the stop words
        super.configure(configuration);

        // Determine which character set to use; fails fast on an unknown name
        // instead of handing a null charset to the Lucene analyzer.
        this.charset = resolveCharset(configuration);

        if (this.stopTable != null) {
            this.russianAnalyzer = new RussianAnalyzer(this.charset, this.stopTable);
        } else {
            this.russianAnalyzer = new RussianAnalyzer(this.charset);
        }
    }

    /**
     * Maps the <code>charset</code> attribute of the configuration to one of
     * the predefined tables of {@link RussianCharsets}.
     *
     * @param configuration the analyzer configuration, may be <code>null</code>
     * @return the character table matching the requested (or default) name
     * @throws ConfigurationException if the requested name is not one of the
     *                                supported character sets
     */
    private char[] resolveCharset(Configuration configuration) throws ConfigurationException {
        String requested = DEFAULT_CHARSET;
        if (configuration != null) {
            requested = configuration.getAttribute(ATTRIBUTE_CHARSET, DEFAULT_CHARSET);
        }

        if (CHARSET_UNICODE_RUSSIAN.equalsIgnoreCase(requested)) {
            return RussianCharsets.UnicodeRussian;
        }
        if (CHARSET_CP1251.equalsIgnoreCase(requested)) {
            return RussianCharsets.CP1251;
        }
        if (CHARSET_KOI8.equalsIgnoreCase(requested)) {
            return RussianCharsets.KOI8;
        }

        throw new ConfigurationException(
                "Unsupported value \"" + requested + "\" for attribute \"" + ATTRIBUTE_CHARSET
                        + "\" of the Russian analyzer; expected one of "
                        + CHARSET_UNICODE_RUSSIAN + ", " + CHARSET_CP1251 + ", " + CHARSET_KOI8);
    }

    /**
     * Creates a TokenStream which tokenizes all the text in the provided Reader
     * by delegating to the configured {@link RussianAnalyzer}.
     *
     * @param fieldName the name of the field being analyzed
     * @param reader    the source of the text to tokenize
     * @return the TokenStream produced by the underlying {@link RussianAnalyzer}
     * @throws IllegalStateException if this analyzer has not been configured yet
     */
    public final TokenStream tokenStream(String fieldName, Reader reader) {
        if (this.russianAnalyzer == null) {
            throw new IllegalStateException(
                    "Analyzer_ru has not been configured: configure(Configuration) must be called before tokenStream()");
        }
        return this.russianAnalyzer.tokenStream(fieldName, reader);
    }

}
