package org.apache.nutch.parse.description;

//JDK import
import java.util.logging.Logger;

//Commons Logging imports
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

//Nutch imports
import org.apache.nutch.util.LogUtil;
import org.apache.nutch.fetcher.FetcherOutput;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.parse.Parse;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;

//Lucene imports
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;

/**
 * Indexing filter that copies the {@code x-description} parse metadata of a
 * page into the Lucene document being built for it.
 *
 * <p>When the metadata value is present and not blank, it is added as a
 * stored, untokenized field named {@code x-description} with a boost of
 * {@code 5.0}. Documents without such a value are passed through unchanged.
 *
 * <p>NOTE(review): which parse-time component populates the
 * {@code x-description} metadata is not visible from this class - confirm
 * against the plugin's parse filter.
 */
public class DescriptionIndexer implements IndexingFilter {

	public static final Log LOG = LogFactory.getLog(DescriptionIndexer.class.getName());

	/** Key read from the parse metadata and name of the index field written. */
	private static final String DESCRIPTION_FIELD = "x-description";

	/** Boost applied to the description field. */
	private static final float DESCRIPTION_BOOST = 5.0f;

	/** Configuration handed in through {@link #setConf(Configuration)}. */
	private Configuration conf;

	/** Creates the filter; the configuration is supplied later via {@link #setConf(Configuration)}. */
	public DescriptionIndexer() {

	}

	/**
	 * Adds the page description, if any, to the document.
	 *
	 * @param doc     document to enrich; also the return value
	 * @param parse   parse result whose metadata is consulted
	 * @param url     not used by this filter
	 * @param datum   not used by this filter
	 * @param inlinks not used by this filter
	 * @return the same {@code doc} instance, with the description field added
	 *         when a non-blank description was found
	 * @throws IndexingException declared by the interface; not thrown directly here
	 */
	public Document filter(Document doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks) throws IndexingException {

		String desc = parse.getData().getMeta(DESCRIPTION_FIELD);

		// A missing or whitespace-only description carries no information;
		// indexing it would only add an empty term to the field.
		if (desc == null || desc.trim().length() == 0) {
			return doc;
		}

		Field descriptionField = new Field(DESCRIPTION_FIELD, desc, Field.Store.YES, Field.Index.UN_TOKENIZED);
		descriptionField.setBoost(DESCRIPTION_BOOST);
		doc.add(descriptionField);

		// This runs once per indexed document, so keep it at debug level and
		// skip building the message when it would be discarded.
		if (LOG.isDebugEnabled()) {
			LOG.debug("Added " + desc + " to the x-description Field");
		}

		return doc;
	}

	/**
	 * Stores the configuration for later retrieval.
	 *
	 * @param conf configuration to keep
	 */
	public void setConf(Configuration conf) {
		this.conf = conf;
	}

	/**
	 * Returns the configuration last passed to {@link #setConf(Configuration)}.
	 *
	 * @return the stored configuration, or {@code null} if none was set
	 */
	public Configuration getConf() {
		return this.conf;
	}
}

