package org.apache.nutch.parse.description;

// JDK imports
import java.util.Enumeration;
import java.util.Properties;
import java.util.logging.Logger;

// Nutch imports
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.parse.HTMLMetaTags;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.HtmlParseFilter;
import org.apache.nutch.parse.ParseResult;
import org.apache.nutch.protocol.Content;

// Commons imports
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

// W3C imports
import org.w3c.dom.DocumentFragment;

/**
 * HTML parse filter that looks up the <code>description</code> entry in the
 * general meta tags of a page and, when it is present and non-empty, stores
 * its value in the content metadata of the page's parse under
 * {@link #META_DESCRIPTION_NAME}.
 */
public class DescriptionParser implements HtmlParseFilter {

  private static final Log LOG = LogFactory.getLog(DescriptionParser.class.getName());

  /** Key of the meta tag that is looked up in the general meta tags. */
  private static final String DESCRIPTION_TAG = "description";

  /** The Description meta data attribute name */
  public static final String META_DESCRIPTION_NAME = "x-description";

  private Configuration conf;

  /**
   * Scan the HTML document's meta tags looking for a description meta tag
   * and, if one with a non-empty value is found, record it in the content
   * metadata of the parse registered for <code>content.getUrl()</code>.
   *
   * @param content     the fetched content; only its URL is used, to select
   *                    the parse to annotate
   * @param parseResult the parse result to annotate
   * @param metaTags    the meta tags extracted from the page
   * @param doc         the DOM of the page (not used by this filter)
   * @return the same <code>parseResult</code> instance that was passed in
   */
  public ParseResult filter(Content content, ParseResult parseResult,
    HTMLMetaTags metaTags, DocumentFragment doc) {

    String desc = findDescription(metaTags);

    // Nothing to store: leave the parse untouched.
    if (desc == null || desc.equals("")) {
      return parseResult;
    }

    // The lookup may yield nothing for this URL; do not fail the whole
    // filter chain with a NullPointerException in that case.
    Parse parse = parseResult.get(content.getUrl());
    if (parse == null) {
      LOG.warn("No parse found for " + content.getUrl()
          + "; description not stored");
      return parseResult;
    }

    LOG.info("Adding description; contents: " + desc);
    parse.getData().getContentMeta().set(META_DESCRIPTION_NAME, desc);

    return parseResult;
  }

  /**
   * Looks up the description meta tag directly instead of enumerating every
   * property name, and logs what was found.
   *
   * @param metaTags the meta tags extracted from the page, may be null
   * @return the value of the description tag (possibly empty), or
   *         <code>null</code> when no such tag is available
   */
  private static String findDescription(HTMLMetaTags metaTags) {
    Properties generalMetaTags = (metaTags == null) ? null : metaTags.getGeneralTags();
    String desc = (generalMetaTags == null)
        ? null
        : generalMetaTags.getProperty(DESCRIPTION_TAG);

    if (desc == null) {
      // Most pages without a description end up here, so keep this quiet.
      if (LOG.isDebugEnabled()) {
        LOG.debug("No description tag for this page");
      }
    } else if (desc.equals("")) {
      LOG.info("Found an empty description tag");
    } else {
      LOG.info("Found a description tag; contents: " + desc);
    }

    return desc;
  }

  /**
   * Stores the configuration handed to this filter.
   *
   * @param conf the configuration to keep
   */
  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  /**
   * @return the configuration previously passed to
   *         {@link #setConf(Configuration)}, or <code>null</code> if none
   *         has been set
   */
  public Configuration getConf() {
    return this.conf;
  }
}

