<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<!-- Channel metadata for the "agent" tag feed: title, self link, site link, description. -->
	<title>Alexander Dick » agent</title>
	<atom:link
		href="http://www.adick.at/tag/agent/feed/"
		rel="self"
		type="application/rss+xml"/>
	<link>http://www.adick.at</link>
	<description>TYPO3, Web-Entwicklung und mehr</description>
	<lastBuildDate>Tue, 17 Jan 2012 10:40:10 +0000</lastBuildDate>
	<language>en</language>
	<!-- Syndication module hints for aggregators: period "hourly", frequency 1. -->
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.org/?v=3.3.1</generator>
		<!-- Feed entry: "Detect bots via user agent string". -->
		<item>
			<title>Detect bots via user agent string</title>
			<link>http://www.adick.at/2009/01/20/detect-bots-via-user-agent-string/</link>
			<comments>http://www.adick.at/2009/01/20/detect-bots-via-user-agent-string/#comments</comments>
			<pubDate>Tue, 20 Jan 2009 11:32:49 +0000</pubDate>
			<dc:creator>Alex</dc:creator>
			<!-- Category labels attached to this entry, one element per label. -->
			<category>Web</category>
			<category>agent</category>
			<category>bot</category>
			<category>crawler</category>
			<category>flash</category>
			<category>fullflash</category>
			<category>list</category>
			<category>regex</category>
			<category>robot</category>
			<category>search engine</category>
			<category>user</category>
			<!-- isPermaLink="false": the guid serves as an identifier and is not declared to be a permalink. -->
			<guid isPermaLink="false">http://www.adick.at/?p=281</guid>
			<!-- The summary is wrapped in CDATA; the line break inside it is part of the text. -->
			<description><![CDATA[When making full-flash websites, it's quite effective to deliver html-content to search engines.
To determine whether a visitor is a robot or not, you have to match the visitor's user agent against a list of known bot user agents.]]></description>
			<wfw:commentRss>http://www.adick.at/2009/01/20/detect-bots-via-user-agent-string/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		</item>
		<!-- Feed entry: "Blocking web crawlers on lighttpd". -->
		<item>
			<title>Blocking web crawlers on lighttpd</title>
			<link>http://www.adick.at/2008/09/19/blocking-web-crawlers-in-lighttpd/</link>
			<comments>http://www.adick.at/2008/09/19/blocking-web-crawlers-in-lighttpd/#comments</comments>
			<pubDate>Fri, 19 Sep 2008 11:18:37 +0000</pubDate>
			<dc:creator>Alex</dc:creator>
			<!-- Category labels attached to this entry, one element per label. -->
			<category>Web</category>
			<category>403</category>
			<category>agent</category>
			<category>crawler</category>
			<category>deny</category>
			<category>forbid</category>
			<category>forbidden</category>
			<category>lighttpd</category>
			<category>nutch</category>
			<category>user</category>
			<!-- isPermaLink="false": the guid serves as an identifier and is not declared to be a permalink. -->
			<guid isPermaLink="false">http://www.adick.at/?p=154</guid>
			<!-- The excerpt stays in CDATA so the HTML numeric character references inside it remain literal text for the XML parser. -->
			<description><![CDATA[Nutch did ignore my robots.txt (for whatever reason, I was unable to figure out why), so I had to find another way to forbid those directories for the crawler. I finally came up with this neat piece of config for lighty: $HTTP[&#34;useragent&#34;] =~ &#34;(Nutch&#124;Google&#124;FooBar)&#34; { $HTTP[&#34;url&#34;] =~ &#34;^(/one/&#124;/two/&#124;/three/)&#34; { url.access-deny = ( &#34;&#34; ) } [...]]]></description>
			<wfw:commentRss>http://www.adick.at/2008/09/19/blocking-web-crawlers-in-lighttpd/feed/</wfw:commentRss>
			<slash:comments>5</slash:comments>
		</item>
	</channel>
</rss>

