ClickAider

Ruby Module for Searching using the Alexa Web Services API

This post includes an improved version of the query example in Ruby for hitting the Alexa Web Search web service. Sample usage is as follows.

require 'alexa'
Alexa.search('bogle').each {|hit| puts hit.title}

For convenience, you might want to edit the code below to replace INSERT_YOUR_ACCESS_KEY_HERE with your actual access key and INSERT_YOUR_SECRET_ACCESS_KEY with your secret access key; otherwise these will need to be passed in as the AWSAccessKeyId and SecretAccessKey options.


alexa.rb

#!/usr/bin/ruby

require "cgi"
require "base64"
require "openssl"
require "digest/sha1"
require "uri"
require "net/https"
require "rexml/document"
require "time"
require "ostruct"

module Alexa

  # Base URL of the Alexa Web Search service; the query string is appended
  # after a "?". Plain HTTP is used.
  ENDPOINT = "http://websearch.amazonaws.com/"

  # Placeholder credentials. Replace these with your own keys, or pass the
  # "AWSAccessKeyId" and "SecretAccessKey" options to Alexa.search.
  DEFAULT_ACCESS_KEY_ID     = "INSERT_YOUR_ACCESS_KEY_HERE"
  DEFAULT_SECRET_ACCESS_KEY = "INSERT_YOUR_SECRET_ACCESS_KEY"

  # Search for "query", returning "count" rows starting at "start".
  # Note that the maximum count is twenty.
  #
  # query   - the search terms, sent as the "Query" parameter
  # start   - offset of the first row, sent as "Start"
  # count   - number of rows requested, sent as "Count"
  # options - Hash with String keys of extra request parameters. Recognised
  #           keys with defaults: "Action" ("WebSearch"), "AWSAccessKeyId",
  #           "Timeout" (9000), "ResponseGroup" ("Results").
  #           "SecretAccessKey" is used only to sign the request and is never
  #           sent. The hash passed in is not modified.
  #
  # Returns an AlexaResultSet (an Array of AlexaResult) with total_count and
  # timed_out filled in from the response.
  def search(query, start = 0, count = 20, options = {})
    params, secret_access_key = build_params(query, start, count, options)
    params["Signature"] =
      compute_signature(params["Action"], params["Timestamp"], secret_access_key)

    url = URI.parse("#{ENDPOINT}?#{query_string(params)}")
    parse_results(Net::HTTP.get(url))
  end

  # Array of AlexaResult rows plus the response-level metadata.
  class AlexaResultSet < Array
    # total_count - Integer read from the response's aws:TotalCount (0 if absent)
    # timed_out   - true only when the response's aws:TimedOut text is "true"
    attr_accessor :total_count, :timed_out
  end

  # One search hit: a Hash of field name => field text, with reader methods
  # for the common fields.
  class AlexaResult < Hash
    # Define a method called method_name that fetches field_name from the hash.
    def self.field_accessor(method_name, field_name)
      define_method(method_name) { self[field_name] }
    end

    field_accessor :url, "DataUrl"
    field_accessor :title, "Title"
    # NOTE: this deliberately keeps the original API, but it shadows
    # Hash#size; use #length for the number of stored fields.
    field_accessor :size, "CrawlDataSize"
    field_accessor :score, "Score"
  end

  # Sign a request: Base64 of the HMAC-SHA1 of (action + timestamp), keyed
  # with secret_access_key. Returns the signature without any newline.
  def compute_signature(action, timestamp, secret_access_key)
    # OpenSSL::Digest::Digest no longer exists in current openssl versions;
    # OpenSSL::Digest.new is the supported constructor.
    digest = OpenSSL::HMAC.digest(OpenSSL::Digest.new("sha1"),
                                  secret_access_key, action + timestamp)
    # "m0" is strict Base64 (no line feeds); avoids depending on the base64
    # library, which is no longer a default gem in recent Ruby releases.
    [digest].pack("m0")
  end

  # Serialise a Hash (or list of [key, value] pairs) as "k=v&k2=v2", with
  # both keys and values converted to strings and CGI-escaped.
  def query_string(options)
    options.map do |key, value|
      "#{CGI.escape(key.to_s)}=#{CGI.escape(value.to_s)}"
    end.join("&")
  end

  # Build the request parameters on a copy of options, so the caller's hash
  # is untouched. Returns [params, secret_access_key]; the secret is removed
  # from params so it cannot end up in the request URL.
  def build_params(query, start, count, options)
    params = options.dup
    secret_access_key = params.delete("SecretAccessKey") || DEFAULT_SECRET_ACCESS_KEY

    params["Query"] = query
    params["Action"] ||= "WebSearch"
    params["AWSAccessKeyId"] ||= DEFAULT_ACCESS_KEY_ID
    params["Start"] = start
    params["Count"] = count
    params["Timeout"] ||= 9000
    params["ResponseGroup"] ||= "Results"
    params["Timestamp"] = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z")

    [params, secret_access_key]
  end

  # Turn the XML response body into an AlexaResultSet.
  def parse_results(xml)
    results = AlexaResultSet.new
    results.total_count = 0
    results.timed_out = false

    doc = REXML::Document.new(xml).root
    return results if doc.nil? # empty body: nothing to read

    results.total_count = node_text(doc, "//aws:TotalCount").to_i
    # Compare the text explicitly so timed_out is a real boolean.
    results.timed_out = node_text(doc, "//aws:TimedOut") == "true"

    REXML::XPath.match(doc, "//aws:Result").each do |element|
      result = AlexaResult.new
      REXML::XPath.match(element, "aws:Field").each do |field|
        result[field.attributes["name"]] = field.text
      end
      results << result
    end

    results
  end

  # Text of the first node matching path under doc, or nil if none matches.
  def node_text(doc, path)
    node = REXML::XPath.first(doc, path)
    node && node.text
  end

  module_function :search, :compute_signature, :query_string,
                  :build_params, :parse_results, :node_text
  # Helpers are implementation details; keep them off the public module API.
  private_class_method :build_params, :parse_results, :node_text

end