Ruby Module for Searching using the Alexa Web Services API
This post includes an improved version of the query example in Ruby for hitting the Alexa Web Search web service. Sample usage is as follows.
require 'alexa'
Alexa.search('bogle').each {|hit| puts hit.title}
For convenience, you might want to edit the code below to replace INSERT_YOUR_ACCESS_KEY_HERE with your actual access key and INSERT_YOUR_SECRET_ACCESS_KEY with your secret access key; otherwise these will need to be passed in as the AWSAccessKeyId and SecretAccessKey options.
alexa.rb
#!/usr/bin/ruby
require "cgi"
require "base64"
require "openssl"
require "digest/sha1"
require "uri"
require "net/https"
require "rexml/document"
require "time"
require "ostruct"
# Small client for the Alexa Web Search web service.
#
# Sample usage:
#
#   Alexa.search('bogle').each { |hit| puts hit.title }
#
# All request parameters are passed through a Hash with String keys
# (for example "AWSAccessKeyId", "SecretAccessKey", "ResponseGroup").
module Alexa
  # Placeholder credentials. Replace them with your real keys, or pass the
  # "AWSAccessKeyId" and "SecretAccessKey" options to Alexa.search.
  DEFAULT_ACCESS_KEY_ID = "INSERT_YOUR_ACCESS_KEY_HERE".freeze
  DEFAULT_SECRET_ACCESS_KEY = "INSERT_YOUR_SECRET_ACCESS_KEY".freeze

  # Base URL that the signed query string is appended to.
  ENDPOINT = "http://websearch.amazonaws.com".freeze

  # strftime pattern used for the "Timestamp" request parameter (UTC).
  TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.000Z".freeze

  # Search for "query", returning "count" rows starting at "start".
  # Note that the maximum count is twenty.
  #
  # query   - the search string, sent as the "Query" parameter.
  # start   - sent as the "Start" parameter.
  # count   - sent as the "Count" parameter.
  # options - Hash of extra request parameters with String keys. Recognised
  #           keys with defaults: "Action" ("WebSearch"), "AWSAccessKeyId",
  #           "Timeout" (9000), "ResponseGroup" ("Results"). The
  #           "SecretAccessKey" entry is used only to sign the request.
  #
  # Returns an AlexaResultSet of AlexaResult objects.
  # Performs an HTTP GET; network and XML parse errors propagate to the caller.
  def search(query, start = 0, count = 20, options = {})
    # Work on a copy so the caller's hash is never modified.
    params = options.dup

    # The secret must never be transmitted: take it out of the parameters
    # before they are serialized into the URL.
    secret_access_key = params.delete("SecretAccessKey") || DEFAULT_SECRET_ACCESS_KEY

    params["Query"] = query
    action = params["Action"] ||= "WebSearch"
    params["AWSAccessKeyId"] ||= DEFAULT_ACCESS_KEY_ID
    params["Start"] = start
    params["Count"] = count
    params["Timeout"] ||= 9000
    params["ResponseGroup"] ||= "Results"
    timestamp = params["Timestamp"] = Time.now.utc.strftime(TIMESTAMP_FORMAT)
    params["Signature"] = compute_signature(action, timestamp, secret_access_key)

    url = URI.parse("#{ENDPOINT}?#{query_string(params)}")
    parse_response(Net::HTTP.get(url))
  end

  # Build an AlexaResultSet from the XML text of a search response.
  #
  # Reads the first aws:TotalCount and aws:TimedOut elements and turns every
  # aws:Result element into an AlexaResult keyed by the "name" attribute of
  # its aws:Field children.
  #
  # A missing aws:TotalCount yields total_count 0; a missing aws:TimedOut
  # yields timed_out false. A document without a root element yields an
  # empty result set.
  def parse_response(xml_text)
    results = AlexaResultSet.new
    results.total_count = 0
    results.timed_out = false

    doc = REXML::Document.new(xml_text).root
    return results unless doc

    count_node = REXML::XPath.first(doc, "//aws:TotalCount")
    results.total_count = count_node.text.to_i if count_node

    timeout_node = REXML::XPath.first(doc, "//aws:TimedOut")
    # `&&` (not `and`) so the whole boolean expression is what gets assigned.
    results.timed_out = !timeout_node.nil? && timeout_node.text == "true"

    REXML::XPath.match(doc, "//aws:Result").each do |element|
      result = AlexaResult.new
      REXML::XPath.match(element, "aws:Field").each do |field|
        result[field.attributes["name"]] = field.text
      end
      results << result
    end

    results
  end

  # Array of AlexaResult objects plus response-level metadata.
  class AlexaResultSet < Array
    # total_count - Integer taken from the aws:TotalCount element.
    # timed_out   - true when the aws:TimedOut element reads "true".
    attr_accessor :total_count, :timed_out
  end

  # One search hit: a Hash from field name to field text, with convenience
  # readers for the common fields.
  class AlexaResult < Hash
    # Define a method called method_name that fetches field_name.
    def self.field_accessor(method_name, field_name)
      define_method(method_name) { self[field_name] }
    end

    field_accessor :url, "DataUrl"
    field_accessor :title, "Title"
    # NOTE: this replaces Hash#size on AlexaResult instances; use `length`
    # to get the number of fields.
    field_accessor :size, "CrawlDataSize"
    field_accessor :score, "Score"
  end

  # Compute the request signature: the Base64-encoded HMAC-SHA1 of
  # action + timestamp, keyed with secret_access_key.
  #
  # Returns the signature as a String without any trailing newline.
  def compute_signature(action, timestamp, secret_access_key)
    digest = OpenSSL::HMAC.digest(OpenSSL::Digest.new("sha1"),
                                  secret_access_key, action + timestamp)
    Base64.strict_encode64(digest)
  end

  # Serialize a Hash of parameters as "key=value" pairs joined with "&".
  # Keys and values are converted with to_s and URL-escaped.
  def query_string(options)
    options.map { |key, value|
      "#{CGI.escape(key.to_s)}=#{CGI.escape(value.to_s)}"
    }.join("&")
  end

  module_function :search, :parse_response, :compute_signature, :query_string
end