Web からテキストを取得してハッシュに変換する
Web からデータを取得して、あとは Mix-in で取り込んだ Filter 機能でデータをごりごり加工していく。
#!/usr/bin/ruby
require 'uri'
require 'net/http'
Net::HTTP.version_1_2
module Rire
class Mapping
  # Builds a Mapping, evaluates the optional DSL block against it and then
  # mixes Rire::Filter into that single instance.
  #
  # The block runs via instance_eval *before* the Filter methods are mixed in,
  # so inside the block only Mapping's own methods (base_url, extract_capture,
  # ...) are available.
  #
  # @return [Mapping] the configured instance, extended with Rire::Filter
  def self.define(&block)
    m = new
    m.instance_eval(&block) unless block.nil?
    m.extend(Rire::Filter)
    m
  end

  def initialize
    @entries = []
    @options = {}
    @base_url = nil
  end

  # Registers the extraction rule used by #_extract.
  #
  # @param regexp [Regexp] pattern whose capture groups hold the values
  # @param attr_names [Array] keys for the capture groups, in group order
  # @param block [Proc, nil] optional post-processing hook; it is
  #   instance_eval'ed on every extracted item hash (the hash is also passed
  #   as the block argument)
  def extract_capture(regexp, attr_names, &block)
    @options[:extract_capture] ||= {}
    @options[:extract_capture][:regexp] = regexp
    @options[:extract_capture][:attr_names] = attr_names
    @options[:extract_capture][:block] = block
  end

  # Sets the base URL. A nil or empty argument leaves the current value
  # untouched and returns nil.
  #
  # @param url [String, nil]
  # @return [URI, nil] the parsed URI, or nil when nothing was set
  def base_url(url = nil)
    return nil if url.nil? || url.empty?

    @base_url = URI.parse(url)
  end

  # Obtains a text and extracts entries from it.
  #
  # The text comes from, in order of priority:
  # 1. the return value of the given block,
  # 2. an HTTP GET of +url+ (resolved against the base URL when one is set),
  # 3. an HTTP GET of the base URL.
  #
  # When none of the three is available there is nothing to fetch and the
  # receiver is returned unchanged.
  # TODO: raise an exception instead?
  #
  # @param url [String, nil]
  # @return [Mapping] self
  def fetch(url = nil, &block)
    return self if block.nil? && url.nil? && @base_url.nil?

    text = block ? block.call : _http_get(_target_uri(url))
    _extract(text)
  end

  # Drops all extracted entries.
  #
  # @return [Mapping] self
  def clear
    @entries = []
    self
  end

  # Replaces the current entries with the items extracted from +t+.
  #
  # Every match of the configured regexp yields one hash that maps each
  # attribute name to the capture group at the same position (nil when the
  # regexp has fewer groups than there are names).
  #
  # @param t [String, nil] text to scan; nil is treated as an empty string
  # @return [Mapping] self
  # @raise [RuntimeError] when #extract_capture has not been called yet
  def _extract(t)
    rule = @options[:extract_capture]
    raise 'extract_capture must be configured before extracting' if rule.nil?

    regexp, attr_names, after_block = rule.values_at(:regexp, :attr_names, :block)
    @entries = []
    # scan only iterates; gsub would also build a replacement string that
    # nobody uses.
    t.to_s.scan(regexp) do
      item = attr_names.zip(Regexp.last_match.captures).to_h
      # The post-processing hook is optional.
      item.instance_eval(&after_block) if after_block
      @entries << item
    end
    self
  end

  private

  # Picks the URI to request: the base URL alone, +url+ alone, or +url+
  # resolved relative to the base URL when both are present.
  def _target_uri(url)
    return @base_url if url.nil?
    return URI.parse(url) if @base_url.nil?

    @base_url.merge(url)
  end

  # Performs a GET request and returns the response body (may be nil).
  def _http_get(uri)
    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
      req = Net::HTTP::Get.new(uri.request_uri)
      # TODO: optional request settings, not wired up yet:
      # req["user-agent"] = @user_agent unless @user_agent.empty?
      # req.basic_auth(@username, @password) if @username && @password
      http.request(req).body
    end
  end
end
module Filter
  # Returns the entries held by the receiver (the @entries object itself,
  # not a copy).
  def to_hash
    @entries
  end

  # Replaces the entries with the result of mapping them through the block.
  # Without a block nothing changes. Returns self for chaining.
  def apply(&block)
    return self if block.nil?

    @entries = @entries.map(&block)
    self
  end

  # Removes nil entries. Returns self for chaining.
  def compact
    tap { @entries = @entries.compact }
  end

  # Writes the inspected entries to standard output. Returns self for
  # chaining.
  def print
    tap { Kernel.p(@entries) }
  end
end
end
# Demo, executed only when this file is run directly.
# The fetch calls below expect an HTTP server on localhost:8080 that serves
# /hoge.txt.
if $PROGRAM_NAME == __FILE__
  require 'pp'
  require 'open-uri'

  # Sample feed used to exercise the extraction without any network access.
  s = <<EOD
<feed>
<entry>
<name>user01</name>
<comment>comment01</comment>
</entry>
<entry>
<name>user03</name>
<comment>comment03</comment>
</entry>
<entry>
<name>user05</name>
<comment>comment05</comment>
</entry>
</feed>
EOD

  f = Rire::Mapping.define do
    base_url 'http://localhost:8080/hoge.txt'
    extract_capture %r{<name>(.+?)</name>\s+<comment>(.+?)</comment>}m, [:name, :comment] do |e|
      e[:comment] = "hoge" if e[:name].eql?('user05')
    end
  end

  # 1. Extract straight from the in-memory sample.
  f._extract(s).print.clear
  # 2. Let a block supply the text. URI.open is required here: plain
  #    Kernel#open no longer opens URLs on Ruby 3.0+. The block form closes
  #    the stream once it has been read.
  f.fetch { URI.open('http://localhost:8080/hoge.txt', &:read) }.print
  # 3. Fetch an explicit URL.
  f.fetch('http://localhost:8080/hoge.txt').print
  # 4. Fetch the configured base URL.
  f.fetch.print
end