#! /usr/bin/env ruby
require "rubygems"
require "nokogiri"
require "open-uri"
require "iconv"
require "rss/maker"
require "webrick"
# A single blog post, stored as a Hash.
#
# Keys read by #to_xhtml:
#   :images_urls - collection of image URLs belonging to the post
#   :texts       - collection of text fragments belonging to the post
class SkyrockblogArticle < Hash
  # Renders the images section and the texts section of the article into a
  # single string.
  #
  # A section is emitted only when its collection is non-empty. A missing
  # (nil) :images_urls or :texts entry is treated as an empty collection
  # instead of raising NoMethodError.
  #
  # @return [String] the rendered article body
  def to_xhtml
    image_urls = Array(self[:images_urls])
    texts = Array(self[:texts])

    # Collect the fragments and join once rather than reallocating the
    # string on every `+=`.
    parts = ["\n"]

    unless image_urls.empty?
      parts << "\n"
      # NOTE(review): the per-image fragment never interpolates the URL
      # itself, so images contribute only a line break -- confirm intent.
      image_urls.each { |_url| parts << "\n" }
      parts << "\n"
    end

    unless texts.empty?
      parts << "\n"
      texts.each { |text| parts << "\n#{text}\n" }
      parts << "\n"
    end

    parts << "\n"
    parts.join
  end
end
# Ordered collection of articles belonging to one blog page; behaves exactly
# like a plain Array.
class SkyrockblogPage < Array
end
# Scraper for one Skyrock blog: downloads blog pages, extracts the articles
# and can render them as an RSS 1.0 feed.
class Skyrockblog
  # French month names, as they appear in post dates, mapped to month numbers.
  FRENCH_MONTHS = {
    "janvier" => 1, "février" => 2, "mars" => 3, "avril" => 4, "mai" => 5,
    "juin" => 6, "juillet" => 7, "août" => 8, "septembre" => 9,
    "octobre" => 10, "novembre" => 11, "décembre" => 12
  }.freeze

  # Post date line. Captures: day, month name, year, hour, minute.
  POST_DATE_PATTERN = /Post. le \S+ (\d+) (\S+) (\d+)\s+(\d+):(\d+)/

  # URL of the first page of the blog.
  attr_reader :base_url

  # @param user_name [String] blog owner's pseudo; it is form-encoded before
  #   being placed in the query string, so reserved characters cannot inject
  #   extra parameters or produce an invalid URI.
  def initialize(user_name)
    @base_url = "http://www.skyrock.com/blog/blog.php" \
                "?pseudo=#{URI.encode_www_form_component(user_name)}&__FORCE_LANG=fr_FX"
    # Per-instance cache of the last downloaded page. (Assigning these in the
    # class body would only create unused class-level instance variables.)
    @doc = nil
    @page = nil
    @user_id = nil
  end

  # Downloads (if needed) and parses one page of the blog.
  #
  # Elements matching ".bloc" are considered; those whose id does not start
  # with "a-" or that have no <h2> are skipped.
  #
  # @param page [Integer] 1-based page number
  # @return [SkyrockblogPage] the articles found, in document order
  def parse_page(page = 1)
    fetch_page(page)
    @doc.css(".bloc").each_with_object(SkyrockblogPage.new) do |node, articles|
      article = build_article(node)
      articles << article if article
    end
  end

  # Page numbers advertised by the "ul.pagination" element of the currently
  # loaded page (page 1 is loaded if nothing is loaded yet).
  #
  # @return [Integer, Range] the Integer 1 when there is no pagination
  #   element, otherwise the Range 1..last where last is the highest
  #   "<n>.html" link found (at least 1)
  def pagination
    fetch_page(false)
    nav = @doc.at("ul.pagination")
    return 1 unless nav

    last = nav.css("a").inject(1) do |highest, link|
      matches = /(\d+)\.html$/.match(link["href"].to_s)
      matches ? [highest, matches[1].to_i].max : highest
    end
    1..last
  end

  # Numeric id of the blog owner, read from the "id_skynaute = ..." assignment
  # found in the text of the loaded page. The value is memoized because
  # #permalink asks for it once per article.
  #
  # @return [String] the digits of the id
  # @raise [RuntimeError] when the page contains no id_skynaute assignment
  def user_id
    @user_id ||= begin
      fetch_page(false)
      matches = /id_skynaute\s*=\s*"?(\d+)"?/.match(@doc.text)
      raise "id_skynaute not found in blog page" unless matches
      matches[1]
    end
  end

  # @return [String] text of the <title> element of the loaded page
  def title
    fetch_page(false)
    @doc.at("title").text
  end

  # @return [String] text of the first ".description" element of the loaded page
  def description
    fetch_page(false)
    @doc.at(".description").text
  end

  # Collects articles page by page until at least nb_max have been gathered
  # or the pages run out.
  #
  # Pages are visited in reverse order when the first page holds fewer than
  # two articles or lists its first article as older than its second.
  #
  # @param nb_max [Integer] stop fetching further pages once this many
  #   articles are collected; whole pages are appended, so the result may
  #   contain more than nb_max entries
  # @return [Array<SkyrockblogArticle>]
  def fetch_articles(nb_max)
    first_page = parse_page(1)
    pages = Array(pagination)
    if first_page.size < 2 ||
       first_page[0][:created_on] < first_page[1][:created_on]
      pages.reverse!
    end

    collected = []
    pages.each do |page|
      collected.concat(parse_page(page))
      break if collected.size >= nb_max
    end
    collected
  end

  # Builds an RSS 1.0 feed from the blog's articles (collection stops after
  # 15 have been gathered, see #fetch_articles).
  #
  # @return [Object] whatever RSS::Maker.make returns for the "1.0" maker
  def rss
    articles = fetch_articles(15)
    RSS::Maker.make("1.0") do |maker|
      maker.encoding = "UTF-8"
      maker.channel.title = title
      maker.channel.link = base_url
      maker.channel.description = description
      # NOTE(review): rdf:about is normally a URI identifying the channel;
      # the description text is kept here to preserve the existing output.
      maker.channel.about = description
      maker.items.do_sort = true
      articles.each do |article|
        item = maker.items.new_item
        item.title = article[:title]
        item.date = article[:created_on]
        item.link = article[:permalink]
        item.description = article[:texts].first
        item.content_encoded = article.to_xhtml
      end
    end
  end

  protected

  # Converts a French post date line into a local Time.
  #
  # The input is transcoded to UTF-8 first so that accented month names
  # compare equal to the UTF-8 keys of FRENCH_MONTHS whatever encoding the
  # string is tagged with.
  #
  # @param skyrockblog_date [String] text containing the post date line
  # @return [Time] local time built from the captured date parts
  # @raise [ArgumentError] when the text does not match POST_DATE_PATTERN or
  #   names an unknown month
  def parse_skyrockblog_date(skyrockblog_date)
    text = skyrockblog_date.encode("UTF-8")
    matches = POST_DATE_PATTERN.match(text)
    raise ArgumentError, "unrecognized post date: #{text.inspect}" unless matches

    day, month_name, year, hour, min = matches.captures
    month = FRENCH_MONTHS.fetch(month_name) do
      raise ArgumentError, "unknown month #{month_name.inspect} in #{text.inspect}"
    end
    # to_i (not Integer()) so zero-padded fields such as "08" parse as 8.
    Time.local(year.to_i, month, day.to_i, hour.to_i, min.to_i)
  end

  # @param page [Integer] 1-based page number
  # @return [String] URL of that page; pages <= 1 map to base_url
  def page_url(page)
    return @base_url if page <= 1
    "#{@base_url}&page=#{page}"
  end

  # Ensures @doc holds a parsed page.
  #
  # @param page [Integer, false] page number to load; false (or nil) means
  #   "any already-loaded page will do", falling back to page 1
  # @return [Object] the parsed document
  def fetch_page(page)
    return @doc if @doc && (!page || @page == page)

    wanted = page || 1
    # Block form closes the downloaded IO once the document is parsed.
    # URI.open is used because Kernel#open no longer accepts URLs on Ruby 3.
    @doc = URI.open(page_url(wanted)) { |io| Nokogiri::HTML(io) }
    # Remember the real page number so a later request for that same page
    # reuses the document instead of downloading it again.
    @page = wanted
    @doc
  end

  # @param id [String] article id (DOM id without its "a-" prefix)
  # @return [String] direct link to the article
  def permalink(id)
    "http://www.skyrock.com/direct.php/#{id}:#{user_id}"
  end

  # Extracts one article from a ".bloc" node.
  #
  # @param node [Object] node taken from the parsed page
  # @return [SkyrockblogArticle, nil] nil when the node's id does not start
  #   with "a-" or it has no <h2> title
  def build_article(node)
    dom_id = node["id"]
    return nil unless dom_id && /^a-/.match(dom_id)

    title_node = node.at("h2")
    return nil unless title_node

    article = SkyrockblogArticle.new
    article[:id] = dom_id.gsub(/^a-/, "")
    article[:images_urls] = node.css("img").map { |image| image["src"] }
    # Only <div>s whose class attribute is exactly "text-container" count.
    article[:texts] = node.css("div").
      select { |div| div["class"] == "text-container" }.
      map { |div| div.inner_text }
    article[:title] = title_node.inner_text
    article[:created_on] = parse_skyrockblog_date(node.at(".created_on").inner_text)
    article[:permalink] = permalink(article[:id])
    article
  end
end
# WEBrick servlet that serves a Skyrock blog as an RSS feed.
#
# The blog is selected with the "u" query parameter.
class RSSServlet < WEBrick::HTTPServlet::AbstractServlet
  # Handles GET requests.
  #
  # Responds with:
  # * 412 when the "u" parameter is missing or empty,
  # * 404 when the blog site answers 404 for the requested blog,
  # * the feed XML with Content-Type "text/xml" otherwise.
  # Any other error propagates to WEBrick unchanged.
  #
  # @param req [Object] request; only #query is read
  # @param res [Object] response; status, body and Content-Type are set
  def do_GET(req, res)
    user_name = req.query["u"]
    if user_name.nil? || user_name.empty?
      res.status = 412
      return
    end

    # Skyrockblog.new always returns an object, so "blog not found" can only
    # be detected when the download fails; see the rescue clause below.
    res.body = Skyrockblog.new(user_name).rss.to_xml
    res["Content-Type"] = "text/xml"
  rescue OpenURI::HTTPError => e
    # open-uri exposes the upstream status as [code, message] on e.io.
    raise unless e.io.status.first == "404"
    res.status = 404
  end
end
# Start an HTTP server on port 2000 that serves the feed under /rss.
feed_server = WEBrick::HTTPServer.new(Port: 2000)
feed_server.mount("/rss", RSSServlet)

# Ctrl-C (SIGINT) asks the server to shut down.
Signal.trap("INT") { feed_server.shutdown }

# Blocks until the server is shut down.
feed_server.start