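Tried to make it easier to read: moved the processing inside an `open(url)` block, pulled the TTL and output filename into `ttl`/`out` variables, switched most string literals to single quotes, and rewrote the comments in Japanese.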
@@ -3,58 +3,60 @@ | ||
3 | 3 | require 'open-uri' |
4 | 4 | require 'rss' |
5 | 5 | |
6 | -debug = true if ARGV[0] == "-d" | |
6 | +debug = true if ARGV[0] == '-d' | |
7 | 7 | |
8 | 8 | url = 'http://www.st.ryukoku.ac.jp/~kjm/security/memo/' |
9 | -url = './index.html' if debug | |
10 | -puts "opening #{url}" if debug | |
11 | -doc = Nokogiri::HTML(open(url)) | |
9 | +url = './index.html' if debug ### 何度もアクセスすると悪いので | |
10 | +ttl = '60' ### cron の設定に合わせて分単位で指定 | |
11 | +out = 'shm.rss' | |
12 | 12 | |
13 | -### FIXME: any better ways there? | |
14 | -doc.css('a[href^="/~kjm/"]').each do |anc| | |
15 | - anc['href'] = 'http://www.st.ryukoku.ac.jp' + anc['href'] | |
16 | - puts "prefixed: #{anc['href']}" if debug | |
17 | -end | |
13 | +open(url) do |html| | |
14 | + doc = Nokogiri::HTML(html) ### 最悪でも new するのかな | |
18 | 15 | |
19 | -### Which version should we use? | |
20 | -rss = RSS::Maker.make("2.0") do |xml| | |
21 | - xml.channel.title = doc.title | |
22 | - xml.channel.link = url | |
23 | - xml.channel.description = doc.css('div.NORMAL').first.children | |
24 | - p xml.channel if debug | |
16 | + ### 相対パスを絶対パスに。格好いい方法ないのかな | |
17 | + doc.css('a[href^="/~kjm/"]').each do |anc| | |
18 | + anc['href'] = 'http://www.st.ryukoku.ac.jp' + anc['href'] | |
19 | + puts "prefixed: #{anc['href']}" if debug | |
20 | + end | |
25 | 21 | |
26 | - doc.css('a.NU').each do |link| | |
27 | - next if link.parent.name == "h2" | |
28 | - puts "processing: #{link}" if debug | |
29 | - i = xml.items.new_item | |
30 | - ### a"》", span" ", content | |
31 | - i.title = link.next.next.content | |
32 | - i.link = link['href'] | |
33 | - if link.parent.name == "p" | |
34 | - ### Normal short items | |
35 | - i.description = link.parent.parent.children | |
36 | - elsif link.parent.name == "h3" | |
37 | - ### "various", "tuiki" etc | |
38 | - i.description = link.parent.next.next | |
39 | - else | |
40 | - i.description = "Something wrong" | |
22 | + rss = RSS::Maker.make('2.0') do |xml| | |
23 | + xml.channel.title = doc.title | |
24 | + xml.channel.link = url | |
25 | + p xml.channel if debug | |
26 | + | |
27 | + ### 「追いかけてみるテストです」のあたりにしてみた | |
28 | + xml.channel.description = doc.css('div.NORMAL').first.children | |
29 | + | |
30 | + doc.css('a.NU').each do |link| | |
31 | + next if link.parent.name == "h2" ### その中にまた a.NU がある | |
32 | + | |
33 | + puts "processing: #{link}" if debug | |
34 | + i = xml.items.new_item | |
35 | + ### "》" の次が空 span で、その次がリンクかな | |
36 | + i.title = link.next.next.content | |
37 | + i.link = link['href'] | |
38 | + if link.parent.name == 'p' ### 大部分の一行もの | |
39 | + i.description = link.parent.parent.children | |
40 | + elsif link.parent.name == 'h3' ### 「いろいろ」とか「追記」 | |
41 | + i.description = link.parent.next.next | |
42 | + else | |
43 | + i.description = '(HTML のパースに失敗しました)' | |
44 | + end | |
45 | + ### アンカーから日付だけ取得するハック | |
46 | + i.date = Time.parse(/#([0-9]{8})/.match(link['href'])[1]) | |
47 | + | |
48 | + if debug | |
49 | + puts " #{link.parent.name}: Title: #{i.title}" | |
50 | + puts " Link: #{i.link}" | |
51 | + puts " Date: #{i.date}" | |
52 | + puts "" ### description は長いから出力しない | |
53 | + end | |
41 | 54 | end |
42 | - i.date = Time.parse(/#([0-9]{8})/.match(link['href'])[1]) | |
43 | 55 | |
44 | - if debug | |
45 | - puts " #{link.parent.name}: Title: #{i.title}" | |
46 | - puts " Link: #{i.link}" | |
47 | - puts " Date: #{i.date}" | |
48 | - puts "" ### description is too long to put here | |
49 | - end | |
56 | + xml.channel.ttl = ttl | |
50 | 57 | end |
51 | 58 | |
52 | - ### TTL depends on your cron settings | |
53 | - xml.channel.ttl = "60" ### (in minutes) | |
59 | + File.open(out, 'w') do |f| | |
60 | + f.write(rss.to_s) | |
61 | + end | |
54 | 62 | end |
55 | - | |
56 | -### Lazy: should check before writing | |
57 | -File.open("shm.rss", "w") do |f| | |
58 | - f.write(rss.to_s) | |
59 | -end | |
60 | - |