1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env ruby -wKU

# Case-insensitive pattern for something that looks like an e-mail address:
# local part, "@", dotted host name and an alphabetic top-level domain.
# The TLD may be 2 to 63 letters long (63 is the DNS label limit); a cap of
# four letters would miss addresses under TLDs such as .museum or .online.
EMAIL_PATTERN = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,63}\b/i

# Minimal regexp-based HTML tree. HTML.parse turns a page into a tree of Node
# objects (Tag and Text) that is walked through the Enumerable interface.
module HTML
  # A tree node. A plain Node is used as the root container; Tag and Text are
  # the nodes that actually get yielded during a traversal.
  class Node
    include Enumerable

    # Prune request for the traversal in progress. It is a class variable on
    # purpose: the flag set through HTML::Node.prune= must be visible to the
    # subclasses (Tag, Text) while they iterate.
    @@prune = false

    def initialize
      @children = []
    end

    # Appends +child+ to this node's children.
    def <<(child)
      @children << child
    end

    # Called from inside an #each block with a true +flag+ to skip the
    # sub-tree of the node currently being yielded.
    def self.prune=(flag)
      @@prune = flag
    end

    # Depth-first traversal. Yields this node (unless it is the plain root
    # container) and then, unless the block asked for pruning, every
    # descendant.
    #
    # Returns true when the block pruned this node, false otherwise. The
    # value returned by the block itself is ignored, and pruning one node
    # never affects its following siblings.
    def each(&block)
      @@prune = false # drop any stale request left over from a previous callback
      block.call(self) unless instance_of?(Node)
      pruned = @@prune ? true : false
      @@prune = false # the request is consumed by the node it was made for
      @children.each { |child| child.each(&block) } unless pruned
      pruned
    end
  end

  # An element. Holds the literal open tag, the literal close tag (empty for
  # single tags such as <br>) and the attributes found in the open tag.
  class Tag < Node
    attr_writer :close_tag # TODO we should raise if an unexpected end tag is provided
    attr_reader :args

    # tag:: the literal open tag, e.g. '<a href="x">'.
    #
    # @args maps each attribute name to its value *including* the quotes.
    # Only quoted values are recognised; <? ... ?> may appear inside a value.
    def initialize(tag)
      super()
      @open_tag = tag
      @close_tag = ''

      # Strip "<name" and the final ">" so only the attribute list remains.
      # \A and \z anchor on the whole tag, so a line break inside a
      # multi-line tag cannot be mistaken for its start or end.
      attributes = tag.gsub(/\A<[a-z0-9]+\s*|\s*>\z/i, '')
      @args = Hash[attributes.scan(/([a-z0-9]+)\s*=\s*("(?:[^<"]+|<\?.*?\?>)*?"|'(?:[^<']+|<\?.*?\?>)*?')/m)]
    end

    # Same traversal as Node#each, followed by the close tag, which is
    # yielded as a plain String (an empty one for single tags). When the
    # block pruned this tag, neither its children nor its close tag are
    # yielded. Returns true when pruned, false otherwise.
    def each(&block)
      pruned = super(&block)
      block.call(@close_tag) unless pruned
      pruned
    end

    # Lower-cased element name, or nil when the open tag contains none.
    def tag
      name = @open_tag[/[a-z0-9]+/i]
      name && name.downcase
    end

    # The literal open tag only (not the children, not the close tag).
    def to_s
      @open_tag
    end
  end

  # A run of text, or a construct that is kept verbatim (DOCTYPE, comment,
  # <? ... ?> or <% ... %> section).
  class Text < Node
    def initialize(text)
      super()
      @text = text
    end

    def to_s
      @text
    end
  end

  module_function

  # Input:  an IO object that holds an HTML page, optionally with <% script %>
  #         or <?php tags ?> (which is why an off-the-shelf parser was no good)
  # Output: a Node implementing the Enumerable interface; it is the root of
  #         the tree representing the parsed page
  # Raises: RuntimeError when a close tag appears while no tag is open, or
  #         when the document ends with tags still open
  #
  # The parser is regexp-based so not overly robust. Close tags are not
  # checked against the tag they close.
  def parse(io)
    # TODO handle XHTML
    # TODO support <div arg="<% … %>"> (we presently only support <? … ?>)
    pattern = /
          ( <(?:br|hr|img|meta|link|input|base|area|col|frame|param)\b[^>]*> ) # single tag
        | ( <    [a-z0-9]+ (?: [^<>]+ | <\?.*?\?> )* > )                       # open tag
        | ( < \/ [a-z0-9]+ [^>]* > )                                           # close tag
        | ( [^<]+ )                                                            # text
        | ( <!DOCTYPE\b [^>]* > | <!--.*?--> | <\?.*?\?> | <%.*?%> )           # stuff to preserve
        | ( < )                                                                # lone < that starts none of the above
      /xmi

    # stack.first is the root; stack.last is the innermost tag still open.
    stack = [Node.new]
    io.read.scan(pattern) do |single, open, close, text, preserve, stray|
      if single
        stack.last << Tag.new(single)
      elsif open
        node = Tag.new(open)
        stack.last << node
        stack.push(node)
      elsif close
        # The root always sits on the stack, so "nothing open" means size 1.
        raise "Close tag found but no tags are open" if stack.size == 1
        stack.pop.close_tag = close
      else
        # Text, preserved constructs and a stray "<" are all kept verbatim so
        # that nothing from the input is lost on output.
        stack.last << Text.new(text || preserve || stray)
      end
    end
    raise "Reaching end of document with unclosed tags (#{stack[1..-1].join(", ")})" unless stack.size == 1
    stack.first
  end
end

# Escapes +str+ so it can be placed between double quotes in JavaScript source.
#
# * a backslash is inserted in front of every backslash and double quote
# * a newline becomes the two characters \n
# * "@", "." and "/" become three-digit octal escapes (\100, \056, \057), so
#   the escaped text contains neither a literal "@" nor a literal "</"
#
# Returns a new String; +str+ is not modified.
def e_js(str)
  # ch.ord gives the character code; ch[0] only did so on Ruby 1.8 and is a
  # String on later versions, which makes sprintf('%03o', ...) raise.
  str.gsub(/(?=[\\"])/, '\\').gsub(/\n/, '\n').gsub(/[@.\/]/) { |ch| sprintf('\\%03o', ch.ord) }
end

# Returns a copy of +str+ with every ASCII letter rotated by 13 places
# (ROT13); all other characters are left as they are.
def rot_13(str)
  plain   = 'A-Za-z'
  rotated = 'N-ZA-Mn-za-m'
  str.tr(plain, rotated)
end

# Wraps +text+ in a <script> element that writes it into the page at load
# time. The text is stored ROT13-encoded and JavaScript-escaped; the inline
# function passed to replace() rotates the letters back in the browser.
# A <noscript> notice is appended for visitors without JavaScript.
def obfuscate(text)
  payload  = e_js(rot_13(text))
  decoder  = 'function(c){return String.fromCharCode((c<="Z"?90:122)>=(c=c.charCodeAt(0)+13)?c:c-26);}'
  fallback = '<noscript><em>(sorry, we are protecting ourself against spam so JavaScript is required to show the email address)</em></noscript>'

  [
    '<script type="text/javascript">document.write(',
    '"', payload, '"', '.replace(/[a-zA-Z]/g, ', decoder, '));',
    '</script>',
    fallback
  ].join
end

# Filter: copy the page from STDIN to STDOUT, replacing every piece that
# contains an e-mail address with its obfuscated form.
#
# node.to_s is only the text of the node itself (<tag> for <tag>foo</tag>);
# node.to_a is the whole sub-tree flattened ([ <tag>, foo, </tag> ]).
# NOTE(review): map is used only to drive the traversal; the array it
# collects is discarded.
page = HTML.parse(STDIN)
page.map do |node|
  markup = node.to_s
  if markup !~ EMAIL_PATTERN
    STDOUT << markup
  else
    STDOUT << obfuscate(node.to_a.join)
    HTML::Node.prune = true # the sub-tree was just written, do not descend into it
  end
end