TagIterator (aka Tagiter)

Simple but very useful HTML/XHTML cascading parser.

Usage

  # sample html
  stext = <<-EOF
  <body> This is a test...
    <sub> S1 </sub> <sub> S2 </sub>
    <DL>
      <DT> A1
      <DT> A2
      <DT> A3
    </DL>
    <DL>
      <DT> B1
      <DT> B2
      <DT> B3
    </DL>
    <NEST>
      <P ALIGN="R">TOP</P>
      <NEST>
        <P>SECOND</P>
        <OL>
          <LI>C1
          <LI>C2
          <LI>C3
          <LI>C4
        </OL>
      </NEST>
      <OL>
        <LI>D1
        <LI>D2
        <LI>D3
        <LI>D4
      </OL>
    </NEST>
  </body>
  EOF

  # Wrap the sample markup in an iterator.
  a = TagIterator.new(stext)
  # Work inside the first <body> block.
  a.first("body") do |y|
    # Second <DL> block: print every <DT> entry (B1..B3).
    y.nth("dl",2) do |dl|
      dl.enumtag("dt") do |t|
        puts t.text.strip
      end
    end
    # First (outer) <NEST> block.
    y.first("nest") do |n|
      # Print the text of the first <P> followed by its attributes.
      n.first("p") do |c|
        print c.text, ' '
        puts c.attributes.collect{ |k,v| "#{k}=#{v}" }
      # The value returned by first() covers the text after the matched block,
      # so chaining .next continues the search from there.
      end.next("nest") do |m|
        m.first("p") do |c|
          puts c.text
        end.next("ol") do |o|
          # Items of the inner list (C1..C4).
          o.enumtag("li") do |i| puts i.text.strip end
        end
      end.next("ol") do |o|
        # Items of the outer list (D1..D4).
        o.enumtag("li") do |i| puts i.text.strip end
      end
    end
  end
  # Visit every <sub> block in the whole text (S1, S2).
  a.each_block("sub") do |y|
    puts y.text.strip
  end

produces

  B1
  B2
  B3
  TOP align=R
  SECOND
  C1
  C2
  C3
  C4
  D1
  D2
  D3
  D4
  S1
  S2
Methods
Attributes
[R] attributes
[RW] option
[R] tag
[R] text
Public Class methods
new(text,tag=nil,attributes={})
# File lib/facets/more/tagiterator.rb, line 117
  # Builds an iterator over +text+.
  #
  # text       - markup to scan; anything that is not a String is rejected.
  # tag        - name of the tag this fragment belongs to (nil by default).
  # attributes - Hash of attributes; its #[] is overridden on this one object
  #              so that the requested name is down-cased before the lookup.
  #
  # Raises RuntimeError when +text+ is not a String.
  def initialize(text,tag=nil,attributes={})
    unless text.is_a?(String)
      raise RuntimeError,"Only String accepted"
    end
    @attributes=attributes
    @tag=tag
    @option="pi"
    @text=text
    # Singleton override: down-case the key, then defer to the normal Hash#[].
    def @attributes.[](aname)
      super(aname.downcase)
    end
  end
Public Instance methods
collect(*arg)
# File lib/facets/more/tagiterator.rb, line 226
  # Returns an Array holding every object that each_block yields when it is
  # called with the same arguments, in the order they were yielded.
  def collect(*arg)
    [].tap do |found|
      each_block(*arg) { |blk| found.push(blk) }
    end
  end
each_block(tag,closetag=nil) {|self.class.new(@text[s..e],tag,parse_attribute(d))| ...}
# File lib/facets/more/tagiterator.rb, line 202
  # Yields one new iterator per +tag+ block found in the text, in order.
  #
  # tag      - name of the opening tag to look for.
  # closetag - optional element that terminates the block; when omitted the
  #            matching close tag of +tag+ is used.
  #
  # Each yielded object is built from the text between the opening tag and
  # its terminator, the tag name and the parsed attributes of the opening tag.
  # When no terminator is found the block runs to the end of the text and the
  # iteration stops.
  #
  # Returns a new iterator over the text following the last terminated block.
  # Raises RuntimeError when +tag+ does not occur at all.
  def each_block(tag,closetag=nil)
    t=0
    s,d =find_opentag(tag)
    raise RuntimeError,"tag(#{tag}) not found" unless s

    while s
      e = closetag ? find_closetag(closetag,s,tag) : find_closetag(tag,s)
      e ||= -1          # no terminator: slice up to the end of the text
      yield self.class.new(@text[s..e],tag,parse_attribute(d))
      break if e < 0
      # Continue after the '>' that ends the terminator (or at end of text).
      t=@text.index('>',e+1) || @text.length
      s,d = find_opentag(tag,t)
    end
    # text[t+1..-1] is nil once t+1 is past the end of the text; hand the
    # constructor an empty String instead of tripping its type check.
    self.class.new(text[t+1..-1] || "")
  end
enumcollect(tag)
# File lib/facets/more/tagiterator.rb, line 242
  # Returns an Array of every object that enumtag yields for +tag+,
  # in the order they were yielded.
  def enumcollect(tag)
    [].tap do |items|
      enumtag(tag) { |item| items.push(item) }
    end
  end
enumtag(tag) {|self.class.new(@text[s..e],tag,parse_attribute(d))| ...}
# File lib/facets/more/tagiterator.rb, line 232
  # Walks over tags that are used as list markers (no close tag needed).
  #
  # For every opening +tag+ a new iterator is yielded, built from the text
  # that runs from just after the opening tag up to the character before the
  # next position reported by find_closeenumtag (or to the end of the text
  # when there is none), together with the tag name and its parsed attributes.
  def enumtag(tag)
    from,decl = find_openenumtag(tag)
    while from
      upto = find_closeenumtag(tag,from+1)
      upto = -1 unless upto
      yield self.class.new(@text[from..upto],tag,parse_attribute(decl))
      from,decl = find_openenumtag(tag,from)
    end
  end
first(tag,*arg) {|f| ...}
# File lib/facets/more/tagiterator.rb, line 199
  # Shorthand for nth with n fixed to 1: yields the first +tag+ block and
  # returns whatever nth returns. Extra arguments are passed through to nth.
  def first(tag,*arg)
    nth(tag,1,*arg) { |found| yield found }
  end
  # +next+ is a second name for +first+, so a call can be chained onto the
  # value returned by a previous first/nth call (e.g. <tt>end.next("ol")</tt>).
  alias_method :next, :first

  # Yields one new iterator per +tag+ block found in the text, in order.
  #
  # tag      - name of the opening tag to look for.
  # closetag - optional element that terminates the block; when omitted the
  #            matching close tag of +tag+ is used.
  #
  # Each yielded object is built from the text between the opening tag and
  # its terminator, the tag name and the parsed attributes of the opening tag.
  # When no terminator is found the block runs to the end of the text and the
  # iteration stops.
  #
  # Returns a new iterator over the text following the last terminated block.
  # Raises RuntimeError when +tag+ does not occur at all.
  def each_block(tag,closetag=nil)
    t=0
    s,d =find_opentag(tag)
    raise RuntimeError,"tag(#{tag}) not found" unless s

    while s
      e = closetag ? find_closetag(closetag,s,tag) : find_closetag(tag,s)
      e ||= -1          # no terminator: slice up to the end of the text
      yield self.class.new(@text[s..e],tag,parse_attribute(d))
      break if e < 0
      # Continue after the '>' that ends the terminator (or at end of text).
      t=@text.index('>',e+1) || @text.length
      s,d = find_opentag(tag,t)
    end
    # text[t+1..-1] is nil once t+1 is past the end of the text; hand the
    # constructor an empty String instead of tripping its type check.
    self.class.new(text[t+1..-1] || "")
  end

  # Returns an Array holding every object that each_block yields when it is
  # called with the same arguments, in the order they were yielded.
  def collect(*arg)
    [].tap do |found|
      each_block(*arg) { |blk| found.push(blk) }
    end
  end

  # Walks over tags that are used as list markers (no close tag needed).
  #
  # For every opening +tag+ a new iterator is yielded, built from the text
  # that runs from just after the opening tag up to the character before the
  # next position reported by find_closeenumtag (or to the end of the text
  # when there is none), together with the tag name and its parsed attributes.
  def enumtag(tag)
    from,decl = find_openenumtag(tag)
    while from
      upto = find_closeenumtag(tag,from+1)
      upto = -1 unless upto
      yield self.class.new(@text[from..upto],tag,parse_attribute(decl))
      from,decl = find_openenumtag(tag,from)
    end
  end

  # Returns an Array of every object that enumtag yields for +tag+,
  # in the order they were yielded.
  def enumcollect(tag)
    [].tap do |items|
      enumtag(tag) { |item| items.push(item) }
    end
  end

  # Hands the receiver itself to the given block and returns the block's value.
  def for_this
    yield(self)
  end

  # Non-block form of nth: returns the object nth yields for the given
  # arguments, or nil when nth yields nothing.
  def get_nth(*arg)
    picked=nil
    nth(*arg) { |bl| picked=bl }
    picked
  end

  # Non-block form of first: returns the object first yields for the given
  # arguments, or nil when first yields nothing.
  def get_first(*arg)
    picked=nil
    first(*arg) { |bl| picked=bl }
    picked
  end

  # Reports whether find_element locates +tag+ when searching from offset +st+.
  # Always returns exactly true or false.
  def tagexist?(tag,st=0)
    !!find_element(tag,st)
  end

  # Returns the first token (normally the tag name) of the first complete
  # <...> construct in the text.
  #
  # Returns nil when the text contains no '<', when that '<' is never closed
  # by a '>', or when nothing but whitespace sits between the two.
  def tagnext
    s=@text.index("<")
    return nil unless s
    e=@text.index(">",s)
    return nil unless e   # unterminated tag: there is no complete tag to report
    @text[s..e].scan(/[^<>\s]+/)[0]
  end

  # Calls nth for the n-th +tag+ block with a block that does nothing and
  # returns nth's own return value.
  def nth_tailer(tag,n)
    nth(tag,n) { }
  end

end



#  _____         _
# |_   _|__  ___| |_
#   | |/ _ \/ __| __|
#   | |  __/\__ \ |_
#   |_|\___||___/\__|
#

??

for_this() {|self| ...}
# File lib/facets/more/tagiterator.rb, line 248
  # Hands the receiver itself to the given block and returns the block's value.
  def for_this
    yield(self)
  end
get_first(*arg)
# File lib/facets/more/tagiterator.rb, line 254
  # Non-block form of first: returns the object first yields for the given
  # arguments, or nil when first yields nothing.
  def get_first(*arg)
    picked=nil
    first(*arg) { |bl| picked=bl }
    picked
  end
get_nth(*arg)
# File lib/facets/more/tagiterator.rb, line 252
  # Non-block form of nth: returns the object nth yields for the given
  # arguments, or nil when nth yields nothing.
  def get_nth(*arg)
    picked=nil
    nth(*arg) { |bl| picked=bl }
    picked
  end
nth(tag,n,closetag=nil) {|self.class.new(text[s..e],tag,parse_attribute(d))| ...}
# File lib/facets/more/tagiterator.rb, line 176
  # Yields the n-th +tag+ block of the text as a new iterator.
  #
  # tag      - name of the opening tag to look for.
  # n        - 1-based ordinal of the block wanted.
  # closetag - optional element that terminates a block; when omitted the
  #            matching close tag of +tag+ is used.
  #
  # The yielded object is built from the text between the opening tag and its
  # terminator, the tag name and the parsed attributes of the opening tag.
  #
  # Returns a new iterator over the text that follows the '>' located after
  # the block, so that calls can be chained.
  # Raises RuntimeError when +n+ is nil/false or when fewer than +n+
  # occurrences of +tag+ are found.
  def nth(tag,n,closetag=nil)
    raise RuntimeError,"nth: number not specified" unless n
    t=0
    e=s=0   # declared here so the values assigned inside the block survive it
    d=nil

    1.upto(n) do |i|
      s,d = find_opentag(tag,t)
      raise RuntimeError,"tag(#{tag}) not found at(#{i})" unless s

      e = closetag ? find_closetag(closetag,s,tag) : find_closetag(tag,s)
      e ||= -1          # no terminator: slice up to the end of the text
      t = @text.index('>',e+1) || @text.length
    end
    yield self.class.new(text[s..e],tag,parse_attribute(d))
    # text[t+1..-1] is nil once t+1 is past the end of the text; hand the
    # constructor an empty String instead of tripping its type check.
    self.class.new(text[t+1..-1] || "")
  end
nth_tailer(tag,n)
# File lib/facets/more/tagiterator.rb, line 269
  # Calls nth for the n-th +tag+ block with a block that does nothing and
  # returns nth's own return value.
  def nth_tailer(tag,n)
    nth(tag,n) { }
  end
tagexist?(tag,st=0)
# File lib/facets/more/tagiterator.rb, line 256
  # Reports whether find_element locates +tag+ when searching from offset +st+.
  # Always returns exactly true or false.
  def tagexist?(tag,st=0)
    !!find_element(tag,st)
  end
tagnext()
# File lib/facets/more/tagiterator.rb, line 261
  # Returns the first token (normally the tag name) of the first complete
  # <...> construct in the text.
  #
  # Returns nil when the text contains no '<', when that '<' is never closed
  # by a '>', or when nothing but whitespace sits between the two.
  def tagnext
    s=@text.index("<")
    return nil unless s
    e=@text.index(">",s)
    return nil unless e   # unterminated tag: there is no complete tag to report
    @text[s..e].scan(/[^<>\s]+/)[0]
  end
Private Instance methods
find_closeenumtag(tag,st=0)
# File lib/facets/more/tagiterator.rb, line 166
  # Finds where an enumerated item ends: the index of the character just
  # before the next "<tag" that starts at or after offset +st+.
  #
  # Returns that index, or nil when no further "<tag" exists.
  def find_closeenumtag(tag,st=0)
    # @option follows the historical Regexp.new convention: an Integer is a
    # flag mask, nil/false means no flags, anything else means "ignore case".
    # Passing the raw default "pi" is rejected by Ruby versions that parse a
    # String option as flag letters, so it is translated explicitly here.
    flags = case @option
            when Integer    then @option
            when nil, false then 0
            else Regexp::IGNORECASE
            end
    s=@text.index(Regexp.new('<\s*'+tag,flags),st)
    s && s-1
  end
find_closetag(tag,st,opentag=nil)
# File lib/facets/more/tagiterator.rb, line 149
  # Locates the end of the block that is open at offset +st+, skipping over
  # nested blocks of the same kind.
  #
  # tag     - when +opentag+ is nil, the tag name whose close tag ("</tag")
  #           ends the block; otherwise the element that ends the block,
  #           searched for as given.
  # st      - offset from which to search.
  # opentag - optional name of the opening tag to watch for when detecting
  #           nesting; defaults to +tag+ itself.
  #
  # Returns the index of the character just before the terminating '<',
  # or nil when no terminator belonging to this block is found.
  def find_closetag(tag,st,opentag=nil)
    if opentag
      close_at = find_element(tag,st)
      open_end, = find_opentag(opentag,st)
    else
      close_at = find_element('/\s*'+tag,st)
      open_end, = find_opentag(tag,st)
    end
    return nil unless close_at

    # open_end is the offset just past the '>' of the next opening tag. That
    # tag is nested in this block exactly when it is complete at or before the
    # terminator's '<' (this also covers nested blocks whose content is empty
    # or a single character).
    if open_end && close_at >= open_end
      inner = find_closetag(tag,open_end,opentag)
      return nil unless inner            # nested block never closed
      # inner+2 is just past the '<' of the nested terminator; resume there.
      return find_closetag(tag,inner+2,opentag)
    end

    close_at-1
  end
find_element(element,st=0)
# File lib/facets/more/tagiterator.rb, line 128
  # Returns the index of the '<' that starts the first occurrence of
  # +element+ at or after offset +st+, or nil when there is none.
  #
  # +element+ is inserted into the pattern as-is, so callers may pass a
  # regular-expression fragment (e.g. '/\s*p' to look for a close tag). The
  # element must be followed by whitespace or '>' to count as a match.
  def find_element(element,st=0)
    # @option follows the historical Regexp.new convention: an Integer is a
    # flag mask, nil/false means no flags, anything else means "ignore case".
    # Passing the raw default "pi" is rejected by Ruby versions that parse a
    # String option as flag letters, so it is translated explicitly here.
    flags = case @option
            when Integer    then @option
            when nil, false then 0
            else Regexp::IGNORECASE
            end
    # \s already covers "\n"; plain \s* / [\s>] match the same text as the
    # former (\s|\n)* / (\s|\n|>) without the redundant alternation.
    rex=Regexp.new('<\s*'+element+'[\s>]',flags)
    @text.index(rex,st)
  end
find_openenumtag(tag,st=0)

Alias for find_opentag

find_opentag(tag,st=0)
This method is also aliased as find_openenumtag
# File lib/facets/more/tagiterator.rb, line 141
  # Finds the next opening +tag+ at or after offset +st+.
  #
  # Returns a two-element result: the offset just past the tag's closing '>'
  # and the text between '<' and '>' (tag name plus attribute string).
  # Returns nil when the tag is not found or is never closed by a '>'.
  def find_opentag(tag,st=0)
    s=find_element(tag,st)
    return nil unless s

    r=@text.index('>',s)
    return nil unless r   # truncated tag: nothing usable to report
    return r+1,@text[s+1..r-1]
  end
parse_attribute(attstr)
# File lib/facets/more/tagiterator.rb, line 133
  # Parses the inside of an opening tag into a Hash of attributes.
  #
  # attstr - text found between '<' and '>' (tag name plus attributes).
  #
  # Recognises name=value pairs where the value is double-quoted,
  # single-quoted or a bare run of non-whitespace characters. Quotes are
  # stripped from quoted values; an unterminated quote is kept as a bare value.
  # Attribute names are down-cased; when a name occurs more than once the
  # last occurrence wins.
  #
  # Returns the Hash (empty when no attribute is present).
  def parse_attribute(attstr)
    r={}
    # A single left-to-right pass consumes each quoted value as a whole, so a
    # '=' that appears inside a quoted value cannot start a bogus attribute.
    attstr.scan(/(\w+)=(?:"([^"]*)"|'([^']*)'|(\S+))/) do |name,dq,sq,bare|
      r[name.downcase]=dq || sq || bare
    end
    r
  end