123456789_123456789_123456789_123456789_123456789_

Class: Nokogiri::HTML4::EncodingReader

Do not use. This class is for internal use only.
Relationships & Source Files
Namespace Children
Classes:
Exceptions:
Inherits: Object
Defined in: lib/nokogiri/html4/encoding_reader.rb

Class Method Summary

Instance Attribute Summary

  • #encoding_found readonly

    This method is used by the C extension so that Nokogiri::HTML4::Document#read_io() does not leak memory when EncodingReader::EncodingFound is raised.

Instance Method Summary

Constructor Details

.new(io) ⇒ EncodingReader

[ GitHub ]

  
# File 'lib/nokogiri/html4/encoding_reader.rb', line 82

# Wrap +io+ so that the first chunk read from it can be inspected for an
# encoding declaration before it is handed on.
#
# io:: the underlying stream; #read calls +read(len)+ on it.
def initialize(io)
  @io = io
  # Both start out unset: @firstchunk is filled in by the first #read, and
  # @encoding_found is assigned only when #read raises EncodingFound.
  @firstchunk = @encoding_found = nil
end

Class Method Details

.detect_encoding(chunk)

[ GitHub ]

  
# File 'lib/nokogiri/html4/encoding_reader.rb', line 59

# Sniff the character encoding declared near the start of a document.
#
# Strategies are tried in this order:
# 1. A leading XML declaration: it is parsed on its own with Nokogiri.XML
#    and that document's encoding is returned.
# 2. On JRuby: a regex search for a <meta ... charset=...> tag; failing
#    that, a SAX parse wrapped in catch(:encoding_found), whose thrown
#    value (if any) becomes the result.
#    NOTE(review): JumpSAXHandler presumably throws :encoding_found with
#    the encoding -- confirm against its definition.
# 3. Otherwise: the chunk is fed to a SAX push parser and the encoding
#    recorded by the SAXHandler is returned.
#
# chunk:: a String holding the leading part of the document.
#
# Returns the encoding reported by the strategy that applied. The JRuby
# SAX path yields nil when nothing is thrown.
def self.detect_encoding(chunk)
  xml_decl = chunk.match(/\A(<\?xml[ \t\r\n][^>]*>)/)
  return Nokogiri.XML(xml_decl[1]).encoding if xml_decl

  if Nokogiri.jruby?
    meta = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i)
    return meta[4] if meta

    catch(:encoding_found) do
      Nokogiri::HTML4::SAX::Parser.new(JumpSAXHandler.new(:encoding_found)).parse(chunk)
      nil
    end
  else
    handler = SAXHandler.new
    parser = Nokogiri::HTML4::SAX::PushParser.new(handler)
    begin
      parser << chunk
    rescue StandardError
      # Detection is best-effort: the chunk is usually a truncated document,
      # so parse failures (e.g. Nokogiri::SyntaxError) are expected here.
      # Whatever the handler recorded before the failure is still used.
    end
    handler.encoding
  end
end

Instance Attribute Details

#encoding_found (readonly)

This method is used by the C extension so that Nokogiri::HTML4::Document#read_io() does not leak memory when EncodingReader::EncodingFound is raised.

[ GitHub ]

  
# File 'lib/nokogiri/html4/encoding_reader.rb', line 91

# Set to the EncodingFound instance when #read raises it; starts as nil and
# is reset to nil by a #read that gets past the first-chunk check.
attr_reader :encoding_found

Instance Method Details

#read(len)

[ GitHub ]

  
# File 'lib/nokogiri/html4/encoding_reader.rb', line 93

# Read up to +len+ bytes, sniffing the encoding on the first call.
#
# The first chunk obtained from the underlying IO is kept in @firstchunk
# and passed to EncodingReader.detect_encoding. When an encoding is
# detected, an EncodingFound is stored in @encoding_found and raised; the
# buffered chunk stays in place so the next call replays it. Later calls
# drain the buffer first and top up from the IO.
#
# len:: number of bytes wanted. Calling without +len+ is not supported.
#
# Returns a String, or nil when neither the buffer nor the IO has data.
# Raises EncodingFound when the first chunk declares an encoding.
def read(len)
  unless @firstchunk
    @firstchunk = @io.read(len)
    return unless @firstchunk

    # The first call from htmlReadIO() is expected to ask for enough
    # bytes (~1KB) for advanced encoding detection to succeed.
    detected = EncodingReader.detect_encoding(@firstchunk)
    if detected
      # @firstchunk is deliberately left intact for the read made on retry.
      @encoding_found = EncodingFound.new(detected)
      raise @encoding_found
    end
  end
  @encoding_found = nil

  # Serve buffered bytes first, then fetch any shortfall from the IO.
  buffer = @firstchunk.slice!(0, len)
  shortfall = len - buffer.length
  if shortfall > 0
    tail = @io.read(shortfall)
    buffer << tail if tail
  end

  buffer.empty? ? nil : buffer
end