123456789_123456789_123456789_123456789_123456789_

Module: Sprockets::EncodingUtils

Relationships & Source Files
Defined in: lib/sprockets/encoding_utils.rb

Overview

Internal: HTTP transport encoding and charset detecting related functions. Mixed into Environment.

Constant Summary

Instance Method Summary

Instance Method Details

#base64(str)

Public: Use base64 to encode data.

str - String data

Returns an encoded String.

[ GitHub ]

  
# File 'lib/sprockets/encoding_utils.rb', line 72

# Public: Encode data as Base64.
#
# str - String data to encode.
#
# Returns the Base64-encoded String. The "m0" pack directive emits the
# result on a single line, without line feeds.
def base64(str)
  payload = [str]
  payload.pack("m0")
end

#charlock_detect(str)

Internal: Use Charlock Holmes to detect encoding.

To enable this code path, require ‘charlock_holmes’

Returns encoded String.

[ GitHub ]

  
# File 'lib/sprockets/encoding_utils.rb', line 121

# Internal: Use Charlock Holmes to tag a String with its detected encoding.
#
# Does nothing unless CharlockHolmes::EncodingDetector is defined, i.e. the
# caller has loaded the optional 'charlock_holmes' library.
#
# str - String to inspect. Its encoding tag is changed in place when the
#       detector reports an :encoding.
#
# Returns str (the same object that was passed in).
def charlock_detect(str)
  return str unless defined? CharlockHolmes::EncodingDetector

  detection = CharlockHolmes::EncodingDetector.detect(str)
  # Either the detector returned nothing, or it returned a result without
  # an :encoding entry; in both cases leave the string untouched.
  str.force_encoding(detection[:encoding]) if detection && detection[:encoding]

  str
end

#deflate(str)

Public: Use deflate to compress data.

str - String data

Returns a compressed String

[ GitHub ]

  
# File 'lib/sprockets/encoding_utils.rb', line 18

# Public: Use deflate to compress data.
#
# The stream is created with a negative window size (-Zlib::MAX_WBITS),
# which makes zlib emit a raw deflate stream without the zlib header and
# checksum trailer.
#
# str - String data
#
# Returns a compressed String.
def deflate(str)
  deflater = Zlib::Deflate.new(
    Zlib::BEST_COMPRESSION,
    -Zlib::MAX_WBITS,
    Zlib::MAX_MEM_LEVEL,
    Zlib::DEFAULT_STRATEGY
  )

  begin
    deflater << str
    deflater.finish
  ensure
    # finish only flushes the output; close releases the native zlib state
    # right away instead of leaving it for the garbage collector.
    deflater.close
  end
end

#detect(str)

Public: Basic string detector.

Attempts to parse any Unicode BOM, then attempts Charlock detection, and finally falls back to the environment’s external encoding.

str - ASCII-8BIT encoded String

Returns encoded String.

[ GitHub ]

  
# File 'lib/sprockets/encoding_utils.rb', line 99

# Public: Basic string encoding detector.
#
# Tries, in order: a Unicode BOM, Charlock detection, and finally the
# environment's external encoding. Each later step only runs while the
# string is still tagged as binary.
#
# str - ASCII-8BIT encoded String
#
# Returns encoded String.
def detect(str)
  decoded = detect_unicode_bom(str)

  # No BOM matched: give Charlock a chance (it retags the string in place).
  charlock_detect(decoded) if decoded.encoding == Encoding::BINARY

  # Still undetermined: assume the environment's external encoding.
  decoded.force_encoding(Encoding.default_external) if decoded.encoding == Encoding::BINARY

  decoded
end

#detect_css(str)

Public: Detect and strip @charset from CSS style sheet.

str - String.

Returns an encoded String.

[ GitHub ]

  
# File 'lib/sprockets/encoding_utils.rb', line 177

# Public: Detect and strip @charset from CSS style sheet.
#
# A Unicode BOM is handled first. If a @charset name is then found, a copy
# of the string is tagged with that encoding and the leading @charset rule
# is removed from the copy. A string that is still binary afterwards is
# tagged as UTF-8.
#
# str - String.
#
# Returns an encoded String. Encoding.find raises ArgumentError when the
# declared charset name is not a known encoding.
def detect_css(str)
  css = detect_unicode_bom(str)

  charset_name = scan_css_charset(css)
  if charset_name
    declared = Encoding.find(charset_name)
    css = css.dup.force_encoding(declared)

    # Measure the rule after encoding it the same way as the sheet, so the
    # number of characters sliced off matches the declared encoding.
    rule_length = "@charset \"#{charset_name}\";".encode(declared).size
    css.slice!(0, rule_length)
  end

  # Fallback to UTF-8
  css.force_encoding(Encoding::UTF_8) if css.encoding == Encoding::BINARY

  css
end

#detect_html(str)

Public: Detect charset from HTML document.

Attempts to parse any Unicode BOM, otherwise attempts Charlock detection, and finally falls back to the environment’s external encoding.

str - String.

Returns an encoded String.

[ GitHub ]

  
# File 'lib/sprockets/encoding_utils.rb', line 244

# Public: Detect charset from HTML document.
#
# Tries a Unicode BOM first, then Charlock detection, and finally falls
# back to the environment's external encoding. Detection stops as soon as
# the string is no longer tagged as binary.
#
# str - String.
#
# Returns an encoded String.
def detect_html(str)
  html = detect_unicode_bom(str)
  return html unless html.encoding == Encoding::BINARY

  # Charlock retags the string in place when it recognises the encoding.
  charlock_detect(html)
  return html unless html.encoding == Encoding::BINARY

  # Last resort: the environment's external encoding.
  html.force_encoding(Encoding.default_external)
end

#detect_unicode(str)

Public: Detect Unicode string.

Attempts to parse Unicode BOM and falls back to UTF-8.

str - ASCII-8BIT encoded String

Returns encoded String.

[ GitHub ]

  
# File 'lib/sprockets/encoding_utils.rb', line 138

# Public: Detect Unicode string.
#
# Parses a Unicode BOM when present; a string that is still tagged as
# binary afterwards is tagged as UTF-8.
#
# str - ASCII-8BIT encoded String
#
# Returns encoded String.
def detect_unicode(str)
  text = detect_unicode_bom(str)

  # force_encoding returns its receiver, so both branches yield `text`.
  text.encoding == Encoding::BINARY ? text.force_encoding(Encoding::UTF_8) : text
end

#detect_unicode_bom(str)

Public: Detect and strip BOM from possible unicode string.

str - ASCII-8BIT encoded String

Returns UTF 8/16/32 encoded String without BOM or the original String if no BOM was present.

[ GitHub ]

  
# File 'lib/sprockets/encoding_utils.rb', line 156

# Public: Detect and strip BOM from possible unicode string.
#
# The first (up to) four bytes of the string are compared against each
# entry of BOM; the first entry, in BOM's iteration order, whose byte
# sequence is a prefix of the string wins.
#
# str - ASCII-8BIT encoded String
#
# Returns a copy of the String with the BOM removed and tagged with the
# matching encoding, or the original String if no BOM was present.
def detect_unicode_bom(str)
  leading = str.byteslice(0, 4).bytes.to_a

  match = BOM.find { |_, bytes| leading[0, bytes.size] == bytes }
  return str unless match

  encoding, marker = match

  stripped = str.dup
  # Slice while tagged as binary so the BOM is removed byte by byte.
  stripped.force_encoding(Encoding::BINARY)
  stripped.slice!(0, marker.size)
  stripped.force_encoding(encoding)
end

#gzip(str)

Public: Use gzip to compress data.

str - String data

Returns a compressed String

[ GitHub ]

  
# File 'lib/sprockets/encoding_utils.rb', line 58

# Public: Use gzip to compress data.
#
# The header mtime is set to the fixed value 1 rather than left at its
# default (presumably so the output does not vary between runs).
#
# str - String data
#
# Returns a compressed String.
def gzip(str)
  buffer = StringIO.new
  writer = Zlib::GzipWriter.new(buffer, Zlib::BEST_COMPRESSION)
  writer.mtime = 1
  writer.write(str)
  # finish completes the gzip stream without closing the StringIO.
  writer.finish
  buffer.string
end

#scan_css_charset(str)

Internal: Scan binary CSS string for @charset encoding name.

str - ASCII-8BIT encoded String

Returns encoding String name or nil.

[ GitHub ]

  
# File 'lib/sprockets/encoding_utils.rb', line 207

# Internal: Scan binary CSS string for @charset encoding name.
#
# Only the first line is examined. NUL bytes are skipped and do not count
# towards the position. The first CHARSET_SIZE counted bytes must equal
# CHARSET_START (presumably the bytes of the `@charset "` prefix; confirm
# against the constant's definition). The bytes after that, up to the next
# double quote, are the name.
#
# str - ASCII-8BIT encoded String
#
# Returns encoding String name or nil.
def scan_css_charset(str)
  collected = []
  seen = 0

  str.each_byte do |byte|
    # Halt on line breaks
    break if byte == 0x0A || byte == 0x0D

    # Skip NUL bytes. Every other byte value (0x01..0xFF) is accepted,
    # including non-ASCII ones.
    next unless (0x01..0xFF).cover?(byte)

    if seen == CHARSET_SIZE
      # Prefix fully read: give up unless it is the expected one, otherwise
      # start collecting the name with the current byte.
      break unless collected == CHARSET_START

      collected = []
    elsif seen > CHARSET_SIZE && byte == 0x22
      # Closing double quote: the name is complete.
      return collected.pack('C*')
    end

    collected << byte
    seen += 1
  end

  nil
end

#unmarshaled_deflated(str, window_bits = -Zlib::MAX_WBITS)

Internal: Unmarshal optionally deflated data.

Checks the leading marshal header to see if the bytes are uncompressed; otherwise inflates the data and unmarshals it.

str - Marshaled String

window_bits - Integer deflate window size. See Zlib::Inflate.new()

Returns unmarshaled Object or raises an Exception.

[ GitHub ]

  
# File 'lib/sprockets/encoding_utils.rb', line 38

# Internal: Unmarshal optionally deflated data.
#
# Checks the leading marshal header to see if the bytes are uncompressed;
# otherwise inflates the data and unmarshals the result. If inflating fails
# with Zlib::DataError, the original bytes are unmarshaled as they are.
#
# str         - Marshaled String, optionally deflated.
# window_bits - Integer deflate window size. See Zlib::Inflate.new()
#
# Returns unmarshaled Object or raises an Exception.
def unmarshaled_deflated(str, window_bits = -Zlib::MAX_WBITS)
  # Inspect raw bytes rather than characters: String#ord raises
  # ArgumentError when the string is tagged with a multi-byte encoding and
  # begins with a byte sequence that is invalid in it, which compressed
  # data easily does.
  major = str.getbyte(0)
  minor = str.getbyte(1)

  if major == Marshal::MAJOR_VERSION && minor && minor <= Marshal::MINOR_VERSION
    marshaled = str
  else
    inflater = Zlib::Inflate.new(window_bits)
    begin
      marshaled = inflater.inflate(str)
    rescue Zlib::DataError
      # Not a deflate stream after all; let Marshal.load judge the raw bytes.
      marshaled = str
    ensure
      # Release the native zlib state right away instead of waiting for GC.
      inflater.close
    end
  end

  # NOTE(review): Marshal.load can instantiate arbitrary objects; callers
  # must only pass data from a trusted source.
  Marshal.load(marshaled)
end