Class: Rack::Multipart::Parser

Relationships & Source Files
Namespace Children
Classes: `BoundedIO`, `Collector`, `MultipartInfo`
Inherits:	Object
Defined in:	lib/rack/multipart/parser.rb

Overview

Parser handles parsing of multipart/form-data requests.

File Parameter Contents

When processing file uploads, the parser returns a hash containing information about uploaded files. For file parameters, the hash includes:

:filename - The original filename, already URL decoded by the parser
:type - The content type of the uploaded file
:name - The parameter name from the form
:tempfile - A Tempfile object containing the uploaded data
:head - The raw header content for this part

Constant Summary

BUFSIZE =
# File 'lib/rack/multipart/parser.rb', line 54
```
1_048_576
```
CHARSET =
# File 'lib/rack/multipart/parser.rb', line 468
```
"charset"
```
CONTENT_DISPOSITION_MAX_BYTES =
# File 'lib/rack/multipart/parser.rb', line 320
```
1536
```
CONTENT_DISPOSITION_MAX_PARAMS =
# File 'lib/rack/multipart/parser.rb', line 319
```
16
```
EMPTY =
# File 'lib/rack/multipart/parser.rb', line 92
```
MultipartInfo.new(nil, [])
```

REENCODE_DUMMY_ENCODINGS =
# File 'lib/rack/multipart/parser.rb', line 510

{
  # Encodings listed here are "dummy" encodings that the parser is willing
  # to re-encode; handle_dummy_encoding converts name and body to UTF-8
  # when the name is tagged with one of them.
  #
  # ISO-2022-JP is a legacy encoding that is still widely used in Japan.
  Encoding::ISO_2022_JP => true

  # Other dummy encodings are rarely used and are not supported yet.
  # Adding support for them requires careful consideration.
}

TEMPFILE_FACTORY =
# File 'lib/rack/multipart/parser.rb', line 56

# Builds the Tempfile that stores an uploaded part.
#
# filename     - client-supplied file name; only its extension is reused.
# content_type - accepted for interface compatibility, not used here.
#
# Returns a new Tempfile named "RackMultipart…<extension>".
lambda do |filename, content_type|
  # NUL bytes are spelled out as "%00" before the extension is extracted.
  printable = filename.gsub("\0", '%00')
  # Keep at most 129 characters of the extension (leading dot included).
  suffix = ::File.extname(printable)[0, 129]

  Tempfile.new(["RackMultipart", suffix])
end

TEXT_PLAIN =
# File 'lib/rack/multipart/parser.rb', line 55
```
"text/plain"
```

Class Method Summary

.new(boundary, tempfile, bufsize, query_parser) ⇒ Parser constructor
.parse(io, content_length, content_type, tmpfile, bufsize, qp)
.parse_boundary(content_type)

Instance Attribute Summary

#state readonly

Instance Method Summary

#parse(io)
#result
#consume_boundary private

Scan until we find the start or end of the boundary.
#find_encoding(enc) private

Return the related Encoding object.
#handle_consume_token private
#handle_dummy_encoding(name, body) private
#handle_empty_content!(content) private
#handle_fast_forward private

This handles the initial parser state.
#handle_mime_body private
#handle_mime_head private
#normalize_filename(filename) private
#read_data(io, outbuf) private
#tag_multipart_encoding(filename, content_type, name, body) private

Constructor Details

.new(boundary, tempfile, bufsize, query_parser) ⇒ `Parser`

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 220


# Prepares a parser for one multipart body.
#
# boundary     - boundary string taken from the Content-Type header
# tempfile     - handed to Collector.new unchanged
# bufsize      - number of bytes requested from the IO per read
# query_parser - must respond to make_params (and, later, normalize_params)
def initialize(boundary, tempfile, bufsize, query_parser)
  @query_parser = query_parser
  @params       = query_parser.make_params
  @bufsize      = bufsize

  # The state machine starts by skipping ahead to the first boundary.
  @state      = :FAST_FORWARD
  @mime_index = 0
  @collector  = Collector.new(tempfile)

  # Unparsed input accumulates in this scanner.
  @sbuf = StringScanner.new("".dup)

  quoted_boundary    = Regexp.quote(boundary)
  @body_regex        = /(?:#{EOL}|\A)--#{quoted_boundary}(?:#{EOL}|--)/m
  @body_regex_at_end = /#{@body_regex}\z/m
  @head_regex        = /(.*?#{EOL})#{EOL}/m

  boundary_bytes     = boundary.bytesize
  # "--" in front of the boundary plus "--" behind it.
  @end_boundary_size = boundary_bytes + 4
  # "\r\n--" in front of the boundary plus either "\r\n" or "--" behind it.
  @rx_max_size       = boundary_bytes + 6
end

Class Method Details

.parse(io, content_length, content_type, tmpfile, bufsize, qp)

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 101


# Parses a complete multipart body read from +io+.
#
# Returns EMPTY when content_length is 0 or when no boundary can be
# extracted from content_type; otherwise returns the parser's #result.
#
# Raises BoundaryTooLongError when the boundary exceeds 70 characters.
def self.parse(io, content_length, content_type, tmpfile, bufsize, qp)
  return EMPTY if 0 == content_length

  boundary = parse_boundary(content_type)
  return EMPTY unless boundary

  # RFC 1521 Section 7.2.1 imposes a 70 character maximum for the boundary.
  # Most clients use no more than 55 characters.
  boundary_chars = boundary.length
  if boundary_chars > 70
    raise BoundaryTooLongError, "multipart boundary size too large (#{boundary_chars} characters)"
  end

  # Only wrap the IO when a content length is known.
  source = content_length ? BoundedIO.new(io, content_length) : io

  instance = new(boundary, tmpfile, bufsize, qp)
  instance.parse(source)
  instance.result
end

.parse_boundary(content_type)

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 94


# Extracts the multipart boundary from a Content-Type value.
#
# Returns the first capture group of MULTIPART, or nil when content_type
# is missing or does not match.
def self.parse_boundary(content_type)
  return nil unless content_type

  matched = content_type.match(MULTIPART)
  matched[1] if matched
end

Instance Attribute Details

#state (readonly)

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 218


# Current state of the parsing state machine. The code on this page sets it
# to :FAST_FORWARD, :MIME_HEAD, :MIME_BODY, :CONSUME_TOKEN or :DONE.
attr_reader :state

Instance Method Details

#consume_boundary (private)

Scan until we find the start or end of the boundary. If we find it, return the appropriate symbol for the start or end of the boundary. If we don’t find the start or end of the boundary, clear the buffer and return nil.

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 449


# Advances the scanner past the next boundary, if there is one.
#
# Returns :BOUNDARY when the consumed text ends with EOL, :END_BOUNDARY
# when it does not, and nil (after moving the scanner to the end of its
# buffer) when no boundary is found.
def consume_boundary
  consumed = @sbuf.scan_until(@body_regex)

  unless consumed
    @sbuf.terminate
    return nil
  end

  consumed.end_with?(EOL) ? :BOUNDARY : :END_BOUNDARY
end

#find_encoding(enc) (private)

Return the related Encoding object. However, because enc is submitted by the user, it may be invalid, so use a binary encoding in that case.

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 504


# Looks up the Encoding named by +enc+.
#
# +enc+ comes from the client, so every failure mode falls back to
# Encoding::BINARY instead of propagating:
# * an unknown name makes Encoding.find raise ArgumentError;
# * a non-string such as nil (e.g. an empty filename* value) makes it
#   raise TypeError;
# * special names such as "internal" can make it return nil, which a later
#   force_encoding call would reject.
#
# Returns an Encoding, never nil.
def find_encoding(enc)
  Encoding.find(enc) || Encoding::BINARY
rescue ArgumentError, TypeError
  Encoding::BINARY
end

#handle_consume_token (private)

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 309


# Consumes the boundary that follows a part body and picks the next state.
#
# The parser is :DONE after a closing boundary, or when the buffer is
# exhausted without an opening boundary having been found; in every other
# case the next part's headers follow (:MIME_HEAD).
#
# Returns the new state.
def handle_consume_token
  token = consume_boundary
  # Running out of buffer only ends parsing when we did not just consume
  # the boundary of a further field.
  finished = token == :END_BOUNDARY || (@sbuf.eos? && token != :BOUNDARY)
  @state = finished ? :DONE : :MIME_HEAD
end

#handle_dummy_encoding(name, body) (private)

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 519


# Re-encodes +name+ and +body+ to UTF-8 when +name+ carries a supported
# dummy encoding.
#
# A string with a 'dummy' encoding does not have full functionality and can
# cause errors, so it is converted to UTF-8 to be handled properly. Only
# encodings listed in REENCODE_DUMMY_ENCODINGS are converted.
#
# Returns [name, body]; the inputs are returned untouched when no
# conversion applies.
def handle_dummy_encoding(name, body)
  tagged_as = name.encoding
  return [name, body] unless tagged_as.dummy? && REENCODE_DUMMY_ENCODINGS[tagged_as]

  [name.encode(Encoding::UTF_8), body.encode(Encoding::UTF_8)]
end

#handle_empty_content!(content) (private)

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 529


# Guards against a read that produced no data.
#
# Raises EmptyContentError when +content+ is nil or empty; otherwise does
# nothing and returns nil.
def handle_empty_content!(content)
  raise EmptyContentError if content.nil? || content.empty?
end

#handle_fast_forward (private)

This handles the initial parser state. We read until we find the starting boundary, then we can transition to the next state. If we find the ending boundary, this is an invalid multipart upload, but we keep scanning for the opening boundary in that case. If no boundary is found, we need to keep reading data and retry. It’s highly unlikely that the initial read will not consume the boundary. The client would have to deliberately craft a request with the opening boundary beyond the buffer size for that to happen.

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 286


# Initial state: skip forward to the first opening boundary.
#
# Returns :want_read when the buffered data holds no boundary, and nil
# after switching @state to :MIME_HEAD (opening boundary found) or :DONE
# (the buffer is nothing but a closing boundary followed by EOL).
def handle_fast_forward
  loop do
    token = consume_boundary

    if token == :BOUNDARY
      # Opening boundary found; headers of the first part follow.
      @state = :MIME_HEAD
      return
    end

    # No boundary in the buffered data: ask the caller for more.
    return :want_read unless token == :END_BOUNDARY

    # A closing boundary before any part is an invalid upload. Stop only
    # when the buffer consists of just that closing boundary; otherwise
    # go around again and keep looking for an opening boundary.
    if @sbuf.pos == @end_boundary_size && @sbuf.rest == EOL
      @state = :DONE
      return
    end
  end
end

#handle_mime_body (private)

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 426


# Streams the body of the current part into the collector.
#
# When the buffer already contains the boundary that ends the part, the
# body (without the boundary) is handed to the collector, the scanner is
# moved past the body, and the state becomes :CONSUME_TOKEN. The new state
# is returned.
#
# Otherwise everything except the last @rx_max_size bytes is flushed to the
# collector (presumably so a boundary split across two reads can still be
# matched once more data arrives) and :want_read is returned.
def handle_mime_body
  if (body_with_boundary = @sbuf.check_until(@body_regex)) # check but do not advance the pointer yet
    body = body_with_boundary.sub(@body_regex_at_end, '') # remove the boundary from the string
    @collector.on_mime_body @mime_index, body
    # StringScanner#pos is a byte offset, so advance by the body's byte
    # size (not its character count), then skip \r\n after the content.
    @sbuf.pos += body.bytesize + 2
    @state = :CONSUME_TOKEN
    @mime_index += 1
  else
    # Save what we have so far
    if @rx_max_size < @sbuf.rest_size
      delta = @sbuf.rest_size - @rx_max_size
      @collector.on_mime_body @mime_index, @sbuf.peek(delta)
      @sbuf.pos += delta
      # Drop the flushed bytes from the scanner's buffer.
      @sbuf.string = @sbuf.rest
    end
    :want_read
  end
end

#handle_mime_head (private)

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 321


# Parses the header block of the current part.
#
# Returns :want_read when the buffer does not yet contain a complete header
# block. Otherwise extracts the content type and the name / filename /
# filename* parameters of the Content-Disposition header, reports them via
# @collector.on_mime_head, switches @state to :MIME_BODY and returns it.
#
# Parts without a usable Content-Disposition header (missing, or longer
# than CONTENT_DISPOSITION_MAX_BYTES) take their name from the Content-ID
# header instead. A part that ends up without a name is named after its
# filename or, failing that, "<content type>[]".
def handle_mime_head
  if @sbuf.scan_until(@head_regex)
    head = @sbuf[1]
    content_type = head[MULTIPART_CONTENT_TYPE, 1]
    if (disposition = head[MULTIPART_CONTENT_DISPOSITION, 1]) &&
        disposition.bytesize <= CONTENT_DISPOSITION_MAX_BYTES

      # ignore actual content-disposition value (should always be form-data)
      if (i = disposition.index(';'))
        disposition.slice!(0, i+1)
      else
        # No semicolon: the header has a value but no parameter list, so
        # there is nothing left to parse. (Slicing with a nil index would
        # raise NoMethodError.)
        disposition = ''
      end
      param = nil
      num_params = 0

      # Parse parameter list
      while i = disposition.index('=')
        # Only parse up to max parameters, to avoid potential denial of service
        num_params += 1
        break if num_params > CONTENT_DISPOSITION_MAX_PARAMS

        # Found end of parameter name, ensure forward progress in loop
        param = disposition.slice!(0, i+1)

        # Remove ending equals and preceding whitespace from parameter name
        param.chomp!('=')
        param.lstrip!

        if disposition[0] == '"'
          # Parameter value is quoted, parse it, handling backslash escapes
          disposition.slice!(0, 1)
          value = String.new

          while i = disposition.index(/(["\\])/)
            c = $1

            # Append all content until ending quote or escape
            value << disposition.slice!(0, i)

            # Remove either backslash or ending quote,
            # ensures forward progress in loop
            disposition.slice!(0, 1)

            # stop parsing parameter value if found ending quote
            break if c == '"'

            escaped_char = disposition.slice!(0, 1)
            if param == 'filename' && escaped_char != '"'
              # Possible IE uploaded filename, append both escape backslash and value
              value << c << escaped_char
            else
              # Other only append escaped value
              value << escaped_char
            end
          end
        else
          if i = disposition.index(';')
            # Parameter value unquoted (which may be invalid), value ends at semicolon
            value = disposition.slice!(0, i)
          else
            # If no ending semicolon, assume remainder of line is value and stop
            # parsing
            disposition.strip!
            value = disposition
            disposition = ''
          end
        end

        case param
        when 'name'
          name = value
        when 'filename'
          filename = value
        when 'filename*'
          filename_star = value
        # else
        # ignore other parameters
        end

        # skip trailing semicolon, to proceed to next parameter
        if i = disposition.index(';')
          disposition.slice!(0, i+1)
        end
      end
    else
      name = head[MULTIPART_CONTENT_ID, 1]
    end

    if filename_star
      # filename* has the shape <charset>'<language>'<value>; it takes
      # precedence over a plain filename parameter.
      encoding, _, filename = filename_star.split("'", 3)
      filename = normalize_filename(filename || '')
      filename.force_encoding(find_encoding(encoding))
    elsif filename
      filename = normalize_filename(filename)
    end

    if name.nil? || name.empty?
      name = filename || "#{content_type || TEXT_PLAIN}[]".dup
    end

    @collector.on_mime_head @mime_index, head, filename, content_type, name
    @state = :MIME_BODY
  else
    :want_read
  end
end

#normalize_filename(filename) (private)

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 458


# Cleans up a client-supplied filename.
#
# The name is percent-decoded via Utils.unescape_path only when every "%"
# in it starts a valid two-digit hex escape. Invalid byte sequences are
# then scrubbed in place, and only the last path component (split on "/"
# or "\") is kept.
#
# Returns the component, or a new empty String when there is none.
def normalize_filename(filename)
  percent_runs = filename.scan(/%.?.?/)
  fully_escaped = percent_runs.all? { |run| /%[0-9a-fA-F]{2}/.match?(run) }
  filename = Utils.unescape_path(filename) if fully_escaped

  filename.scrub!

  last_component = filename.split(/[\/\\]/).last
  last_component || String.new
end

#parse(io)

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 237


# Drives the state machine until it reaches a terminal state.
#
# One chunk is read up front. After that the handler for the current
# @state runs repeatedly, and another chunk is read whenever a handler
# answers :want_read. Any state without a handler (i.e. :DONE) ends the
# loop.
#
# Returns nil.
def parse(io)
  outbuf = String.new
  read_data(io, outbuf)

  loop do
    handler =
      case @state
      when :FAST_FORWARD  then :handle_fast_forward
      when :CONSUME_TOKEN then :handle_consume_token
      when :MIME_HEAD     then :handle_mime_head
      when :MIME_BODY     then :handle_mime_body
      end

    # No handler means :DONE.
    return unless handler

    read_data(io, outbuf) if __send__(handler) == :want_read
  end
end

#read_data(io, outbuf) (private)

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 273


# Reads up to @bufsize bytes from +io+ (reusing +outbuf+ as the read
# buffer) and appends them to the scanner.
#
# An empty or nil read is rejected by handle_empty_content!.
# Returns the scanner.
def read_data(io, outbuf)
  chunk = io.read(@bufsize, outbuf)
  handle_empty_content!(chunk)
  @sbuf << chunk
end

#result

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 260


# Builds the final MultipartInfo from everything the collector gathered.
#
# Each part's data is tagged with an encoding, re-encoded if its name uses
# a supported dummy encoding, and merged into @params through the query
# parser. The second MultipartInfo argument is the list of bodies of all
# parts that answer true to file?.
def result
  @collector.each do |part|
    part.get_data do |payload|
      tag_multipart_encoding(part.filename, part.content_type, part.name, payload)
      key, value = handle_dummy_encoding(part.name, payload)
      @query_parser.normalize_params(@params, key, value)
    end
  end

  params_hash = @params.to_params_hash
  file_bodies = @collector.find_all { |part| part.file? }.map { |part| part.body }
  MultipartInfo.new(params_hash, file_bodies)
end

#tag_multipart_encoding(filename, content_type, name, body) (private)

[ GitHub ]

# File 'lib/rack/multipart/parser.rb', line 471


# Tags +name+ and +body+ with the encoding declared for the part.
#
# The name is always tagged as UTF-8 first. File parts (+filename+ set)
# stop there and return nil, leaving the body untouched. For all other
# parts both name and body are tagged with UTF-8, unless the content type
# is text/plain and carries a charset parameter, in which case that
# charset is used (find_encoding decides what an unknown charset maps to).
#
# The content type comes from the client, so malformed input is tolerated:
# an empty content type and parameters without a key or without "=value"
# are ignored rather than raising.
#
# Both strings are modified in place. Returns +body+ for non-file parts.
def tag_multipart_encoding(filename, content_type, name, body)
  name = name.to_s
  encoding = Encoding::UTF_8

  name.force_encoding(encoding)

  return if filename

  if content_type
    type_subtype, *params = content_type.split(';')

    # type_subtype is nil when content_type is empty.
    if type_subtype && TEXT_PLAIN == type_subtype.strip
      params.each do |param|
        k, v = param.split('=', 2)
        # Skip empty segments (";;") and bare tokens without a value.
        next unless k && v

        k = k.strip
        v = v.strip
        v = v[1..-2] if v.start_with?('"') && v.end_with?('"')
        encoding = find_encoding(v) if k == "charset"
      end
    end
  end

  name.force_encoding(encoding)
  body.force_encoding(encoding)
end

Class: Rack::Multipart::Parser

Overview

Constant Summary

Class Method Summary

Instance Attribute Summary

Instance Method Summary

Constructor Details

.new(boundary, tempfile, bufsize, query_parser) ⇒ Parser

Class Method Details

.parse(io, content_length, content_type, tmpfile, bufsize, qp)

.parse_boundary(content_type)

Instance Attribute Details

#state (readonly)

Instance Method Details

#consume_boundary (private)

#find_encoding(enc) (private)

#handle_consume_token (private)

#handle_dummy_encoding(name, body) (private)

#handle_empty_content!(content) (private)

#handle_fast_forward (private)

#handle_mime_body (private)

#handle_mime_head (private)

#normalize_filename(filename) (private)

#parse(io)

#read_data(io, outbuf) (private)

#result

#tag_multipart_encoding(filename, content_type, name, body) (private)

.new(boundary, tempfile, bufsize, query_parser) ⇒ `Parser`