Class: Rack::Multipart::Parser
Relationships & Source Files | |
Namespace Children | |
Classes:
| |
Inherits: | Object |
Defined in: | lib/rack/multipart/parser.rb |
Constant Summary
-
BUFSIZE =
# File 'lib/rack/multipart/parser.rb', line 40
1_048_576
-
CHARSET =
# File 'lib/rack/multipart/parser.rb', line 448
"charset"
-
CONTENT_DISPOSITION_MAX_BYTES =
# File 'lib/rack/multipart/parser.rb', line 300
1536
-
CONTENT_DISPOSITION_MAX_PARAMS =
# File 'lib/rack/multipart/parser.rb', line 299
16
-
EMPTY =
# File 'lib/rack/multipart/parser.rb', line 78
MultipartInfo.new(nil, [])
-
REENCODE_DUMMY_ENCODINGS =
# File 'lib/rack/multipart/parser.rb', line 490
{
  # ISO-2022-JP is a legacy but still widely used encoding in Japan
  # Here we convert ISO-2022-JP to UTF-8 so that it can be handled.
  Encoding::ISO_2022_JP => true

  # Other dummy encodings are rarely used and have not been supported yet.
  # Adding support for them will require careful considerations.
}
-
TEMPFILE_FACTORY =
# File 'lib/rack/multipart/parser.rb', line 42
lambda { |filename, content_type|
  extension = ::File.extname(filename.gsub("\0", '%00'))[0, 129]
  Tempfile.new(["RackMultipart", extension])
}
-
TEXT_PLAIN =
# File 'lib/rack/multipart/parser.rb', line 41
"text/plain"
Class Method Summary
Instance Attribute Summary
- #state readonly
Instance Method Summary
- #parse(io)
- #result
-
#consume_boundary
private
Scan until we find the start or end of the boundary.
-
#find_encoding(enc)
private
Return the related Encoding object.
- #handle_consume_token private
- #handle_dummy_encoding(name, body) private
- #handle_empty_content!(content) private
-
#handle_fast_forward
private
This handles the initial parser state.
- #handle_mime_body private
- #handle_mime_head private
- #normalize_filename(filename) private
- #read_data(io, outbuf) private
- #tag_multipart_encoding(filename, content_type, name, body) private
Constructor Details
.new(boundary, tempfile, bufsize, query_parser) ⇒ Parser
# File 'lib/rack/multipart/parser.rb', line 200
# Sets up a parser for one multipart body.
#
# boundary     - the multipart boundary string; it is escaped and embedded
#                in the boundary-matching regexes built here.
# tempfile     - passed straight through to Collector.new.
# bufsize      - stored as the size requested on each read from the input.
# query_parser - stored for later use; also asked for the initial params
#                container via #make_params.
def initialize(boundary, tempfile, bufsize, query_parser)
  @query_parser = query_parser
  @params = query_parser.make_params
  @bufsize = bufsize

  # The state machine starts by skipping ahead to the opening boundary.
  @state = :FAST_FORWARD
  @mime_index = 0
  @collector = Collector.new(tempfile)
  @sbuf = StringScanner.new("".dup)

  quoted_boundary = Regexp.quote(boundary)
  boundary_bytes = boundary.bytesize

  # A boundary line is preceded by EOL (or sits at the scan position) and is
  # followed by EOL (another part follows) or "--" (closing boundary).
  @body_regex = /(?:#{EOL}|\A)--#{quoted_boundary}(?:#{EOL}|--)/m
  @body_regex_at_end = /#{@body_regex}\z/m

  # "--" before and "--" after the boundary text.
  @end_boundary_size = boundary_bytes + 4
  # "\r\n--" before, and either "\r\n" or "--" after the boundary text.
  @rx_max_size = boundary_bytes + 6

  # Captures the header block (group 1) that ends at the first blank line.
  @head_regex = /(.*?#{EOL})#{EOL}/m
end
Class Method Details
.parse(io, content_length, content_type, tmpfile, bufsize, qp)
[ GitHub ]# File 'lib/rack/multipart/parser.rb', line 87
# Parses a multipart body from +io+ and returns the parser's #result.
#
# Returns EMPTY when +content_length+ is zero or when no boundary can be
# extracted from +content_type+. Raises BoundaryTooLongError when the
# boundary is longer than 70 characters. When +content_length+ is given,
# the input is wrapped in a BoundedIO built from it before parsing.
def self.parse(io, content_length, content_type, tmpfile, bufsize, qp)
  return EMPTY if 0 == content_length

  boundary = parse_boundary(content_type)
  return EMPTY unless boundary

  boundary_length = boundary.length
  if boundary_length > 70
    # RFC 1521 Section 7.2.1 caps the boundary at 70 characters;
    # most clients stay at or below 55.
    raise BoundaryTooLongError, "multipart boundary size too large (#{boundary_length} characters)"
  end

  source = content_length ? BoundedIO.new(io, content_length) : io

  parser = new(boundary, tmpfile, bufsize, qp)
  parser.parse(source)
  parser.result
end
.parse_boundary(content_type)
[ GitHub ]# File 'lib/rack/multipart/parser.rb', line 80
# Extracts the boundary from a content type string using the MULTIPART
# pattern. Returns the first capture group, or nil when +content_type+ is
# missing or does not match.
def self.parse_boundary(content_type)
  return unless content_type

  match = content_type.match(MULTIPART)
  match[1] if match
end
Instance Attribute Details
#state (readonly)
[ GitHub ]# File 'lib/rack/multipart/parser.rb', line 198
# Current state of the parser's state machine, held as a Symbol
# (:FAST_FORWARD initially; the handlers move it through :MIME_HEAD,
# :MIME_BODY, :CONSUME_TOKEN and :DONE).
attr_reader :state
Instance Method Details
#consume_boundary (private)
Scan until we find the start or end of the boundary. If we find it, return the appropriate symbol for the start or end of the boundary. If we don’t find the start or end of the boundary, clear the buffer and return nil.
# File 'lib/rack/multipart/parser.rb', line 429
# Advances the scanner through the next boundary line.
#
# Returns :BOUNDARY when the matched text ends with EOL, :END_BOUNDARY when
# it does not (i.e. it ended with "--"). When no boundary is present the
# scanner is moved to the end of its buffer and nil is returned.
def consume_boundary
  matched = @sbuf.scan_until(@body_regex)

  unless matched
    @sbuf.terminate
    return nil
  end

  matched.end_with?(EOL) ? :BOUNDARY : :END_BOUNDARY
end
#find_encoding(enc) (private)
Return the related Encoding object. However, because enc is submitted by the user, it may be invalid, so use a binary encoding in that case.
# File 'lib/rack/multipart/parser.rb', line 484
# Return the Encoding object named by +enc+.
#
# +enc+ is submitted by the user, so it may be invalid or missing entirely.
# Encoding::BINARY is returned whenever no usable Encoding can be resolved:
#
# * unknown names raise ArgumentError in Encoding.find;
# * nil (or any other non-String, non-Encoding value) raises TypeError;
# * special names such as "internal" can make Encoding.find return nil.
#
# Always returns an Encoding, so the result is safe for String#force_encoding.
def find_encoding(enc)
  Encoding.find(enc) || Encoding::BINARY
rescue ArgumentError, TypeError
  Encoding::BINARY
end
#handle_consume_token (private)
[ GitHub ]# File 'lib/rack/multipart/parser.rb', line 289
# Consumes the boundary that follows a part body and chooses the next state.
#
# Parsing is finished when the closing boundary was found, or when the
# buffer is exhausted without having seen an opening boundary. Otherwise
# the headers of the next part are expected.
def handle_consume_token
  token = consume_boundary
  finished = token == :END_BOUNDARY || (token != :BOUNDARY && @sbuf.eos?)
  @state = finished ? :DONE : :MIME_HEAD
end
#handle_dummy_encoding(name, body) (private)
[ GitHub ]# File 'lib/rack/multipart/parser.rb', line 499
# Re-encodes +name+ and +body+ to UTF-8 when +name+ carries a dummy
# encoding that is listed in REENCODE_DUMMY_ENCODINGS.
#
# Strings tagged with a dummy encoding lack full String functionality and
# can cause errors, so they are converted before further handling. In all
# other cases both values are returned untouched.
#
# Returns the pair [name, body].
def handle_dummy_encoding(name, body)
  source_encoding = name.encoding
  return [name, body] unless source_encoding.dummy? && REENCODE_DUMMY_ENCODINGS[source_encoding]

  [name.encode(Encoding::UTF_8), body.encode(Encoding::UTF_8)]
end
#handle_empty_content!(content) (private)
[ GitHub ]# File 'lib/rack/multipart/parser.rb', line 509
# Raises EmptyContentError when +content+ is nil or empty; otherwise does
# nothing and returns nil.
def handle_empty_content!(content)
  raise EmptyContentError if content.nil? || content.empty?
end
#handle_fast_forward (private)
This handles the initial parser state. We read until we find the starting boundary, then we can transition to the next state. If we find the ending boundary, this is an invalid multipart upload, but keep scanning for opening boundary in that case. If no boundary found, we need to keep reading data and retry. It’s highly unlikely the initial read will not consume the boundary. The client would have to deliberately craft a response with the opening boundary beyond the buffer size for that to happen.
# File 'lib/rack/multipart/parser.rb', line 266
# Handles the initial parser state: skip forward to the opening boundary.
#
# * Opening boundary found: switch to :MIME_HEAD and return nil.
# * Closing boundary found: the upload is invalid. If the buffer consists of
#   nothing but that closing boundary followed by EOL, switch to :DONE and
#   return nil; otherwise keep scanning for an opening boundary.
# * No boundary found: return :want_read so the caller supplies more data.
def handle_fast_forward
  loop do
    token = consume_boundary

    if token == :BOUNDARY
      # Opening boundary located; part headers come next.
      @state = :MIME_HEAD
      return
    elsif token == :END_BOUNDARY
      # Stop only when the whole buffer was a lone closing boundary.
      if @sbuf.pos == @end_boundary_size && @sbuf.rest == EOL
        @state = :DONE
        return
      end
      # Otherwise fall through and retry for an opening boundary.
    else
      # Nothing matched; more input is required.
      return :want_read
    end
  end
end
#handle_mime_body (private)
[ GitHub ]# File 'lib/rack/multipart/parser.rb', line 406
# Handles the body of the current part.
#
# When the buffer contains the next boundary, everything before it is handed
# to the collector as the final chunk of this part, the scan pointer is moved
# past the body and the EOL that precedes the boundary, and the state becomes
# :CONSUME_TOKEN.
#
# When no boundary is visible yet, whatever cannot be part of a boundary is
# flushed to the collector (the last @rx_max_size bytes are kept, since a
# boundary may straddle two reads) and :want_read is returned.
def handle_mime_body
  if (body_with_boundary = @sbuf.check_until(@body_regex)) # check but do not advance the pointer yet
    body = body_with_boundary.sub(@body_regex_at_end, '') # remove the boundary from the string
    @collector.on_mime_body @mime_index, body
    # StringScanner#pos is a byte offset, so advance by the body's byte size
    # (not its character count) plus 2 to skip the \r\n after the content.
    @sbuf.pos += body.bytesize + 2
    @state = :CONSUME_TOKEN
    @mime_index += 1
  else
    # Save what we have so far
    if @rx_max_size < @sbuf.rest_size
      delta = @sbuf.rest_size - @rx_max_size
      @collector.on_mime_body @mime_index, @sbuf.peek(delta)
      @sbuf.pos += delta
      # Drop the consumed prefix so the buffer does not grow without bound.
      @sbuf.string = @sbuf.rest
    end
    :want_read
  end
end
#handle_mime_head (private)
[ GitHub ]# File 'lib/rack/multipart/parser.rb', line 301
# Parses the header block of the current part.
#
# Once a complete header block (terminated by a blank line) is buffered, the
# Content-Type and Content-Disposition headers are extracted. The
# disposition's parameter list is parsed by hand to obtain +name+,
# +filename+ and +filename*+; if there is no usable Content-Disposition
# header, the Content-ID header is used as the name instead. The collector
# is then notified via #on_mime_head and the state becomes :MIME_BODY.
#
# Returns :want_read when the header block is not complete yet.
def handle_mime_head
  if @sbuf.scan_until(@head_regex)
    head = @sbuf[1]
    content_type = head[MULTIPART_CONTENT_TYPE, 1]
    if (disposition = head[MULTIPART_CONTENT_DISPOSITION, 1]) &&
        disposition.bytesize <= CONTENT_DISPOSITION_MAX_BYTES

      # ignore actual content-disposition value (should always be form-data)
      if (i = disposition.index(';'))
        disposition.slice!(0, i + 1)
      else
        # No semicolon means the header carries no parameters at all.
        # Without this guard `i + 1` would raise NoMethodError on nil.
        disposition.clear
      end
      param = nil
      num_params = 0

      # Parse parameter list
      while i = disposition.index('=')
        # Only parse up to max parameters, to avoid potential denial of service
        num_params += 1
        break if num_params > CONTENT_DISPOSITION_MAX_PARAMS

        # Found end of parameter name, ensure forward progress in loop
        param = disposition.slice!(0, i + 1)

        # Remove ending equals and preceding whitespace from parameter name
        param.chomp!('=')
        param.lstrip!

        if disposition[0] == '"'
          # Parameter value is quoted, parse it, handling backslash escapes
          disposition.slice!(0, 1)
          value = String.new

          while i = disposition.index(/(["\\])/)
            c = $1

            # Append all content until ending quote or escape
            value << disposition.slice!(0, i)

            # Remove either backslash or ending quote,
            # ensures forward progress in loop
            disposition.slice!(0, 1)

            # stop parsing parameter value if found ending quote
            break if c == '"'

            escaped_char = disposition.slice!(0, 1)
            if param == 'filename' && escaped_char != '"'
              # Possible IE uploaded filename, append both escape backslash and value
              value << c << escaped_char
            else
              # Other only append escaped value
              value << escaped_char
            end
          end
        else
          if i = disposition.index(';')
            # Parameter value unquoted (which may be invalid), value ends at semicolon
            value = disposition.slice!(0, i)
          else
            # If no ending semicolon, assume remainder of line is value and stop
            # parsing
            disposition.strip!
            value = disposition
            disposition = ''
          end
        end

        case param
        when 'name'
          name = value
        when 'filename'
          filename = value
        when 'filename*'
          filename_star = value
        # else
        #   ignore other parameters
        end

        # skip trailing semicolon, to proceed to next parameter
        if i = disposition.index(';')
          disposition.slice!(0, i + 1)
        end
      end
    else
      name = head[MULTIPART_CONTENT_ID, 1]
    end

    if filename_star
      # filename* has the form <encoding>'<language>'<value>; the language
      # component is discarded.
      encoding, _, filename = filename_star.split("'", 3)
      filename = normalize_filename(filename || '')
      filename.force_encoding(find_encoding(encoding))
    elsif filename
      filename = normalize_filename(filename)
    end

    # Fall back to the filename, or a name derived from the content type.
    if name.nil? || name.empty?
      name = filename || "#{content_type || TEXT_PLAIN}[]".dup
    end

    @collector.on_mime_head @mime_index, head, filename, content_type, name
    @state = :MIME_BODY
  else
    :want_read
  end
end
#normalize_filename(filename) (private)
[ GitHub ]# File 'lib/rack/multipart/parser.rb', line 438
# Reduces a client-supplied filename to its final path component.
#
# The name is percent-decoded via Utils.unescape_path, but only when every
# "%" in it starts a well-formed two-digit hex escape. Invalid byte
# sequences are then scrubbed in place, and everything up to the last "/" or
# "\" is dropped. Returns a new empty String when nothing remains.
def normalize_filename(filename)
  percent_sequences = filename.scan(/%.?.?/)
  fully_escaped = percent_sequences.all? { |sequence| sequence.match?(/%[0-9a-fA-F]{2}/) }
  filename = Utils.unescape_path(filename) if fully_escaped

  filename.scrub!

  basename = filename.split(/[\/\\]/).last
  basename || String.new
end
#parse(io)
[ GitHub ]# File 'lib/rack/multipart/parser.rb', line 217
# Drives the parser state machine over +io+.
#
# One chunk is read up front. After that the handler for the current state
# runs repeatedly; whenever a handler answers :want_read another chunk is
# read. The method returns nil as soon as the state is anything other than
# the four handled states (i.e. :DONE).
def parse(io)
  outbuf = String.new
  read_data(io, outbuf)

  handlers = {
    FAST_FORWARD: :handle_fast_forward,
    CONSUME_TOKEN: :handle_consume_token,
    MIME_HEAD: :handle_mime_head,
    MIME_BODY: :handle_mime_body
  }

  loop do
    handler = handlers[@state]
    return unless handler # :DONE

    read_data(io, outbuf) if send(handler) == :want_read
  end
end
#read_data(io, outbuf) (private)
[ GitHub ]# File 'lib/rack/multipart/parser.rb', line 253
# Reads up to @bufsize bytes from +io+ (reusing +outbuf+ as the read
# buffer), rejects an empty read through #handle_empty_content!, and appends
# the chunk to the scan buffer.
def read_data(io, outbuf)
  chunk = io.read(@bufsize, outbuf)
  handle_empty_content!(chunk)
  @sbuf << chunk
end
#result
[ GitHub ]# File 'lib/rack/multipart/parser.rb', line 240
# Builds the final MultipartInfo from everything the collector gathered.
#
# For every part that yields data, the name and data are tagged with an
# encoding, passed through the dummy-encoding conversion, and merged into
# the params via the query parser. The returned MultipartInfo carries the
# params hash and the bodies of all parts that report themselves as files.
def result
  @collector.each do |part|
    part.get_data do |raw_data|
      tag_multipart_encoding(part.filename, part.content_type, part.name, raw_data)
      field_name, field_value = handle_dummy_encoding(part.name, raw_data)
      @query_parser.normalize_params(@params, field_name, field_value)
    end
  end

  params_hash = @params.to_params_hash
  file_bodies = @collector.find_all { |part| part.file? }.map { |part| part.body }
  MultipartInfo.new(params_hash, file_bodies)
end
#tag_multipart_encoding(filename, content_type, name, body) (private)
[ GitHub ]# File 'lib/rack/multipart/parser.rb', line 451
# Tags +name+ and +body+ (in place) with the encoding that applies to a
# non-file part.
#
# +name+ is always tagged as UTF-8 first. For file parts (+filename+ given)
# nothing else happens and nil is returned. Otherwise the encoding defaults
# to UTF-8, unless the part's content type is text/plain with a +charset+
# parameter, in which case that charset is resolved via #find_encoding.
#
# The content type comes from the client, so malformed input is tolerated:
# an empty content type, empty parameter segments (";;") and parameters
# without "=" are skipped rather than raising NoMethodError on nil.
def tag_multipart_encoding(filename, content_type, name, body)
  name = name.to_s
  encoding = Encoding::UTF_8

  name.force_encoding(encoding)

  return if filename

  if content_type
    list = content_type.split(';')
    # list is empty for "" or ";", so guard against a nil first element.
    type_subtype = list.first.to_s.strip
    if TEXT_PLAIN == type_subtype
      list.drop(1).each do |param|
        k, v = param.split('=', 2)
        # Skip empty segments and parameters that carry no value.
        next unless k && v

        k.strip!
        v.strip!
        # Strip one pair of surrounding double quotes from the value.
        v = v[1..-2] if v.start_with?('"') && v.end_with?('"')
        encoding = find_encoding(v) if k == "charset"
      end
    end
  end

  name.force_encoding(encoding)
  body.force_encoding(encoding)
end