class Rack::Multipart::Parser
Rack::Multipart::Parser handles parsing of multipart/form-data requests.
File Parameter Contents
When processing file uploads, the parser returns a hash containing information about uploaded files. For file parameters, the hash includes:
- :filename - The original filename, already URL decoded by the parser
- :type - The content type of the uploaded file
- :name - The parameter name from the form
- :tempfile - A Tempfile object containing the uploaded data
- :head - The raw header content for this part
Constants
- BOUNDARY_START_LIMIT
- BUFFERED_UPLOAD_BYTESIZE_LIMIT
- BUFSIZE
- CHARSET
- CONTENT_DISPOSITION_MAX_BYTES
- CONTENT_DISPOSITION_MAX_PARAMS
- EMPTY
- MIME_HEADER_BYTESIZE_LIMIT
- MultipartInfo
- REENCODE_DUMMY_ENCODINGS
- TEMPFILE_FACTORY
- TEXT_PLAIN
Attributes
Public Class Methods
Source
# File lib/rack/multipart/parser.rb, line 235
# Prepare a parser for one multipart body delimited by +boundary+.
#
# boundary     :: boundary token; quoted and embedded in the matching regexes
# tempfile     :: passed straight to the Collector that gathers the parts
# bufsize      :: byte count requested from the input on every read
# query_parser :: supplies the params container via +make_params+
def initialize(boundary, tempfile, bufsize, query_parser)
  # Collaborators and configuration
  @query_parser = query_parser
  @params = query_parser.make_params
  @bufsize = bufsize
  @collector = Collector.new(tempfile)

  # Parser state: start by skipping ahead to the first boundary
  @state = :FAST_FORWARD
  @mime_index = 0
  @body_retained = nil
  @retained_size = 0
  @sbuf = StringScanner.new("".dup)

  # Boundary and header matching
  boundary_bytes = boundary.bytesize
  @body_regex = /(?:#{EOL}|\A)--#{Regexp.quote(boundary)}(?:#{EOL}|--)/m
  @body_regex_at_end = /#{@body_regex}\z/m
  # "--" before the boundary and "--" after it
  @end_boundary_size = boundary_bytes + 4
  # "\r\n--" before the boundary, then either "\r\n" or "--" after it
  @rx_max_size = boundary_bytes + 6
  @head_regex = /(.*?#{EOL})#{EOL}/m
end
Source
# File lib/rack/multipart/parser.rb, line 122
# Parse a multipart body read from +io+ and return the parser's result.
#
# Returns EMPTY when +content_length+ is zero or when no boundary can be
# extracted from +content_type+. Raises BoundaryTooLongError when the
# boundary is longer than 70 characters.
def self.parse(io, content_length, content_type, tmpfile, bufsize, qp)
  return EMPTY if content_length == 0

  boundary = parse_boundary(content_type)
  return EMPTY unless boundary

  # RFC 1521 Section 7.2.1 imposes a 70 character maximum for the boundary.
  # Most clients use no more than 55 characters.
  boundary_chars = boundary.length
  if boundary_chars > 70
    raise BoundaryTooLongError, "multipart boundary size too large (#{boundary_chars} characters)"
  end

  # Only wrap the input when a content length is known
  source = content_length ? BoundedIO.new(io, content_length) : io

  parser = new(boundary, tmpfile, bufsize, qp)
  parser.parse(source)
  parser.result
end
Source
# File lib/rack/multipart/parser.rb, line 115
# Extract the boundary from +content_type+ using the MULTIPART pattern.
# Returns the first capture group, or nil when +content_type+ is missing
# or does not match.
def self.parse_boundary(content_type)
  return unless content_type

  matched = content_type.match(MULTIPART)
  matched ? matched[1] : nil
end
Public Instance Methods
Source
# File lib/rack/multipart/parser.rb, line 254
# Drive the state machine over +io+ until it reaches a state with no
# handler (:DONE). Each handler may answer :want_read, in which case more
# data is pulled from +io+ before the next step. Returns nil.
def parse(io)
  outbuf = String.new
  read_data(io, outbuf)

  loop do
    status =
      case @state
      when :FAST_FORWARD  then handle_fast_forward
      when :CONSUME_TOKEN then handle_consume_token
      when :MIME_HEAD     then handle_mime_head
      when :MIME_BODY     then handle_mime_body
      else break # :DONE
      end

    read_data(io, outbuf) if status == :want_read
  end
end
Source
# File lib/rack/multipart/parser.rb, line 277
# Fold every collected part into the params and wrap the outcome in a
# MultipartInfo, together with the bodies of the parts that are files.
def result
  @collector.each do |part|
    part.get_data do |payload|
      # Tag name and payload with the right encoding before normalizing
      tag_multipart_encoding(part.filename, part.content_type, part.name, payload)
      key, value = handle_dummy_encoding(part.name, payload)
      @query_parser.normalize_params(@params, key, value)
    end
  end

  params_hash = @params.to_params_hash
  file_bodies = @collector.find_all(&:file?).map(&:body)
  MultipartInfo.new(params_hash, file_bodies)
end
Private Instance Methods
Source
# File lib/rack/multipart/parser.rb, line 493
# Advance the scanner through the next boundary match.
# Returns :BOUNDARY when the matched text ends with EOL, :END_BOUNDARY
# otherwise. When nothing matches, the scanner is moved to its end and
# nil is returned.
def consume_boundary
  scanned = @sbuf.scan_until(@body_regex)

  unless scanned
    @sbuf.terminate
    return nil
  end

  return :BOUNDARY if scanned.end_with?(EOL)

  :END_BOUNDARY
end
Scan until we find the start or end of the boundary. If we find it, return the appropriate symbol for the start or end of the boundary. If we don't find the start or end of the boundary, clear the buffer and return nil.
Source
# File lib/rack/multipart/parser.rb, line 548
# Return the Encoding object named by +enc+.
#
# +enc+ is submitted by the user, so it may be invalid. An unknown name
# makes Encoding.find raise ArgumentError; a value that is not a string
# at all, such as nil (which handle_mime_head passes when an empty
# filename* value yields no charset), makes it raise TypeError. Both
# cases fall back to the binary encoding instead of propagating.
def find_encoding(enc)
  Encoding.find enc
rescue ArgumentError, TypeError
  Encoding::BINARY
end
Return the related Encoding object. However, because enc is submitted by the user, it may be invalid, so use a binary encoding in that case.
Source
# File lib/rack/multipart/parser.rb, line 330
# Consume the boundary that follows a part body and choose the next state.
# Parsing finishes on an end boundary, or when the buffer is exhausted
# without having found the start of another part; otherwise the headers
# of the next part are expected. Returns the new state.
def handle_consume_token
  token = consume_boundary
  finished = token == :END_BOUNDARY || (token != :BOUNDARY && @sbuf.eos?)
  @state = finished ? :DONE : :MIME_HEAD
end
Source
# File lib/rack/multipart/parser.rb, line 563
# A string with a 'dummy' encoding lacks full functionality and can cause
# errors. When +name+ carries a dummy encoding listed in
# REENCODE_DUMMY_ENCODINGS, both +name+ and +body+ are converted to UTF-8
# so they can be handled properly. Returns the pair [name, body].
def handle_dummy_encoding(name, body)
  name_encoding = name.encoding
  return name, body unless name_encoding.dummy? && REENCODE_DUMMY_ENCODINGS[name_encoding]

  utf8_name = name.encode(Encoding::UTF_8)
  utf8_body = body.encode(Encoding::UTF_8)
  [utf8_name, utf8_body]
end
Source
# File lib/rack/multipart/parser.rb, line 573
# Raise EmptyContentError when +content+ is nil or empty; otherwise do
# nothing.
def handle_empty_content!(content)
  raise EmptyContentError if content.nil? || content.empty?
end
Source
# File lib/rack/multipart/parser.rb, line 303
# Initial parser state: skip ahead to the opening boundary.
# Returns nil after switching state, or :want_read when more input is
# needed. Raises Error when the buffered data exceeds
# BOUNDARY_START_LIMIT without containing a boundary.
def handle_fast_forward
  loop do
    token = consume_boundary

    if token == :BOUNDARY
      # Opening boundary located; the part headers come next
      @state = :MIME_HEAD
      return
    end

    if token == :END_BOUNDARY
      # An end boundary before any opening boundary is an invalid upload.
      # Stop only when the buffer holds nothing but that end boundary;
      # otherwise go around again and keep looking for an opening boundary.
      next unless @sbuf.pos == @end_boundary_size && @sbuf.rest == EOL

      @state = :DONE
      return
    end

    # We raise if we don't find the multipart boundary, to avoid unbounded memory
    # buffering. Note that the actual limit is the higher of 16KB and the buffer size (1MB by default)
    raise Error, "multipart boundary not found within limit" if @sbuf.string.bytesize > BOUNDARY_START_LIMIT

    # Nothing found yet; ask the caller for more data
    return :want_read
  end
end
This handles the initial parser state. We read until we find the starting boundary, then we can transition to the next state. If we find the ending boundary, this is an invalid multipart upload, but keep scanning for opening boundary in that case. If no boundary found, we need to keep reading data and retry. It's highly unlikely the initial read will not consume the boundary. The client would have to deliberately craft a request with the opening boundary beyond the buffer size for that to happen.
Source
# File lib/rack/multipart/parser.rb, line 460
# Consume body data for the current part.
#
# When the buffer contains the next boundary, everything before it is
# handed to the collector, the scanner is moved past the body and the
# line break that follows it, and the state becomes :CONSUME_TOKEN.
#
# When no boundary is buffered yet, all data except the last @rx_max_size
# bytes (which could hold the beginning of a boundary) is handed to the
# collector and dropped from the buffer, and :want_read is returned.
#
# Body bytes count against the retained-size limit only while
# @body_retained is set.
def handle_mime_body
  if (body_with_boundary = @sbuf.check_until(@body_regex)) # check but do not advance the pointer yet
    body = body_with_boundary.sub(@body_regex_at_end, '') # remove the boundary from the string
    body_bytes = body.bytesize
    update_retained_size(body_bytes) if @body_retained
    @collector.on_mime_body @mime_index, body
    # The scan pointer is a byte offset, so advance by bytes rather than
    # characters; the two only differ when the buffer is not binary.
    @sbuf.pos += body_bytes + 2 # skip \r\n after the content
    @state = :CONSUME_TOKEN
    @mime_index += 1
  else
    # Save what we have so far
    if @rx_max_size < @sbuf.rest_size
      delta = @sbuf.rest_size - @rx_max_size
      body = @sbuf.peek(delta)
      update_retained_size(body.bytesize) if @body_retained
      @collector.on_mime_body @mime_index, body
      @sbuf.pos += delta
      # Drop the consumed prefix so the buffer does not keep growing
      @sbuf.string = @sbuf.rest
    end
    :want_read
  end
end
Source
# File lib/rack/multipart/parser.rb, line 342
# Parse the headers of the current part.
#
# Once the blank line ending the headers is buffered, this extracts the
# content type and the +name+, +filename+ and +filename*+ parameters of
# the Content-Disposition header, reports them to the collector and
# switches to :MIME_BODY. When there is no Content-Disposition header, or
# its value is longer than CONTENT_DISPOSITION_MAX_BYTES, the name is
# taken from the Content-ID header instead.
#
# Returns :want_read while the headers are incomplete. Raises Error when
# the buffered data exceeds MIME_HEADER_BYTESIZE_LIMIT without containing
# a complete header block.
def handle_mime_head
  if @sbuf.scan_until(@head_regex)
    head = @sbuf[1]
    content_type = head[MULTIPART_CONTENT_TYPE, 1]
    if (disposition = head[MULTIPART_CONTENT_DISPOSITION, 1]) &&
        disposition.bytesize <= CONTENT_DISPOSITION_MAX_BYTES

      # ignore actual content-disposition value (should always be form-data)
      if (i = disposition.index(';'))
        disposition.slice!(0, i + 1)
      else
        # No semicolon means the header carries no parameters at all (for
        # example a bare "form-data"), so there is nothing left to parse.
        disposition.clear
      end
      param = nil
      num_params = 0

      # Parse parameter list
      while i = disposition.index('=')
        # Only parse up to max parameters, to avoid potential denial of service
        num_params += 1
        break if num_params > CONTENT_DISPOSITION_MAX_PARAMS

        # Found end of parameter name, ensure forward progress in loop
        param = disposition.slice!(0, i+1)

        # Remove ending equals and preceding whitespace from parameter name
        param.chomp!('=')
        param.lstrip!

        if disposition[0] == '"'
          # Parameter value is quoted, parse it, handling backslash escapes
          disposition.slice!(0, 1)
          value = String.new

          while i = disposition.index(/(["\\])/)
            c = $1

            # Append all content until ending quote or escape
            value << disposition.slice!(0, i)

            # Remove either backslash or ending quote,
            # ensures forward progress in loop
            disposition.slice!(0, 1)

            # stop parsing parameter value if found ending quote
            break if c == '"'

            escaped_char = disposition.slice!(0, 1)
            if param == 'filename' && escaped_char != '"'
              # Possible IE uploaded filename, append both escape backslash and value
              value << c << escaped_char
            else
              # Other only append escaped value
              value << escaped_char
            end
          end
        else
          if i = disposition.index(';')
            # Parameter value unquoted (which may be invalid), value ends at semicolon
            value = disposition.slice!(0, i)
          else
            # If no ending semicolon, assume remainder of line is value and stop
            # parsing
            disposition.strip!
            value = disposition
            disposition = ''
          end
        end

        case param
        when 'name'
          name = value
        when 'filename'
          filename = value
        when 'filename*'
          filename_star = value
        # else
        # ignore other parameters
        end

        # skip trailing semicolon, to proceed to next parameter
        if i = disposition.index(';')
          disposition.slice!(0, i+1)
        end
      end
    else
      name = head[MULTIPART_CONTENT_ID, 1]
    end

    if filename_star
      # filename* has the form charset'language'value and takes precedence
      # over a plain filename parameter
      encoding, _, filename = filename_star.split("'", 3)
      filename = normalize_filename(filename || '')
      filename.force_encoding(find_encoding(encoding))
    elsif filename
      filename = normalize_filename(filename)
    end

    if name.nil? || name.empty?
      # Fall back to the filename, then to a key derived from the content type
      name = filename || "#{content_type || TEXT_PLAIN}[]".dup
    end

    # Mime part head data is retained for both TempfilePart and BufferPart
    # for the entirety of the parse, even though it isn't used for BufferPart.
    update_retained_size(head.bytesize)

    # If a filename is given, a TempfilePart will be used, so the body will
    # not be buffered in memory. However, if a filename is not given, a BufferPart
    # will be used, and the body will be buffered in memory.
    @body_retained = !filename

    @collector.on_mime_head @mime_index, head, filename, content_type, name
    @state = :MIME_BODY
  else
    # We raise if the mime part header is too large, to avoid unbounded memory
    # buffering. Note that the actual limit is the higher of 64KB and the buffer size (1MB by default)
    raise Error, "multipart mime part header too large" if @sbuf.string.bytesize > MIME_HEADER_BYTESIZE_LIMIT

    return :want_read
  end
end
Source
# File lib/rack/multipart/parser.rb, line 502
# Clean up a client-supplied filename.
# The name is unescaped only when every '%' in it starts a valid two-digit
# hex escape. Invalid byte sequences are then scrubbed in place, and only
# the segment after the last '/' or '\' is returned (an empty string when
# there is no such segment).
def normalize_filename(filename)
  percent_runs = filename.scan(/%.?.?/)
  fully_escaped = percent_runs.all? { |run| /%[0-9a-fA-F]{2}/.match?(run) }
  filename = Utils.unescape_path(filename) if fully_escaped

  filename.scrub!

  last_segment = filename.split(/[\/\\]/).last
  last_segment || String.new
end
Source
# File lib/rack/multipart/parser.rb, line 290
# Read up to @bufsize bytes from +io+ (using +outbuf+ as the read buffer)
# and append them to the scan buffer. An empty or nil read is passed to
# handle_empty_content!, which raises.
def read_data(io, outbuf)
  chunk = io.read(@bufsize, outbuf)
  handle_empty_content!(chunk)
  @sbuf << chunk
end
Source
# File lib/rack/multipart/parser.rb, line 515
# Tag +name+ and +body+ with the encoding that applies to a part.
#
# +name+ is always tagged as UTF-8 first. Parts with a +filename+ are left
# at that and nil is returned. For other parts the encoding stays UTF-8
# unless +content_type+ is text/plain with a charset parameter, in which
# case that charset is looked up through find_encoding and applied to both
# +name+ and +body+.
#
# +content_type+ comes from the client, so malformed values are tolerated:
# an empty content type, empty parameters (";;") and parameters without a
# value ("; foo") are skipped instead of raising NoMethodError on nil.
def tag_multipart_encoding(filename, content_type, name, body)
  name = name.to_s
  encoding = Encoding::UTF_8

  name.force_encoding(encoding)

  return if filename

  if content_type
    type_subtype, *params = content_type.split(';')

    if type_subtype && TEXT_PLAIN == type_subtype.strip
      params.each do |param|
        key, value = param.split('=', 2)
        # Skip empty parameters and parameters that have no "=value" part
        next unless key && value
        next unless key.strip == "charset"

        value = value.strip
        # Drop surrounding double quotes from a quoted charset
        value = value[1..-2] if value.start_with?('"') && value.end_with?('"')
        encoding = find_encoding(value)
      end
    end
  end

  name.force_encoding(encoding)
  body.force_encoding(encoding)
end
Source
# File lib/rack/multipart/parser.rb, line 482
# Add +size+ to the running total of retained bytes and raise Error once
# that total exceeds BUFFERED_UPLOAD_BYTESIZE_LIMIT.
def update_retained_size(size)
  @retained_size += size
  raise Error, "multipart data over retained size limit" if @retained_size > BUFFERED_UPLOAD_BYTESIZE_LIMIT
end