123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699 |
- //
- // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
- //
- // Distributed under the Boost Software License, Version 1.0. (See accompanying
- // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- //
- // Official repository: https://github.com/boostorg/beast
- //
- #ifndef BOOST_BEAST_HTTP_BASIC_PARSER_HPP
- #define BOOST_BEAST_HTTP_BASIC_PARSER_HPP
- #include <boost/beast/core/detail/config.hpp>
- #include <boost/beast/core/error.hpp>
- #include <boost/beast/core/string.hpp>
- #include <boost/beast/http/field.hpp>
- #include <boost/beast/http/verb.hpp>
- #include <boost/beast/http/detail/basic_parser.hpp>
- #include <boost/asio/buffer.hpp>
- #include <boost/optional.hpp>
- #include <boost/assert.hpp>
- #include <limits>
- #include <memory>
- #include <type_traits>
- #include <utility>
- namespace boost {
- namespace beast {
- namespace http {
- /** A parser for decoding HTTP/1 wire format messages.
- This parser is designed to efficiently parse messages in the
- HTTP/1 wire format. It allocates no memory when input is
- presented as a single contiguous buffer, and uses minimal
- state. It will handle chunked encoding and it understands
- the semantics of the Connection, Content-Length, and Upgrade
- fields.
- The parser is optimized for the case where the input buffer
- sequence consists of a single contiguous buffer. The
- @ref beast::basic_flat_buffer class is provided, which guarantees
- that the input sequence of the stream buffer will be represented
- by exactly one contiguous buffer. To ensure the optimum performance
- of the parser, use @ref beast::basic_flat_buffer with HTTP algorithms
- such as @ref read, @ref read_some, @ref async_read, and @ref async_read_some.
- Alternatively, the caller may use custom techniques to ensure that
- the structured portion of the HTTP message (header or chunk header)
- is contained in a linear buffer.
- The interface to the parser uses virtual member functions.
- To use this class, derive your type from @ref basic_parser. When
- bytes are presented, the implementation will make a series of zero
- or more calls to virtual functions, which the derived class must
- implement.
- Every virtual function must be provided by the derived class,
- or else a compilation error will be generated. The implementation
- will make sure that `ec` is clear before each virtual function
- is invoked. If a virtual function sets an error, it is propagated
- out of the parser to the caller.
- Objects of this type are not copyable. The default constructor and
- the move operations are protected, so they are reachable only
- through a derived class.
- @tparam isRequest A `bool` indicating whether the parser will be
- presented with a request or a response message.
- @note If the parser encounters a field value with obs-fold
- longer than 4 kilobytes in length, an error is generated.
- */
- template<bool isRequest>
- class basic_parser
- : private detail::basic_parser_base
- {
- boost::optional<std::uint64_t>
- body_limit_ =
- boost::optional<std::uint64_t>(
- default_body_limit(is_request{})); // max payload body; replaced by body_limit()
- std::uint64_t len_ = 0; // size of chunk or body
- std::uint64_t len0_ = 0; // content length if known
- std::unique_ptr<char[]> buf_; // temp storage
- std::size_t buf_len_ = 0; // size of buf_
- std::size_t skip_ = 0; // resume search here
- std::uint32_t header_limit_ = 8192; // max header size; replaced by header_limit()
- unsigned short status_ = 0; // response status
- state state_ = state::nothing_yet; // initial state
- unsigned f_ = 0; // flags (bitwise OR of the flagXxx constants below)
- // limit on the size of the stack flat buffer
- static std::size_t constexpr max_stack_buffer = 8192;
- // Message will be complete after reading header (tested by skip())
- static unsigned constexpr flagSkipBody = 1<< 0;
- // Consume input buffers across semantic boundaries (set/cleared by eager(bool))
- static unsigned constexpr flagEager = 1<< 1;
- // The parser has read at least one byte
- static unsigned constexpr flagGotSome = 1<< 2;
- // Message semantics indicate a body is expected.
- // cleared if flagSkipBody set
- // NOTE(review): the flags below carry no comments in the original; their meaning is presumably as named - confirm against the implementation.
- static unsigned constexpr flagHasBody = 1<< 3;
- static unsigned constexpr flagHTTP11 = 1<< 4;
- static unsigned constexpr flagNeedEOF = 1<< 5; // tested by need_eof()
- static unsigned constexpr flagExpectCRLF = 1<< 6;
- static unsigned constexpr flagConnectionClose = 1<< 7;
- static unsigned constexpr flagConnectionUpgrade = 1<< 8; // tested by upgrade()
- static unsigned constexpr flagConnectionKeepAlive = 1<< 9;
- static unsigned constexpr flagContentLength = 1<< 10;
- static unsigned constexpr flagChunked = 1<< 11; // tested by chunked()
- static unsigned constexpr flagUpgrade = 1<< 12;
- static unsigned constexpr flagFinalChunk = 1<< 13;
- static constexpr
- std::uint64_t
- default_body_limit(std::true_type)
- {
- // default body limit when isRequest is true (selected via is_request)
- return 1 * 1024 * 1024; // 1MB
- }
- static constexpr
- std::uint64_t
- default_body_limit(std::false_type)
- {
- // default body limit when isRequest is false (selected via is_request)
- return 8 * 1024 * 1024; // 8MB
- }
- template<bool OtherIsRequest>
- friend class basic_parser; // every specialization may access the others' private members
- friend class basic_parser_test; // grants the test class access to private members
- protected:
- /// Default constructor
- basic_parser() = default;
- /** Move constructor
- @note
- After the move, the only valid operation on the
- moved-from object is destruction.
- */
- basic_parser(basic_parser &&) = default;
- /// Move assignment
- basic_parser& operator=(basic_parser &&) = default;
- public:
- /// `true` if this parser parses requests, `false` for responses.
- using is_request =
- std::integral_constant<bool, isRequest>;
- /// Destructor
- virtual ~basic_parser() = default;
- /// Copy constructor (deleted)
- basic_parser(basic_parser const&) = delete;
- /// Copy assignment (deleted)
- basic_parser& operator=(basic_parser const&) = delete;
- /// Returns `true` if the parser has received at least one byte of input.
- bool
- got_some() const
- {
- return state_ != state::nothing_yet; // any state other than the initial one
- }
- /** Returns `true` if the message is complete.
- The message is complete after the full header is produced
- and one of the following is true:
- @li The skip body option was set.
- @li The semantics of the message indicate there is no body.
- @li The semantics of the message indicate a body is expected,
- and the entire body was parsed.
- */
- bool
- is_done() const
- {
- return state_ == state::complete;
- }
- /** Returns `true` if the parser has produced the full header.
- */
- bool
- is_header_done() const
- {
- return state_ > state::fields; // relies on the ordering of the state enumerators
- }
- /** Returns `true` if the message is an upgrade message.
- @note The return value is undefined unless
- @ref is_header_done would return `true`.
- */
- bool
- upgrade() const
- {
- return (f_ & flagConnectionUpgrade) != 0;
- }
- /** Returns `true` if the last value for Transfer-Encoding is "chunked".
- @note The return value is undefined unless
- @ref is_header_done would return `true`.
- */
- bool
- chunked() const
- {
- return (f_ & flagChunked) != 0;
- }
- /** Returns `true` if the message has keep-alive connection semantics.
- This function always returns `false` if @ref need_eof would return
- `false`.
- NOTE(review): the sentence above looks inverted. A message whose end
- is delimited by end of file cannot keep the connection alive, so this
- presumably should read "if @ref need_eof would return `true`". The
- definition is not visible here; confirm against the implementation.
- @note The return value is undefined unless
- @ref is_header_done would return `true`.
- */
- bool
- keep_alive() const;
- /** Returns the optional value of Content-Length if known.
- @note The return value is undefined unless
- @ref is_header_done would return `true`.
- */
- boost::optional<std::uint64_t>
- content_length() const;
- /** Returns the remaining content length if known.
- If the message header specifies a Content-Length,
- the return value will be the number of bytes in the
- payload body that have not yet been parsed.
- @note The return value is undefined unless
- @ref is_header_done would return `true`.
- */
- boost::optional<std::uint64_t>
- content_length_remaining() const;
- /** Returns `true` if the message semantics require an end of file.
- Depending on the contents of the header, the parser may
- require an end of file notification to know where the end
- of the body lies. If this function returns `true` it will be
- necessary to call @ref put_eof when there will never be additional
- data from the input.
- */
- bool
- need_eof() const
- {
- return (f_ & flagNeedEOF) != 0;
- }
- /** Set the limit on the payload body.
- This function sets the maximum allowed size of the payload body,
- before any encodings except chunked have been removed. Depending
- on the message semantics, one of these cases will apply:
- @li The Content-Length is specified and exceeds the limit. In
- this case the result @ref error::body_limit is returned
- immediately after the header is parsed.
- @li The Content-Length is unspecified and the chunked encoding
- is not specified as the last encoding. In this case the end of
- message is determined by the end of file indicator on the
- associated stream or input source. If a sufficient number of
- body payload octets are presented to the parser to exceed the
- configured limit, the parse fails with the result
- @ref error::body_limit.
- @li The Transfer-Encoding specifies the chunked encoding as the
- last encoding. In this case, when the number of payload body
- octets produced by removing the chunked encoding exceeds
- the configured limit, the parse fails with the result
- @ref error::body_limit.
-
- Setting the limit after any body octets have been parsed
- results in undefined behavior.
- The default limit is 1MB for requests and 8MB for responses.
- @param v An optional integral value representing the body limit.
- If this is equal to `boost::none`, then the body limit is disabled.
- */
- void
- body_limit(boost::optional<std::uint64_t> v)
- {
- body_limit_ = v;
- }
- /** Set a limit on the total size of the header.
- This function sets the maximum allowed size of the header
- including all field name, value, and delimiter characters
- and also including the CRLF sequences in the serialized
- input. If the end of the header is not found within the
- limit of the header size, the error @ref error::header_limit
- is returned by @ref put.
- Setting the limit after any header octets have been parsed
- results in undefined behavior.
- The default limit is 8192 octets.
- @param v The maximum header size, in octets.
- */
- void
- header_limit(std::uint32_t v)
- {
- header_limit_ = v;
- }
- /// Returns `true` if the eager parse option is set.
- bool
- eager() const
- {
- return (f_ & flagEager) != 0;
- }
- /** Set the eager parse option.
- Normally the parser returns after successfully parsing a structured
- element (header, chunk header, or chunk body) even if there are octets
- remaining in the input. This is necessary when attempting to parse the
- header first, or when the caller wants to inspect information which may
- be invalidated by subsequent parsing, such as a chunk extension. The
- `eager` option controls whether the parser keeps going after parsing
- a structured element if there are octets remaining in the buffer and no
- error occurs. This option is automatically set or cleared during certain
- stream operations to improve performance with no change in functionality.
- The default setting is `false`.
- @param v `true` to set the eager parse option or `false` to disable it.
- */
- void
- eager(bool v)
- {
- if(v)
- f_ |= flagEager;
- else
- f_ &= ~flagEager;
- }
- /// Returns `true` if the skip parse option is set.
- bool
- skip() const
- {
- return (f_ & flagSkipBody) != 0;
- }
- /** Set the skip parse option.
- This option controls whether or not the parser expects to see an HTTP
- body, regardless of the presence or absence of certain fields such as
- Content-Length or a chunked Transfer-Encoding. Depending on the request,
- some responses do not carry a body. For example, a 200 response to a
- CONNECT request from a tunneling proxy, or a response to a HEAD request.
- In these cases, callers may use this function to inform the parser that
- no body is expected. The parser will consider the message complete
- after the header has been received.
- @param v `true` to set the skip body option or `false` to disable it.
- @note This function must be called before any bytes are processed.
- */
- void
- skip(bool v);
- /** Write a buffer sequence to the parser.
- This function attempts to incrementally parse the HTTP
- message data stored in the caller provided buffers. Upon
- success, a positive return value indicates that the parser
- made forward progress, consuming that number of
- bytes.
- In some cases there may be an insufficient number of octets
- in the input buffer in order to make forward progress. This
- is indicated by the code @ref error::need_more. When
- this happens, the caller should place additional bytes into
- the buffer sequence and call @ref put again.
- The error code @ref error::need_more is special. When this
- error is returned, a subsequent call to @ref put may succeed
- if the buffers have been updated. Otherwise, upon error
- the parser may not be restarted.
- @param buffers An object meeting the requirements of
- <em>ConstBufferSequence</em> that represents the next chunk of
- message data. If the length of this buffer sequence is
- one, the implementation will not allocate additional memory.
- The class @ref beast::basic_flat_buffer is provided as one way to
- meet this requirement.
- @param ec Set to the error, if any occurred.
- @return The number of octets consumed in the buffer
- sequence. The caller should remove these octets even if the
- error is set.
- */
- template<class ConstBufferSequence>
- std::size_t
- put(ConstBufferSequence const& buffers, error_code& ec);
- #if ! BOOST_BEAST_DOXYGEN
- std::size_t
- put(net::const_buffer buffer,
- error_code& ec);
- #endif
- /** Inform the parser that the end of stream was reached.
- In certain cases, HTTP needs to know where the end of
- the stream is. For example, sometimes servers send
- responses without Content-Length and expect the client
- to consume input (for the body) until EOF. Callbacks
- and errors will still be processed as usual.
- This is typically called when a read from the
- underlying stream object sets the error code to
- `net::error::eof`.
- @note Only valid after parsing a complete header.
- @param ec Set to the error, if any occurred.
- */
- void
- put_eof(error_code& ec);
- protected:
- /** Called after receiving the request-line.
- This virtual function is invoked after receiving a request-line
- when parsing HTTP requests.
- It can only be called when `isRequest == true`.
- @param method The verb enumeration. If the method string is not
- one of the predefined strings, this value will be @ref verb::unknown.
- @param method_str The unmodified string representing the verb.
- @param target The request-target.
- @param version The HTTP-version. This will be 10 for HTTP/1.0,
- and 11 for HTTP/1.1.
- @param ec An output parameter which the function may set to indicate
- an error. The error will be clear before this function is invoked.
- */
- virtual
- void
- on_request_impl(
- verb method,
- string_view method_str,
- string_view target,
- int version,
- error_code& ec) = 0;
- /** Called after receiving the status-line.
- This virtual function is invoked after receiving a status-line
- when parsing HTTP responses.
- It can only be called when `isRequest == false`.
- @param code The numeric status code.
- @param reason The reason-phrase. Note that this value is
- now obsolete, and only provided for historical or diagnostic
- purposes.
- @param version The HTTP-version. This will be 10 for HTTP/1.0,
- and 11 for HTTP/1.1.
- @param ec An output parameter which the function may set to indicate
- an error. The error will be clear before this function is invoked.
- */
- virtual
- void
- on_response_impl(
- int code,
- string_view reason,
- int version,
- error_code& ec) = 0;
- /** Called once for each complete field in the HTTP header.
- This virtual function is invoked for each field that is received
- while parsing an HTTP message.
- @param name The known field enum value. If the name of the field
- is not recognized, this value will be @ref field::unknown.
- @param name_string The exact name of the field as received from
- the input, represented as a string.
- @param value A string holding the value of the field.
- @param ec An output parameter which the function may set to indicate
- an error. The error will be clear before this function is invoked.
- */
- virtual
- void
- on_field_impl(
- field name,
- string_view name_string,
- string_view value,
- error_code& ec) = 0;
- /** Called once after the complete HTTP header is received.
- This virtual function is invoked once, after the complete HTTP
- header is received while parsing a message.
- @param ec An output parameter which the function may set to indicate
- an error. The error will be clear before this function is invoked.
- */
- virtual
- void
- on_header_impl(error_code& ec) = 0;
- /** Called once before the body is processed.
- This virtual function is invoked once, before the content body is
- processed (but after the complete header is received).
- @param content_length A value representing the content length in
- bytes if the length is known (this can include a zero length).
- Otherwise, the value will be `boost::none`.
- @param ec An output parameter which the function may set to indicate
- an error. The error will be clear before this function is invoked.
- */
- virtual
- void
- on_body_init_impl(
- boost::optional<std::uint64_t> const& content_length,
- error_code& ec) = 0;
- /** Called each time additional data is received representing the content body.
- This virtual function is invoked for each piece of the body which is
- received while parsing of a message. This function is only used when
- no chunked transfer encoding is present.
- @param body A string holding the additional body contents. This may
- contain nulls or unprintable characters.
- @param ec An output parameter which the function may set to indicate
- an error. The error will be clear before this function is invoked.
- @return NOTE(review): the original text does not document the return
- value. Presumably it is the number of bytes consumed from `body`, as
- for @ref on_chunk_body_impl; confirm against the implementation.
- @see on_chunk_body_impl
- */
- virtual
- std::size_t
- on_body_impl(
- string_view body,
- error_code& ec) = 0;
- /** Called each time a new chunk header of a chunk encoded body is received.
- This function is invoked each time a new chunk header is received.
- The function is only used when the chunked transfer encoding is present.
- @param size The size of this chunk, in bytes.
- @param extensions A string containing the entire chunk extensions.
- This may be empty, indicating no extensions are present.
- @param ec An output parameter which the function may set to indicate
- an error. The error will be clear before this function is invoked.
- */
- virtual
- void
- on_chunk_header_impl(
- std::uint64_t size,
- string_view extensions,
- error_code& ec) = 0;
- /** Called each time additional data is received representing part of a body chunk.
- This virtual function is invoked for each piece of the body which is
- received while parsing of a message. This function is only used when
- the chunked transfer encoding is present.
- @param remain The number of bytes remaining in this chunk. This includes
- the contents of passed `body`. If this value is zero, then this represents
- the final chunk.
- @param body A string holding the additional body contents. This may
- contain nulls or unprintable characters.
- @param ec An output parameter which the function may set to indicate
- an error. The error will be clear before this function is invoked.
- @return This function should return the number of bytes actually consumed
- from the `body` value. Any bytes that are not consumed on this call
- will be presented in a subsequent call.
- @see on_body_impl
- */
- virtual
- std::size_t
- on_chunk_body_impl(
- std::uint64_t remain,
- string_view body,
- error_code& ec) = 0;
- /** Called once when the complete message is received.
- This virtual function is invoked once, after successfully parsing
- a complete HTTP message.
- @param ec An output parameter which the function may set to indicate
- an error. The error will be clear before this function is invoked.
- */
- virtual
- void
- on_finish_impl(error_code& ec) = 0;
- private: // implementation helpers, declared only; definitions are not in this header
- boost::optional<std::uint64_t>
- content_length_unchecked() const;
- template<class ConstBufferSequence>
- std::size_t
- put_from_stack(
- std::size_t size,
- ConstBufferSequence const& buffers,
- error_code& ec);
- void
- maybe_need_more(
- char const* p, std::size_t n,
- error_code& ec);
- void
- parse_start_line(
- char const*& p, char const* last,
- error_code& ec, std::true_type); // tag-selected overload, presumably for requests (cf. is_request)
- void
- parse_start_line(
- char const*& p, char const* last,
- error_code& ec, std::false_type); // tag-selected overload, presumably for responses (cf. is_request)
- void
- parse_fields(
- char const*& p, char const* last,
- error_code& ec);
- void
- finish_header(
- error_code& ec, std::true_type); // tag-selected overload, presumably for requests (cf. is_request)
- void
- finish_header(
- error_code& ec, std::false_type); // tag-selected overload, presumably for responses (cf. is_request)
- void
- parse_body(char const*& p,
- std::size_t n, error_code& ec);
- void
- parse_body_to_eof(char const*& p,
- std::size_t n, error_code& ec);
- void
- parse_chunk_header(char const*& p,
- std::size_t n, error_code& ec);
- void
- parse_chunk_body(char const*& p,
- std::size_t n, error_code& ec);
- void
- do_field(field f,
- string_view value, error_code& ec);
- };
- } // http
- } // beast
- } // boost
- #include <boost/beast/http/impl/basic_parser.hpp>
- #ifdef BOOST_BEAST_HEADER_ONLY
- #include <boost/beast/http/impl/basic_parser.ipp>
- #endif
- #endif
|