freeswitch/libs/sofia-sip/libsofia-sip-ua/http/http_parser.c

573 lines
14 KiB
C
Raw Normal View History

/*
* This file is part of the Sofia-SIP package
*
* Copyright (C) 2005 Nokia Corporation.
*
* Contact: Pekka Pessi <pekka.pessi@nokia.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*
*/
/**@CFILE http_parser.c
*
* HTTP parser.
*
* @author Pekka Pessi <Pekka.Pessi@nokia.com>
*
* @date Created: Thu Oct 5 14:01:24 2000 ppessi
*/
#include "config.h"
/* Avoid casting http_t to msg_pub_t and http_header_t to msg_header_t */
#define MSG_PUB_T struct http_s
#define MSG_HDR_T union http_header_u
#include <sofia-sip/su_alloc.h>
#include <sofia-sip/su_string.h>
#include "sofia-sip/http_parser.h"
#include <sofia-sip/msg_parser.h>
#include <sofia-sip/http_header.h>
#include <sofia-sip/http_status.h>
#include <sofia-sip/msg_mclass.h>
#include <sofia-sip/su_tagarg.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <limits.h>
#include <stdarg.h>
/** HTTP version 1.1. */
char const http_version_1_1[] = "HTTP/1.1";
/** HTTP version 1.0. */
char const http_version_1_0[] = "HTTP/1.0";
/** HTTP version 0.9 is an empty string. */
char const http_version_0_9[] = "";
msg_mclass_t const *http_default_mclass(void)
{
extern msg_mclass_t const http_mclass[];
return http_mclass;
}
static
issize_t http_extract_chunk(msg_t *, http_t *, char b[], isize_t bsiz, int eos);
/** Calculate length of line ending (0, 1 or 2) */
#define CRLF_TEST(s) \
(((s)[0]) == '\r' ? (((s)[1]) == '\n') + 1 : ((s)[0])=='\n')
/** Extract the HTTP message body, including separator line.
*
* @retval -1 error
* @retval 0 cannot proceed
* @retval other number of bytes extracted
*/
issize_t http_extract_body(msg_t *msg, http_t *http, char b[], isize_t bsiz, int eos)
{
issize_t m = 0;
size_t body_len;
int flags = http->http_flags;
if (eos && bsiz == 0) {
msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
return 0;
}
if (flags & MSG_FLG_TRAILERS) {
/* The empty line after trailers */
if (!eos && (bsiz == 0 || (bsiz == 1 && b[0] == '\r')))
return 0;
m = CRLF_TEST(b);
assert(m > 0 || eos); /* We should be looking at an empty line */
/* We have completed trailers */
msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
return m;
}
if (flags & MSG_FLG_CHUNKS)
return http_extract_chunk(msg, http, b, bsiz, eos);
if (!(flags & MSG_FLG_BODY)) {
/* We are looking at a potential empty line */
m = msg_extract_separator(msg, http, b, bsiz, eos);
if (m == 0) /* Not yet */
return 0;
http->http_flags |= MSG_FLG_BODY;
b += m, bsiz -= m;
}
/* body_len is determined by rules in RFC2616 sections 4.3 and 4.4 */
/* 1XX, 204, 304 do not have message-body, ever */
if (http->http_status) {
int status = http->http_status->st_status;
if (status < 200 || status == 204 || status == 304)
flags |= HTTP_FLG_NO_BODY;
}
if (flags & HTTP_FLG_NO_BODY) {
msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
return m;
}
if (http->http_transfer_encoding) {
if (/* NOTE - there is really no Transfer-Encoding: identity in RFC 2616
* but it was used in drafts...
*/
http->http_transfer_encoding->k_items &&
http->http_transfer_encoding->k_items[0] &&
!su_casematch(http->http_transfer_encoding->k_items[0], "identity")) {
http->http_flags |= MSG_FLG_CHUNKS;
if (http->http_flags & MSG_FLG_STREAMING)
msg_set_streaming(msg, msg_start_streaming);
if (m)
return m;
return http_extract_chunk(msg, http, b, bsiz, eos);
}
}
if (http->http_content_length)
body_len = http->http_content_length->l_length;
/* We cannot parse multipart/byteranges ... */
else if (http->http_content_type && http->http_content_type->c_type &&
su_casematch(http->http_content_type->c_type, "multipart/byteranges"))
return -1;
else if (MSG_IS_MAILBOX(flags)) /* message fragments */
body_len = 0;
else if (http->http_request)
body_len = 0;
else if (eos)
body_len = bsiz;
else
return 0; /* XXX */
if (body_len == 0) {
msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
return m;
}
if (http->http_flags & MSG_FLG_STREAMING)
msg_set_streaming(msg, msg_start_streaming);
if (m)
return m;
m = msg_extract_payload(msg, http, NULL, body_len, b, bsiz, eos);
if (m == -1)
return -1;
/* We have now all message fragments in place */
http->http_flags |= MSG_FLG_FRAGS;
if (bsiz >= body_len) {
msg_mark_as_complete(msg, MSG_FLG_COMPLETE);
}
return m;
}
/** Extract a chunk.
*
* @retval -1 error
* @retval 0 cannot proceed
* @retval other number of bytes extracted
*/
issize_t http_extract_chunk(msg_t *msg, http_t *http, char b[], isize_t bsiz, int eos)
{
size_t n;
unsigned crlf, chunk_len;
char *b0 = b, *s;
union {
msg_header_t *header;
msg_payload_t *chunk;
} h = { NULL };
size_t bsiz0 = bsiz;
if (bsiz == 0)
return 0;
/* We should be looking at an empty line followed by the chunk header */
while ((crlf = CRLF_TEST(b))) {
if (bsiz == 1 && crlf == 1 && b[0] == '\r' && !eos)
return 0;
if (crlf == bsiz) {
if (eos) {
msg_mark_as_complete(msg, MSG_FLG_COMPLETE | MSG_FLG_FRAGS);
return (b - b0) + crlf;
}
else
return 0;
}
assert(crlf < bsiz);
/* Skip crlf */
b += crlf; bsiz -= crlf;
}
/* Now, looking at the chunk header */
n = strcspn(b, CRLF);
if (!eos && n == bsiz)
return 0;
crlf = CRLF_TEST(b + n);
if (n == 0) {
if (crlf == bsiz && eos) {
msg_mark_as_complete(msg, MSG_FLG_COMPLETE | MSG_FLG_FRAGS);
return crlf;
}
else
return -1; /* XXX - should we be more liberal? */
}
if (!eos && n + crlf == bsiz && (crlf == 0 || (crlf == 1 && b[n] == '\r')))
return 0;
chunk_len = strtoul(b, &s, 16);
if (s == b)
return -1;
skip_ws(&s);
if (s != b + n && s[0] != ';') /* Extra stuff that is not parameter */
return -1;
if (chunk_len == 0) { /* We found last-chunk */
b += n + crlf, bsiz -= n + crlf;
crlf = bsiz > 0 ? CRLF_TEST(b) : 0;
if ((eos && bsiz == 0) || crlf == 2 ||
(crlf == 1 && (bsiz > 1 || b[0] == '\n'))) {
/* Shortcut - We got empty trailers */
b += crlf;
msg_mark_as_complete(msg, MSG_FLG_COMPLETE | MSG_FLG_FRAGS);
} else {
/* We have to parse trailers */
http->http_flags |= MSG_FLG_TRAILERS;
}
return b - b0;
}
else {
issize_t chunk;
b += n + crlf, bsiz -= n + crlf;
/* Extract chunk */
chunk = msg_extract_payload(msg, http,
&h.header, chunk_len + (b - b0),
b0, bsiz0, eos);
if (chunk != -1 && h.header) {
assert(h.chunk->pl_data);
h.chunk->pl_data += (b - b0);
h.chunk->pl_len -= (b - b0);
}
return chunk;
}
}
/** Parse HTTP version.
*
* The function http_version_d() parses a HTTP method.
*
* @retval 0 when successful,
* @retval -1 upon an error.
*/
int http_version_d(char **ss, char const **ver)
{
char *s = *ss;
char const *result;
int const version_size = sizeof(http_version_1_1) - 1;
if (su_casenmatch(s, http_version_1_1, version_size) &&
!IS_TOKEN(s[version_size])) {
result = http_version_1_1;
s += version_size;
}
else if (su_casenmatch(s, http_version_1_0, version_size) &&
!IS_TOKEN(s[version_size])) {
result = http_version_1_0;
s += version_size;
}
else if (s[0] == '\0') {
result = http_version_0_9;
} else {
/* Version consists of one or two tokens, separated by / */
size_t l1 = 0, l2 = 0, n;
result = s;
l1 = span_token(s);
for (n = l1; IS_LWS(s[n]); n++)
s[n] = '\0';
if (s[n] == '/') {
for (n = n + 1; IS_LWS(s[n]); n++)
{}
l2 = span_token(s + n);
n += l2;
}
if (l1 == 0)
return -1;
/* If there is extra ws between tokens, compact version */
if (l2 > 0 && n > l1 + 1 + l2) {
s[l1] = '/';
memmove(s + l1 + 1, s + n - l2, l2);
s[l1 + 1 + l2] = 0;
/* Compare again with compacted version */
if (su_casematch(s, http_version_1_1))
result = http_version_1_1;
else if (su_casematch(s, http_version_1_0))
result = http_version_1_0;
}
s += n;
}
while (IS_LWS(*s)) *s++ = '\0';
*ss = s;
if (ver)
*ver = result;
return 0;
}
/** Calculate extra space required by version string */
isize_t http_version_xtra(char const *version)
{
if (version == http_version_1_1)
return 0;
else if (version == http_version_1_0)
return 0;
else
return MSG_STRING_SIZE(version);
}
/** Duplicate a transport string */
void http_version_dup(char **pp, char const **dd, char const *s)
{
if (s == http_version_1_1)
*dd = s;
else if (s == http_version_1_0)
*dd = s;
else
MSG_STRING_DUP(*pp, *dd, s);
}
/** Well-known HTTP method names. */
static char const * const methods[] = {
"<UNKNOWN>",
http_method_name_get,
http_method_name_post,
http_method_name_head,
http_method_name_options,
http_method_name_put,
http_method_name_delete,
http_method_name_trace,
http_method_name_connect,
NULL,
/* If you add something here, add also them to http_method_d! */
};
char const http_method_name_get[] = "GET";
char const http_method_name_post[] = "POST";
char const http_method_name_head[] = "HEAD";
char const http_method_name_options[] = "OPTIONS";
char const http_method_name_put[] = "PUT";
char const http_method_name_delete[] = "DELETE";
char const http_method_name_trace[] = "TRACE";
char const http_method_name_connect[] = "CONNECT";
char const *http_method_name(http_method_t method, char const *name)
{
if (method > 0 && (size_t)method < sizeof(methods)/sizeof(methods[0]))
return methods[method];
else if (method == 0)
return name;
else
return NULL;
}
/**Parse a HTTP method name.
*
* The function @c http_method_d() parses a HTTP method, and returns a code
* corresponding to the method. It stores the address of the first non-LWS
* character after method name in @c *ss.
*
* @param ss pointer to pointer to string to be parsed
* @param nname pointer to value-result parameter formethod name
*
* @note
* If there is no whitespace after method name, the value in @a *nname
* may not be NUL-terminated. The calling function @b must NUL terminate
* the value by setting the @a **ss to NUL after first examining its value.
*
* @return The function @c http_method_d returns the method code if method
* was identified, 0 (@c http_method_unknown) if method is not known, or @c -1
* (@c http_method_invalid) if an error occurred.
*
* If the value-result argument @a nname is not @c NULL, http_method_d()
* stores a pointer to the method name to it.
*/
http_method_t http_method_d(char **ss, char const **nname)
{
char *s = *ss, c = *s;
char const *name;
int code = http_method_unknown;
size_t n = 0;
#define MATCH(s, m) (su_casenmatch(s, m, n = sizeof(m) - 1))
if (c >= 'a' && c <= 'z')
c += 'A' - 'a';
switch (c) {
case 'C': if (MATCH(s, "CONNECT")) code = http_method_connect; break;
case 'D': if (MATCH(s, "DELETE")) code = http_method_delete; break;
case 'G': if (MATCH(s, "GET")) code = http_method_get; break;
case 'H': if (MATCH(s, "HEAD")) code = http_method_head; break;
case 'O': if (MATCH(s, "OPTIONS")) code = http_method_options; break;
case 'P': if (MATCH(s, "POST")) code = http_method_post;
else
if (MATCH(s, "PUT")) code = http_method_put; break;
case 'T': if (MATCH(s, "TRACE")) code = http_method_trace; break;
}
#undef MATCH
if (!code || IS_NON_WS(s[n])) {
/* Unknown method */
code = http_method_unknown;
name = s;
for (n = 0; IS_UNRESERVED(s[n]); n++)
;
if (s[n]) {
if (!IS_LWS(s[n]))
return http_method_invalid;
if (nname)
s[n++] = '\0';
}
}
else {
name = methods[code];
}
while (IS_LWS(s[n]))
n++;
*ss = (s + n);
if (nname) *nname = name;
return (http_method_t)code;
}
/** Get method enum corresponding to method name */
http_method_t http_method_code(char const *name)
{
/* Note that http_method_d() does not change string if nname is NULL */
return http_method_d((char **)&name, NULL);
}
/**Parse HTTP query string.
*
* The function http_query_parse() searches for the given keys in HTTP @a
* query. For each key, a query element (in the form name=value) is searched
* from the query string. If a query element has a beginning matching with
* the key, a copy of the rest of the element is returned in corresponding
* return_value argument.
*
* @note The @a query string will be modified.
*
* @return
* The function http_query_parse() returns number keys that matched within
* the @a query string.
*/
issize_t http_query_parse(char *query,
/* char const *key, char **return_value, */
...)
{
va_list ap;
char *q, *q_next;
char *name, *value, **return_value;
char const *key;
size_t namelen, valuelen, keylen;
isize_t N;
int has_value;
if (!query)
return -1;
for (q = query, N = 0; *q; q = q_next) {
namelen = strcspn(q, "=&");
valuelen = namelen + strcspn(q + namelen, "&");
q_next = q + valuelen;
if (*q_next)
*q_next++ = '\0';
value = q + namelen;
has_value = (*value) != '\0'; /* is the part in form of name=value? */
if (has_value)
*value++ = '\0';
name = url_unescape(q, q);
if (has_value) {
namelen = strlen(name);
name[namelen] = '=';
url_unescape(name + namelen + 1, value);
}
va_start(ap, query);
while ((key = va_arg(ap, char const *))) {
return_value = va_arg(ap, char **);
keylen = strlen(key);
if (strncmp(key, name, keylen) == 0) {
*return_value = name + keylen;
N++;
}
}
va_end(ap);
}
return N;
}