437 lines
13 KiB
C
Executable file
437 lines
13 KiB
C
Executable file
// url.c -- Object representing uniform resource locators
// Copyright (C) 2008-2010 Markus Gutschke <markus@shellinabox.com>
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 2 as
// published by the Free Software Foundation.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
//
// In addition to these license terms, the author grants the following
// additional rights:
//
// If you modify this program, or any covered work, by linking or
// combining it with the OpenSSL project's OpenSSL library (or a
// modified version of that library), containing parts covered by the
// terms of the OpenSSL or SSLeay licenses, the author
// grants you additional permission to convey the resulting work.
// Corresponding Source for a non-source form of such a combination
// shall include the source code for the parts of OpenSSL used as well
// as that of the covered work.
//
// You may at your option choose to remove this additional permission from
// the work, or from any part of it.
//
// It is possible to build this program in a way that it loads OpenSSL
// libraries at run-time. If doing so, the following notices are required
// by the OpenSSL and SSLeay licenses:
//
// This product includes software developed by the OpenSSL Project
// for use in the OpenSSL Toolkit. (http://www.openssl.org/)
//
// This product includes cryptographic software written by Eric Young
// (eay@cryptsoft.com)
//
//
// The most up-to-date version of this program is always available from
// http://shellinabox.com
|
|
|
|
#include "config.h"
|
|
|
|
#define _XOPEN_SOURCE 500
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#ifdef HAVE_STRINGS_H
|
|
#include <strings.h> // for strncasecmp()
|
|
#endif
|
|
|
|
#include "libhttp/url.h"
|
|
|
|
#include "logging/logging.h"
|
|
|
|
// Helpers for parameters that a function must accept but does not use.
//
// ATTR_UNUSED is appended to the parameter declaration and UNUSED(x) is
// placed in the function body. When HAVE_UNUSED is defined, the compiler
// attribute does the work and UNUSED() expands to an empty statement;
// otherwise ATTR_UNUSED expands to nothing and UNUSED() references the
// parameter through a void cast.
// NOTE(review): HAVE_UNUSED is presumably provided by "config.h" -- confirm.
#ifdef HAVE_UNUSED
#define ATTR_UNUSED __attribute__((unused))
#define UNUSED(x)   do { } while (0)
#else
#define ATTR_UNUSED
#define UNUSED(x)   do { (void)(x); } while (0)
#endif
|
|
|
|
// Maps one ASCII hexadecimal digit (either case) to its numeric value.
// Returns -1 for every other character, including the NUL terminator.
static int urlHexDigit(char c) {
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  if (c >= 'a' && c <= 'f') {
    return c - 'a' + 10;
  }
  if (c >= 'A' && c <= 'F') {
    return c - 'A' + 10;
  }
  return -1;
}

// Decodes URL-encoded text in place and returns the start of the buffer.
//
// '+' becomes a space and "%XY" (two hex digits) becomes the byte 0xXY.
// The decoded text is never longer than the input, so the result is written
// over the input and re-terminated with NUL.
//
// Malformed escapes are tolerated: a '%' that is not followed by a hex digit
// is copied literally, and a '%' followed by a single hex digit yields the
// byte with that digit's value. A warning is logged for the first malformed
// escape only.
static char *urlUnescape(char *s) {
  char *const start = s;
  char *out         = s;
  const char *in    = s;
  int complained    = 0;
  while (*in) {
    char ch = *in++;
    if (ch == '+') {
      ch = ' ';
    } else if (ch == '%') {
      int malformed = 1;
      int hi = urlHexDigit(*in);
      if (hi >= 0) {
        // The first digit is consumed even if the second one turns out bad.
        int lo = urlHexDigit(*++in);
        if (lo >= 0) {
          ch = (char)((hi << 4) + lo);
          ++in;
          malformed = 0;
        } else {
          ch = (char)hi;
        }
      }
      if (malformed && !complained) {
        complained = 1;
        warn("[http] Malformed URL encoded data \"%s\"!", start);
      }
    }
    *out++ = ch;
  }
  *out = '\000';
  return start;
}
|
|
|
|
// Entry destructor handed to initHashMap()/newHashMap() for every map built
// in this file. Both the key and the value are released with free(), so
// entries must only ever hold heap-allocated strings (or a NULL value).
// The opaque callback argument is not used.
static void urlDestroyHashMapEntry(void *arg ATTR_UNUSED, char *key,
                                   char *value) {
  UNUSED(arg);
  free(value);
  free(key);
}
|
|
|
|
// Returns a freshly allocated, NUL-terminated copy of the first "len" bytes
// of "buf". A NULL "buf" yields NULL. Allocation failure is handled by
// check(). The caller owns the returned string.
static char *urlMakeString(const char *buf, int len) {
  if (buf == NULL) {
    return NULL;
  }
  char *s;
  check(s = malloc(len + 1));
  memcpy(s, buf, len);
  s[len] = '\000';
  return s;
}
|
|
|
|
// Splits "key=value&key=value" data into entries of "hashmap".
//
// "query" does not have to be NUL-terminated; exactly "len" bytes are
// examined, and an embedded NUL byte stops parsing early. Keys and values
// are copied and URL-decoded. A key without '=' is stored with a NULL
// value; pairs with an empty key are skipped. Only the first '=' of a pair
// separates the key from the value.
static void urlParseQueryString(struct HashMap *hashmap, const char *query, int len) {
  const char *keyStart   = query;
  const char *valueStart = NULL;
  const char *cur        = query;
  // One extra iteration (remaining == 0) stands in for the terminator so
  // that the last pair is flushed just like one followed by '&'.
  for (int remaining = len; remaining >= 0; remaining--, cur++) {
    const int atEnd = remaining == 0;
    const char ch   = atEnd ? '\000' : *cur;
    if (ch == '=' && valueStart == NULL) {
      valueStart = cur + 1;
    } else if (ch == '&' || atEnd) {
      const char *keyEnd = valueStart ? valueStart - 1 : cur;
      int keyLen         = keyEnd - keyStart;
      if (keyLen != 0) {
        char *k = urlMakeString(keyStart, keyLen);
        urlUnescape(k);
        char *v = NULL;
        if (valueStart) {
          int valueLen = cur - valueStart;
          v = urlMakeString(valueStart, valueLen);
          urlUnescape(v);
        }
        addToHashMap(hashmap, k, v);
      }
      keyStart   = cur + 1;
      valueStart = NULL;
    }
    if (ch == '\000') {
      break;
    }
  }
}
|
|
|
|
// Splits a header value of the form
//   token; key=value; key="value"
// into entries of "hashmap".
//
// "s" does not have to be NUL-terminated; exactly "len" bytes are examined.
// A NULL "s" or a non-positive "len" adds nothing. Entries are separated by
// ';', and spaces and semicolons in front of an entry are skipped. Keys are
// converted to lower case so that the lower-case lookups done elsewhere in
// this file match regardless of how the peer capitalized them. An entry
// without '=' is stored with a NULL value. A value enclosed in double
// quotes is stored without the quotes; values are otherwise copied verbatim.
static void urlParseHeaderLine(struct HashMap *hashmap, const char *s,
                               int len) {
  while (s && len > 0) {
    // Skip separators in front of the next entry
    while (len > 0 && (*s == ' ' || *s == ';')) {
      s++;
      len--;
    }
    const char *key   = s;
    const char *value = NULL;
    // Scan to the end of the entry; only the first '=' starts the value
    while (len > 0 && *s != ';') {
      if (*s == '=' && value == NULL) {
        value = s + 1;
      }
      s++;
      len--;
    }
    int kl = (value ? value - 1 : s) - key;
    int vl = value ? s - value : 0;
    if (kl) {
      char *k = urlMakeString(key, kl);
      for (char *t = k; *t; t++) {
        // Setting bit 0x20 turns an ASCII upper-case letter into lower case
        if (*t >= 'A' && *t <= 'Z') {
          *t |= 0x20;
        }
      }
      char *v = NULL;
      if (value) {
        if (vl >= 2 && value[0] == '"' && value[vl - 1] == '"') {
          // Drop both the opening and the closing quote
          value++;
          vl -= 2;
        }
        v = urlMakeString(value, vl);
      }
      addToHashMap(hashmap, k, v);
    }
  }
}
|
|
|
|
// Finds the first occurrence of the NUL-terminated string "s" within the
// first "len" bytes of "buf" (which need not be NUL-terminated).
//
// Returns a pointer to the match, "buf" itself if "s" is empty, or NULL if
// there is no match (including when "len" is shorter than "s").
static const char *urlMemstr(const char *buf, int len, const char *s) {
  int sLen = strlen(s);
  if (!sLen) {
    return buf;
  }
  while (len >= sLen) {
    // A match can start at any of the offsets 0 .. len - sLen, so the first
    // byte has to be searched for in len - sLen + 1 positions. Searching one
    // position fewer would miss a match at the very end of the buffer.
    const char *first = memchr(buf, *s, len - sLen + 1);
    if (!first) {
      return NULL;
    }
    len -= (int)(first - buf);
    buf  = first;
    if (!memcmp(buf, s, sLen)) {
      return buf;
    }
    buf++;
    len--;
  }
  return NULL;
}
|
|
|
|
// Tests whether the "len"-byte buffer "buf" starts with the NUL-terminated
// string "s".
//
// Returns 0 on a match. If the buffer is shorter than "s", the result is
// the first character of "s" that lies beyond the buffer, which is always
// non-zero; otherwise the result is that of memcmp() over strlen(s) bytes.
// Callers in this file only test the result against zero. A negative "len"
// is treated like an empty buffer instead of indexing in front of "s".
static int urlMemcmp(const char *buf, int len, const char *s) {
  int sLen = strlen(s);
  if (len < 0) {
    len = 0;
  }
  if (len < sLen) {
    return s[len];
  }
  return memcmp(buf, s, sLen);
}
|
|
|
|
// Case-insensitive variant of urlMemcmp(): tests whether the "len"-byte
// buffer "buf" starts with the NUL-terminated string "s", ignoring case.
//
// Returns 0 on a match. If the buffer is shorter than "s", the result is
// the first character of "s" that lies beyond the buffer, which is always
// non-zero; otherwise the result is that of strncasecmp() over strlen(s)
// bytes. A negative "len" is treated like an empty buffer instead of
// indexing in front of "s".
static int urlMemcasecmp(const char *buf, int len, const char *s) {
  int sLen = strlen(s);
  if (len < 0) {
    len = 0;
  }
  if (len < sLen) {
    return s[len];
  }
  return strncasecmp(buf, s, sLen);
}
|
|
|
|
static void urlParsePart(struct URL *url, const char *buf, int len) {
|
|
// Most browsers seem to forget quoting data in the header fields. This
|
|
// means, it is quite possible for an HTML form to cause the submission of
|
|
// unparseable "multipart/form-data". If this happens, we just give up
|
|
// and ignore the malformed data.
|
|
// Example:
|
|
// <form method="POST" enctype="multipart/form-data">
|
|
// <input type="file" name="" X: x="">
|
|
// <input type="submit">
|
|
// </form>
|
|
char *name = NULL;
|
|
for (const char *eol; !!(eol = urlMemstr(buf, len, "\r\n")); ) {
|
|
if (buf == eol) {
|
|
buf += 2;
|
|
len -= 2;
|
|
if (name) {
|
|
char *value = len ? urlMakeString(buf, len) : NULL;
|
|
addToHashMap(&url->args, name, value);
|
|
name = NULL;
|
|
}
|
|
break;
|
|
} else {
|
|
if (!name && !urlMemcasecmp(buf, len, "content-disposition:")) {
|
|
struct HashMap fields;
|
|
initHashMap(&fields, urlDestroyHashMapEntry, NULL);
|
|
urlParseHeaderLine(&fields, buf + 20, eol - buf - 20);
|
|
if (getRefFromHashMap(&fields, "form-data")) {
|
|
// We currently don't bother to deal with binary files (e.g. files
|
|
// that include NUL characters). If this ever becomes necessary,
|
|
// we could check for the existence of a "filename" field and use
|
|
// that as an indicator to store the payload in something other
|
|
// than "url->args".
|
|
name = (char *)getFromHashMap(&fields, "name");
|
|
if (name && *name) {
|
|
check(name = strdup(name));
|
|
}
|
|
}
|
|
destroyHashMap(&fields);
|
|
}
|
|
len -= eol - buf + 2;
|
|
buf = eol + 2;
|
|
}
|
|
}
|
|
free(name);
|
|
}
|
|
|
|
static void urlParsePostBody(struct URL *url,
|
|
const struct HttpConnection *http,
|
|
const char *buf, int len) {
|
|
struct HashMap contentType;
|
|
initHashMap(&contentType, urlDestroyHashMapEntry, NULL);
|
|
const char *ctHeader = getFromHashMap(&http->header, "content-type");
|
|
urlParseHeaderLine(&contentType, ctHeader, ctHeader ? strlen(ctHeader) : 0);
|
|
if (getRefFromHashMap(&contentType, "application/x-www-form-urlencoded")) {
|
|
urlParseQueryString(&url->args, buf, len);
|
|
} else if (getRefFromHashMap(&contentType, "multipart/form-data")) {
|
|
const char *boundary = getFromHashMap(&contentType, "boundary");
|
|
if (boundary && *boundary) {
|
|
const char *lastPart = NULL;
|
|
for (const char *part = buf; len > 0; ) {
|
|
const char *ptr;
|
|
if ((part == buf && (ptr = urlMemstr(part, len, "--")) != NULL) ||
|
|
(ptr = urlMemstr(part, len, "\r\n--")) != NULL) {
|
|
len -= ptr - part + (part == buf ? 2 : 4);
|
|
part = ptr + (part == buf ? 2 : 4);
|
|
if (!urlMemcmp(part, len, boundary)) {
|
|
int i = strlen(boundary);
|
|
len -= i;
|
|
part += i;
|
|
if (!urlMemcmp(part, len, "\r\n")) {
|
|
len -= 2;
|
|
part += 2;
|
|
if (lastPart) {
|
|
urlParsePart(url, lastPart, ptr - lastPart);
|
|
} else {
|
|
if (ptr != buf) {
|
|
info("[http] Ignoring prologue before \"multipart/form-data\"!");
|
|
}
|
|
}
|
|
lastPart = part;
|
|
} else if (!urlMemcmp(part, len, "--\r\n")) {
|
|
len -= 4;
|
|
part += 4;
|
|
urlParsePart(url, lastPart, ptr - lastPart);
|
|
lastPart = NULL;
|
|
if (len > 0) {
|
|
info("[http] Ignoring epilogue past end of \"multipart/"
|
|
"form-data\"!");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (lastPart) {
|
|
warn("[http] Missing final \"boundary\" for \"multipart/form-data\"!");
|
|
}
|
|
} else {
|
|
warn("[http] Missing \"boundary\" information for \"multipart/form-data\"!");
|
|
}
|
|
}
|
|
destroyHashMap(&contentType);
|
|
}
|
|
|
|
struct URL *newURL(const struct HttpConnection *http,
|
|
const char *buf, int len) {
|
|
struct URL *url;
|
|
check(url = malloc(sizeof(struct URL)));
|
|
initURL(url, http, buf, len);
|
|
return url;
|
|
}
|
|
|
|
void initURL(struct URL *url, const struct HttpConnection *http,
|
|
const char *buf, int len) {
|
|
url->protocol = strdup(httpGetProtocol(http));
|
|
url->user = NULL;
|
|
url->password = NULL;
|
|
url->host = strdup(httpGetHost(http));
|
|
url->port = httpGetPort(http);
|
|
url->path = strdup(httpGetPath(http));
|
|
url->pathinfo = strdup(httpGetPathInfo(http));
|
|
url->query = strdup(httpGetQuery(http));
|
|
url->anchor = NULL;
|
|
url->url = NULL;
|
|
initHashMap(&url->args, urlDestroyHashMapEntry, NULL);
|
|
if (!strcmp(http->method, "GET")) {
|
|
check(url->query);
|
|
urlParseQueryString(&url->args, url->query, strlen(url->query));
|
|
} else if (!strcmp(http->method, "POST")) {
|
|
urlParsePostBody(url, http, buf, len);
|
|
}
|
|
}
|
|
|
|
void destroyURL(struct URL *url) {
|
|
if (url) {
|
|
free(url->protocol);
|
|
free(url->user);
|
|
free(url->password);
|
|
free(url->host);
|
|
free(url->path);
|
|
free(url->pathinfo);
|
|
free(url->query);
|
|
free(url->anchor);
|
|
free(url->url);
|
|
destroyHashMap(&url->args);
|
|
}
|
|
}
|
|
|
|
// Destroys a URL object and frees the object itself, i.e. the counterpart
// of newURL(). A NULL "url" is ignored.
void deleteURL(struct URL *url) {
  if (url != NULL) {
    destroyURL(url);
    free(url);
  }
}
|
|
|
|
const char *urlGetProtocol(struct URL *url) {
|
|
return url->protocol;
|
|
}
|
|
|
|
const char *urlGetUser(struct URL *url) {
|
|
return url->user;
|
|
}
|
|
|
|
const char *urlGetPassword(struct URL *url) {
|
|
return url->password;
|
|
}
|
|
|
|
const char *urlGetHost(struct URL *url) {
|
|
return url->host;
|
|
}
|
|
|
|
int urlGetPort(struct URL *url) {
|
|
return url->port;
|
|
}
|
|
|
|
const char *urlGetPath(struct URL *url) {
|
|
return url->path;
|
|
}
|
|
|
|
const char *urlGetPathInfo(struct URL *url) {
|
|
return url->pathinfo;
|
|
}
|
|
|
|
const char *urlGetQuery(struct URL *url) {
|
|
return url->query;
|
|
}
|
|
|
|
const char *urlGetAnchor(struct URL *url) {
|
|
return url->anchor;
|
|
}
|
|
|
|
const char *urlGetURL(struct URL *url) {
|
|
if (!url->url) {
|
|
const char *host = urlGetHost(url);
|
|
int s_size = 8 + strlen(host) + 25 + strlen(url->path);
|
|
check(*(char **)&url->url = malloc(s_size + 1));
|
|
*url->url = '\000';
|
|
strncat(url->url, url->protocol, s_size);
|
|
strncat(url->url, "://", s_size);
|
|
strncat(url->url, host, s_size);
|
|
if (url->port != (strcmp(url->protocol, "http") ? 443 : 80)) {
|
|
snprintf(strrchr(url->url, '\000'), 25, ":%d", url->port);
|
|
}
|
|
strncat(url->url, url->path, s_size);
|
|
}
|
|
return url->url;
|
|
}
|
|
|
|
const struct HashMap *urlGetArgs(struct URL *url) {
|
|
return &url->args;
|
|
}
|
|
|
|
struct HashMap *urlParseQuery(const char *buf, int len) {
|
|
struct HashMap *hashmap = newHashMap(urlDestroyHashMapEntry, NULL);
|
|
urlParseQueryString(hashmap, buf, len);
|
|
return hashmap;
|
|
}
|