// url.c -- Object representing uniform resource locators
// Copyright (C) 2008-2010 Markus Gutschke <markus@shellinabox.com>
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 2 as
// published by the Free Software Foundation.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
//
// In addition to these license terms, the author grants the following
// additional rights:
//
// If you modify this program, or any covered work, by linking or
// combining it with the OpenSSL project's OpenSSL library (or a
// modified version of that library), containing parts covered by the
// terms of the OpenSSL or SSLeay licenses, the author
// grants you additional permission to convey the resulting work.
// Corresponding Source for a non-source form of such a combination
// shall include the source code for the parts of OpenSSL used as well
// as that of the covered work.
//
// You may at your option choose to remove this additional permission from
// the work, or from any part of it.
//
// It is possible to build this program in a way that it loads OpenSSL
// libraries at run-time. If doing so, the following notices are required
// by the OpenSSL and SSLeay licenses:
//
// This product includes software developed by the OpenSSL Project
// for use in the OpenSSL Toolkit. (http://www.openssl.org/)
//
// This product includes cryptographic software written by Eric Young
// (eay@cryptsoft.com)
//
//
// The most up-to-date version of this program is always available from
// http://shellinabox.com
#include "config.h"
#define _XOPEN_SOURCE 500
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_STRINGS_H
#include <strings.h> // for strncasecmp()
#endif
#include "libhttp/url.h"
#include "logging/logging.h"
#ifdef HAVE_UNUSED
#define ATTR_UNUSED __attribute__((unused))
#define UNUSED(x) do { } while (0)
#else
#define ATTR_UNUSED
#define UNUSED(x) do { (void)(x); } while (0)
#endif
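
// Decodes URL-escaped data in place: '+' becomes a space and "%XX" hex
// sequences become the corresponding byte, so "%48ello+World" turns into
// "Hello World". Malformed escape sequences are passed through unchanged
// and logged once. Returns its argument.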
static char *urlUnescape(char *s) {
int warned = 0;
char *r = s;
for (char *u = s; *u; ) {
char ch = *u++;
if (ch == '+') {
ch = ' ';
} else if (ch == '%') {
char c1 = *u;
if ((c1 >= '0' && c1 <= '9') || ((c1 &= ~0x20) >= 'A' && c1 <= 'F')) {
ch = c1 - (c1 > '9' ? 'A' - 10 : '0');
char c2 = *++u;
if ((c2 >= '0' && c2 <= '9') || ((c2 &= ~0x20) >= 'A' && c2 <= 'F')) {
ch = (ch << 4) + c2 - (c2 > '9' ? 'A' - 10 : '0');
++u;
} else if (!warned++) {
warn("[http] Malformed URL encoded data \"%s\"!", r);
}
} else if (!warned++) {
warn("[http] Malformed URL encoded data \"%s\"!", r);
}
}
*s++ = ch;
}
*s = '\000';
return r;
}
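
// Destructor callback handed to initHashMap()/newHashMap(); it releases the
// heap-allocated key and value of a single hash map entry.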
static void urlDestroyHashMapEntry(void *arg ATTR_UNUSED, char *key,
char *value) {
UNUSED(arg);
free(key);
free(value);
}
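
// Returns a NUL-terminated heap copy of the first "len" bytes of "buf", or
// NULL if "buf" is NULL. The caller owns the returned string.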
static char *urlMakeString(const char *buf, int len) {
if (!buf) {
return NULL;
} else {
char *s;
check(s = malloc(len + 1));
memcpy(s, buf, len);
s[len] = '\000';
return s;
}
}
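
// Splits a query string (or an "application/x-www-form-urlencoded" POST
// body) of the form "key1=value1&key2=value2" into URL-unescaped key/value
// pairs and adds them to "hashmap". For example, "a=1&b=hello%20world"
// yields the entries a -> "1" and b -> "hello world"; a key that has no
// '=' sign is stored with a NULL value.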
static void urlParseQueryString(struct HashMap *hashmap, const char *query, int len) {
const char *key = query;
const char *value = NULL;
for (const char *ampersand = query; len-- >= 0; ampersand++) {
char ch = len >= 0 ? *ampersand : '\000';
if (ch == '=' && !value) {
value = ampersand + 1;
} else if (ch == '&' || len < 0) {
int kl = (value ? value-1 : ampersand) - key;
int vl = value ? ampersand - value : 0;
if (kl) {
char *k = urlMakeString(key, kl);
urlUnescape(k);
char *v = NULL;
if (value) {
v = urlMakeString(value, vl);
urlUnescape(v);
}
addToHashMap(hashmap, k, v);
}
key = ampersand + 1;
value = NULL;
}
if (!ch) {
break;
}
}
}
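
// Parses a semicolon-separated header value such as
//   multipart/form-data; boundary="XYZ"
// into the entries "multipart/form-data" -> NULL and "boundary" -> "XYZ".
// Keys are folded to lower case and surrounding double quotes are stripped
// from values.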
static void urlParseHeaderLine(struct HashMap *hashmap, const char *s,
int len) {
while (s && len > 0) {
while (len > 0 && (*s == ' ' || *s == ';')) {
s++;
len--;
}
const char *key = s;
const char *value = NULL;
while (len > 0 && *s != ';') {
if (*s == '=' && value == NULL) {
value = s + 1;
}
s++;
len--;
}
int kl = (value ? value-1 : s) - key;
int vl = value ? s - value : 0;
if (kl) {
char *k = urlMakeString(key, kl);
for (char *t = k; *t; t++) {
if (*t >= 'A' && *t <= 'Z') { // fold upper-case characters to lower case
*t |= 0x20;
}
}
char *v = NULL;
if (value) {
if (vl >= 2 && value[0] == '"' && value[vl-1] == '"') {
value++;
vl -= 2; // drop the closing quote too; "value++" already skipped the opening one
}
v = urlMakeString(value, vl);
}
addToHashMap(hashmap, k, v);
}
}
}
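
// strstr() equivalent for a length-delimited buffer that need not be
// NUL-terminated: returns a pointer to the first occurrence of "s" within
// the first "len" bytes of "buf", or NULL if there is none.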
static const char *urlMemstr(const char *buf, int len, const char *s) {
int sLen = strlen(s);
if (!sLen) {
return buf;
}
while (len >= sLen) {
if (len > sLen) {
char *first = memchr(buf, *s, len - sLen);
if (!first) {
return NULL;
}
len -= first - buf;
buf = first;
}
if (!memcmp(buf, s, sLen)) {
return buf;
}
buf++;
len--;
}
return NULL;
}
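
// memcmp()-style comparison of a length-delimited buffer against the
// NUL-terminated string "s"; returns non-zero if "buf" is shorter than "s".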
static int urlMemcmp(const char *buf, int len, const char *s) {
int sLen = strlen(s);
if (len < sLen) {
return s[len];
} else {
return memcmp(buf, s, sLen);
}
}
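
// Case-insensitive variant of urlMemcmp(); used to match header names.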
static int urlMemcasecmp(const char *buf, int len, const char *s) {
int sLen = strlen(s);
if (len < sLen) {
return s[len];
} else {
return strncasecmp(buf, s, sLen);
}
}
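
// Parses a single part of a "multipart/form-data" POST body, i.e. the
// header lines up to the first empty line followed by the payload. If the
// part carries a Content-Disposition of type "form-data" with a "name"
// field, its payload is stored under that name in url->args.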
static void urlParsePart(struct URL *url, const char *buf, int len) {
// Most browsers seem to forget quoting data in the header fields. This
// means, it is quite possible for an HTML form to cause the submission of
// unparseable "multipart/form-data". If this happens, we just give up
// and ignore the malformed data.
// Example:
// <form method="POST" enctype="multipart/form-data">
// <input type="file" name="&quot;&#13;&#10;X: x=&quot;">
// <input type="submit">
// </form>
char *name = NULL;
for (const char *eol; !!(eol = urlMemstr(buf, len, "\r\n")); ) {
if (buf == eol) {
buf += 2;
len -= 2;
if (name) {
char *value = len ? urlMakeString(buf, len) : NULL;
addToHashMap(&url->args, name, value);
name = NULL;
}
break;
} else {
if (!name && !urlMemcasecmp(buf, len, "content-disposition:")) {
struct HashMap fields;
initHashMap(&fields, urlDestroyHashMapEntry, NULL);
urlParseHeaderLine(&fields, buf + 20, eol - buf - 20);
if (getRefFromHashMap(&fields, "form-data")) {
// We currently don't bother to deal with binary files (e.g. files
// that include NUL characters). If this ever becomes necessary,
// we could check for the existence of a "filename" field and use
// that as an indicator to store the payload in something other
// than "url->args".
name = (char *)getFromHashMap(&fields, "name");
if (name && *name) {
check(name = strdup(name));
}
}
destroyHashMap(&fields);
}
len -= eol - buf + 2;
buf = eol + 2;
}
}
free(name);
}
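
// Decodes the body of a POST request according to its Content-Type header:
// "application/x-www-form-urlencoded" bodies are treated like query
// strings, and "multipart/form-data" bodies are split at their "boundary"
// markers, with each part handed to urlParsePart(). Other content types
// are ignored.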
static void urlParsePostBody(struct URL *url,
const struct HttpConnection *http,
const char *buf, int len) {
struct HashMap contentType;
initHashMap(&contentType, urlDestroyHashMapEntry, NULL);
const char *ctHeader = getFromHashMap(&http->header, "content-type");
urlParseHeaderLine(&contentType, ctHeader, ctHeader ? strlen(ctHeader) : 0);
if (getRefFromHashMap(&contentType, "application/x-www-form-urlencoded")) {
urlParseQueryString(&url->args, buf, len);
} else if (getRefFromHashMap(&contentType, "multipart/form-data")) {
const char *boundary = getFromHashMap(&contentType, "boundary");
if (boundary && *boundary) {
const char *lastPart = NULL;
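// Scan the body boundary by boundary: "ptr" marks the "\r\n--" sequence
// that terminates the current part, while "lastPart" remembers where the
// previous part (headers plus payload) started.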
for (const char *part = buf; len > 0; ) {
const char *ptr;
if ((part == buf && (ptr = urlMemstr(part, len, "--")) != NULL) ||
(ptr = urlMemstr(part, len, "\r\n--")) != NULL) {
len -= ptr - part + (part == buf ? 2 : 4);
part = ptr + (part == buf ? 2 : 4);
if (!urlMemcmp(part, len, boundary)) {
int i = strlen(boundary);
len -= i;
part += i;
if (!urlMemcmp(part, len, "\r\n")) {
len -= 2;
part += 2;
if (lastPart) {
urlParsePart(url, lastPart, ptr - lastPart);
} else {
if (ptr != buf) {
info("[http] Ignoring prologue before \"multipart/form-data\"!");
}
}
lastPart = part;
} else if (!urlMemcmp(part, len, "--\r\n")) {
len -= 4;
part += 4;
if (lastPart) {
urlParsePart(url, lastPart, ptr - lastPart);
}
lastPart = NULL;
if (len > 0) {
info("[http] Ignoring epilogue past end of \"multipart/"
"form-data\"!");
}
}
}
} else {
// No further boundary marker found; stop scanning rather than looping
// forever on truncated or corrupted "multipart/form-data".
break;
}
}
if (lastPart) {
warn("[http] Missing final \"boundary\" for \"multipart/form-data\"!");
}
} else {
warn("[http] Missing \"boundary\" information for \"multipart/form-data\"!");
}
}
destroyHashMap(&contentType);
}
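
// Allocates and initializes a URL object describing the request handled by
// "http"; "buf" and "len" hold the POST body, if any. A minimal usage
// sketch (the connection and payload come from libhttp's request handling,
// and the argument name "username" is just an example):
//
//   struct URL *url  = newURL(http, postData, postDataLength);
//   const char *user = getFromHashMap(urlGetArgs(url), "username");
//   ...
//   deleteURL(url);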
struct URL *newURL(const struct HttpConnection *http,
const char *buf, int len) {
struct URL *url;
check(url = malloc(sizeof(struct URL)));
initURL(url, http, buf, len);
return url;
}
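
// Initializes a caller-provided struct URL: copies the protocol, host,
// port, path and query from the connection, then parses the query string
// (for GET requests) or the POST body in "buf"/"len" (for POST requests)
// into url->args.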
void initURL(struct URL *url, const struct HttpConnection *http,
const char *buf, int len) {
url->protocol = strdup(httpGetProtocol(http));
url->user = NULL;
url->password = NULL;
url->host = strdup(httpGetHost(http));
url->port = httpGetPort(http);
url->path = strdup(httpGetPath(http));
url->pathinfo = strdup(httpGetPathInfo(http));
url->query = strdup(httpGetQuery(http));
url->anchor = NULL;
url->url = NULL;
initHashMap(&url->args, urlDestroyHashMapEntry, NULL);
if (!strcmp(http->method, "GET")) {
check(url->query);
urlParseQueryString(&url->args, url->query, strlen(url->query));
} else if (!strcmp(http->method, "POST")) {
urlParsePostBody(url, http, buf, len);
}
}
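
// Releases the memory owned by a URL object set up with initURL(), without
// freeing the object itself.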
void destroyURL(struct URL *url) {
if (url) {
free(url->protocol);
free(url->user);
free(url->password);
free(url->host);
free(url->path);
free(url->pathinfo);
free(url->query);
free(url->anchor);
free(url->url);
destroyHashMap(&url->args);
}
}
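
// Destroys and frees a URL object that was allocated with newURL().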
void deleteURL(struct URL *url) {
destroyURL(url);
free(url);
}
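
// Accessors for the individual URL components captured by initURL(). The
// "user", "password" and "anchor" fields are never filled in here and thus
// remain NULL.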
const char *urlGetProtocol(struct URL *url) {
return url->protocol;
}
const char *urlGetUser(struct URL *url) {
return url->user;
}
const char *urlGetPassword(struct URL *url) {
return url->password;
}
const char *urlGetHost(struct URL *url) {
return url->host;
}
int urlGetPort(struct URL *url) {
return url->port;
}
const char *urlGetPath(struct URL *url) {
return url->path;
}
const char *urlGetPathInfo(struct URL *url) {
return url->pathinfo;
}
const char *urlGetQuery(struct URL *url) {
return url->query;
}
const char *urlGetAnchor(struct URL *url) {
return url->anchor;
}
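
// Lazily builds and caches the canonical "protocol://host[:port]" string
// followed by the path. The port is only included when it differs from the
// protocol's default (80 for "http", 443 otherwise).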
const char *urlGetURL(struct URL *url) {
if (!url->url) {
const char *host = urlGetHost(url);
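// Size budget: 8 bytes cover the protocol ("http"/"https") plus "://", and
// 25 bytes leave ample room for an optional ":port" suffix.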
int s_size = 8 + strlen(host) + 25 + strlen(url->path);
check(*(char **)&url->url = malloc(s_size + 1));
*url->url = '\000';
strncat(url->url, url->protocol, s_size);
strncat(url->url, "://", s_size);
strncat(url->url, host, s_size);
if (url->port != (strcmp(url->protocol, "http") ? 443 : 80)) {
snprintf(strrchr(url->url, '\000'), 25, ":%d", url->port);
}
strncat(url->url, url->path, s_size);
}
return url->url;
}
const struct HashMap *urlGetArgs(struct URL *url) {
return &url->args;
}
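
// Stand-alone helper that parses a query string into a freshly allocated
// hash map of URL-decoded key/value pairs; the caller takes ownership of
// the returned map. For example (hypothetical input):
//
//   const char *query    = "q=shell%20in%20a%20box&page=2";
//   struct HashMap *args = urlParseQuery(query, strlen(query));
//   const char *q        = getFromHashMap(args, "q");  // "shell in a box"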
struct HashMap *urlParseQuery(const char *buf, int len) {
struct HashMap *hashmap = newHashMap(urlDestroyHashMapEntry, NULL);
urlParseQueryString(hashmap, buf, len);
return hashmap;
}