Main Page   Data Structures   File List   Data Fields   Globals   Related Pages  

inet/url.cpp

Go to the documentation of this file.
00001 /*  url.cpp
00002     Parse a Uniform Resource Locator, according to RFC 1738
00003 
00004     13.08.2001: tk, initial implementation, based on url.c
00005                     from the PHP 4.06 source code.
00006 
00007     Copyright (c)2000 by Thomas Kindler, thomas.kindler@gmx.de
00008 
00009     This program is free software; you can redistribute it and/or
00010     modify it under the terms of the GNU General Public License as
00011     published by the Free Software Foundation; either version 2 of
00012     the License, or (at your option) any later version. Read the
00013     full License at http://www.gnu.org/copyleft for more details.
00014 */
00015 
00016 // include files ----------
00017 //
00018 #include <stdio.h>
00019 #include <stdlib.h>
00020 #include <string.h>
00021 #include <misc.h>
00022 #include <ctype.h>
00023 #include "inet/url.h"
00024 
00025 
/**
 * Associates a URL scheme name with its default port number.
 *
 * The name is const-qualified because the table entries are
 * initialised from string literals, which must never be written to.
 */
typedef struct {
  const char *scheme;  ///< scheme name; NULL in the entry that ends a table
  unsigned    port;    ///< default port used when a URL names no port
} scheme_t;
00030 
00031 
00032 static scheme_t schemes[] = {
00033   {"ftp",      DEFAULT_FTP_PORT},
00034   {"telnet",   DEFAULT_TELNET_PORT},
00035   {"mailto",   DEFAULT_SMTP_PORT},
00036   {"gopher",   DEFAULT_GOPHER_PORT},
00037   {"http",     DEFAULT_HTTP_PORT},
00038   {"nntp",     DEFAULT_NNTP_PORT},
00039   {"news",     DEFAULT_NNTP_PORT},
00040   {"wais",     DEFAULT_WAIS_PORT},
00041   {"https",    DEFAULT_HTTPS_PORT},
00042   {"snews",    DEFAULT_SNEWS_PORT},
00043   { NULL, 0 }  // unknown port
00044 };
00045 
00046 
00055 static unsigned port_for_scheme(char *scheme)
00056 {
00057   scheme_t *s = schemes;
00058   while (s->scheme) {
00059     if (!stricmp(s->scheme, scheme))
00060       return s->port;
00061     s++;
00062   }
00063   return 0;
00064 }
00065 
00066 
/// Upper-case hex digits, indexed by nibble value (0..15); read-only.
static const unsigned char hexchars[] = "0123456789ABCDEF";
00068 
00069 
/**
 * Convert two hexadecimal digits to the byte value they spell.
 *
 * Upper- and lower-case digits are accepted. The result is only
 * meaningful when both characters are hex digits; the caller is
 * expected to have checked that.
 *
 * @param s  points to at least two readable characters
 * @return   16 * value(s[0]) + value(s[1]), i.e. 0..255 for valid input
 */
static int htoi(const char *s)
{
  int value = 0;

  for (int i = 0; i < 2; i++) {
    // <ctype.h> functions are undefined for negative arguments, so the
    // (possibly signed) char has to go through unsigned char first
    int c     = tolower((unsigned char)s[i]);
    int digit = (c >= '0' && c <= '9') ? c - '0' : c - 'a' + 10;

    value = value * 16 + digit;
  }
  return value;
}
00093 
00094 
00103 void free_url(URL *url)
00104 {
00105   if (url->scheme)   free(url->scheme);
00106   if (url->user)     free(url->user);
00107   if (url->pass)     free(url->pass);
00108   if (url->host)     free(url->host);
00109   if (url->path)     free(url->path);
00110   if (url->query)    free(url->query);
00111   if (url->fragment) free(url->fragment);
00112   free(url);
00113 }
00114 
00115 
00133 URL *parse_url(const char *urlString)
00134 {
00135   // make a working copy of the URL string
00136   char *urlStringCopy = strdup(urlString);
00137   if (!urlStringCopy)
00138     return NULL;
00139 
00140   // allocate and clear an URL structure
00141   URL *url = (URL*)malloc(sizeof(URL));
00142   if (!url) {
00143     free(urlStringCopy);
00144     return NULL;
00145   }
00146   memset(url, 0, sizeof(URL));
00147 
00148   char *s = urlStringCopy;
00149   char *p1, *p2;
00150 
00151   // get scheme component (before first ':')
00152   p1 = strchr(s, ':');
00153   if (p1) {
00154     *p1 = '\0';
00155     url->scheme = strdup(s);
00156     s = p1+1;
00157 
00158     // skip the "//" for http, ftp, etc..
00159     if (*s == '/') s++;
00160     if (*s == '/') s++;
00161   }
00162 
00163   // get username and password ( user:pass@ )
00164   p1 = strchr(s, '@');
00165   if (p1) {
00166     *p1 = '\0';
00167     p2 = strchr(s, ':');
00168     if (p2) {
00169       *p2 = '\0';
00170       url->pass = strdup(p2+1);
00171     }
00172     url->user = strdup(s);
00173     s = p1+1;
00174   }
00175 
00176   // find end of hostname (find next '\0' or '/')
00177   p1 = strchr(s, '/');
00178   if (p1)
00179     *p1 = '\0';
00180 
00181   // look for a port number (host:port)
00182   p2 = strchr(s, ':');
00183   if (p2) {
00184     *p2 = '\0';
00185     sscanf(p2+1, "%u", &url->port);
00186   }
00187 
00188   // copy remaining string as hostname
00189   url->host = strdup(s);
00190   if (p1)
00191     s = p1+1;
00192 
00193   // check for path if there was a '/' after hostname
00194   if (p1) {
00195     // look for fragment at the end of the path
00196     p1 = strchr(s, '#');
00197     if (p1) {
00198       *p1 = '\0';
00199       url->fragment = strdup(p1+1);
00200     }
00201 
00202     // look for query at the end of the path
00203     p1 = strchr(s, '?');
00204     if (p1) {
00205       *p1 = '\0';
00206       url->query = strdup(p1+1);
00207     }
00208 
00209     // copy remaining string as path
00210     url->path = strdup(s);
00211   }
00212   free(urlStringCopy);
00213 
00214   if (!url->port)
00215     url->port = port_for_scheme(url->scheme);
00216 
00217   return url;
00218 }
00219 
00220 
00234 char *url_encode(char *s, unsigned len)
00235 {
00236   char *str = (char *)malloc(3*len + 1);
00237   if (!str)
00238     return NULL;
00239 
00240   int x,y;
00241   for (x=0, y=0; len--; x++, y++) {
00242     str[y]=(char)s[x];
00243     if (
00244       (str[y] < '0' && str[y] != '-' && str[y] != '.') ||
00245       (str[y] < 'A' && str[y] > '9') ||
00246       (str[y] > 'Z' && str[y] < 'a' && str[y] != '_') ||
00247       (str[y] > 'z')
00248     ) {
00249       str[y++] = '%';
00250       str[y++] = hexchars[(unsigned char) s[x] >> 4];
00251       str[y]   = hexchars[(unsigned char) s[x] & 15];
00252     }
00253   }
00254   str[y] = '\0';
00255   return str;
00256 }
00257 
00258 
/**
 * Decode percent-escapes ("%XX") in place.
 *
 * A '%' that is not followed by two hex digits within the given
 * length is copied unchanged, like any other character. The result is
 * never longer than the input and is NUL-terminated, so the buffer
 * must have room for one byte beyond str[len-1] (an input without
 * escapes puts the terminator at str[len]).
 *
 * @param str  buffer holding the encoded text; overwritten with the
 *             decoded text
 * @param len  number of characters to decode
 * @return     length of the decoded text, not counting the terminator
 */
unsigned url_decode(char *str, unsigned len)
{
  char *dst = str;
  char *src = str;

  while (len--) {
    // len now counts the characters that follow *src.
    // isxdigit() is undefined for negative values, hence the casts.
    if (  *src == '%' &&  len  >=   2 &&
          isxdigit((unsigned char)src[1]) &&
          isxdigit((unsigned char)src[2])  )
    {
      *dst++ = (char)htoi(src + 1);
      src   += 3;   // skip the '%' and both hex digits
      len   -= 2;   // the '%' itself was counted by the loop condition
    } else {
      *dst++ = *src++;
    }
  }
  *dst = '\0';

  return (unsigned)(dst-str);
}
00291 

Generated on Sun Aug 4 21:47:28 2002 for k/os mp3v2 by doxygen1.2.16