Browse Source

Added a URI parser that should be standards conformant.  (It can certainly
handle something as convoluted as

  ftp://joe:secret@insecure.org:8081/hidden/path/to?what=is#this

.)
tags/v0.9.3
Michael Brown 18 years ago
parent
commit
26166cf5e0
2 changed files with 277 additions and 0 deletions
  1. 161
    0
      src/core/uri.c
  2. 116
    0
      src/include/gpxe/uri.h

+ 161
- 0
src/core/uri.c View File

@@ -0,0 +1,161 @@
1
+/*
2
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
3
+ *
4
+ * This program is free software; you can redistribute it and/or
5
+ * modify it under the terms of the GNU General Public License as
6
+ * published by the Free Software Foundation; either version 2 of the
7
+ * License, or any later version.
8
+ *
9
+ * This program is distributed in the hope that it will be useful, but
10
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
+ * General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU General Public License
15
+ * along with this program; if not, write to the Free Software
16
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17
+ */
18
+
19
+/** @file
20
+ *
21
+ * Uniform Resource Identifiers
22
+ *
23
+ */
24
+
25
+#include <stdint.h>
26
+#include <stdlib.h>
27
+#include <string.h>
28
+#include <gpxe/uri.h>
29
+
30
+/**
31
+ * Parse URI
32
+ *
33
+ * @v uri_string	URI as a string
34
+ * @ret uri		URI
35
+ *
36
+ * Splits a URI into its component parts.  The return URI structure is
37
+ * dynamically allocated and must eventually be freed by calling
38
+ * free_uri().
39
+ */
40
+struct uri * parse_uri ( const char *uri_string ) {
41
+	struct uri *uri;
42
+	char *raw;
43
+	char *tmp;
44
+	char *path = NULL;
45
+	char *authority = NULL;
46
+	size_t raw_len;
47
+
48
+	/* Allocate space for URI struct and a copy of the string */
49
+	raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
50
+	uri = malloc ( sizeof ( *uri ) + raw_len );
51
+	if ( ! uri )
52
+		return NULL;
53
+	raw = ( ( ( char * ) uri ) + sizeof ( *uri ) );
54
+
55
+	/* Zero URI struct and copy in the raw string */
56
+	memset ( uri, 0, sizeof ( *uri ) );
57
+	memcpy ( raw, uri_string, raw_len );
58
+
59
+	/* Start by chopping off the fragment, if it exists */
60
+	if ( ( tmp = strchr ( raw, '#' ) ) ) {
61
+		*(tmp++) = '\0';
62
+		uri->fragment = tmp;
63
+	}
64
+
65
+	/* Identify absolute/relative URI */
66
+	if ( ( tmp = strchr ( raw, ':' ) ) ) {
67
+		/* Absolute URI: identify hierarchical/opaque */
68
+		uri->scheme = raw;
69
+		*(tmp++) = '\0';
70
+		if ( *tmp == '/' ) {
71
+			/* Absolute URI with hierarchical part */
72
+			path = tmp;
73
+		} else {
74
+			/* Absolute URI with opaque part */
75
+			uri->opaque = tmp;
76
+		}
77
+	} else {
78
+		/* Relative URI */
79
+		path = raw;
80
+	}
81
+
82
+	/* If we don't have a path (i.e. we have an absolute URI with
83
+	 * an opaque portion, we're already finished processing
84
+	 */
85
+	if ( ! path )
86
+		goto done;
87
+
88
+	/* Chop off the query, if it exists */
89
+	if ( ( tmp = strchr ( path, '?' ) ) ) {
90
+		*(tmp++) = '\0';
91
+		uri->query = tmp;
92
+	}
93
+
94
+	/* Identify net/absolute/relative path */
95
+	if ( strncmp ( path, "//", 2 ) == 0 ) {
96
+		/* Net path.  If this is terminated by the first '/'
97
+		 * of an absolute path, then we have no space for a
98
+		 * terminator after the authority field, so shuffle
99
+		 * the authority down by one byte, overwriting one of
100
+		 * the two slashes.
101
+		 */
102
+		authority = ( path + 2 );
103
+		if ( ( tmp = strchr ( authority, '/' ) ) ) {
104
+			/* Shuffle down */
105
+			uri->path = tmp;
106
+			memmove ( ( authority - 1 ), authority,
107
+				  ( tmp - authority ) );
108
+			authority--;
109
+			*(--tmp) = '\0';
110
+		}
111
+	} else {
112
+		/* Absolute/relative path */
113
+		uri->path = path;
114
+	}
115
+
116
+	/* Split authority into user[:password] and host[:port] portions */
117
+	if ( ( tmp = strchr ( authority, '@' ) ) ) {
118
+		/* Has user[:password] */
119
+		*(tmp++) = '\0';
120
+		uri->host = tmp;
121
+		uri->user = authority;
122
+		if ( ( tmp = strchr ( authority, ':' ) ) ) {
123
+			/* Has password */
124
+			*(tmp++) = '\0';
125
+			uri->password = tmp;
126
+		}
127
+	} else {
128
+		/* No user:password */
129
+		uri->host = authority;
130
+	}
131
+
132
+	/* Split host into host[:port] */
133
+	if ( ( tmp = strchr ( uri->host, ':' ) ) ) {
134
+		*(tmp++) = '\0';
135
+		uri->port = tmp;
136
+	}
137
+
138
+ done:
139
+	DBG ( "URI \"%s\" split into", raw );
140
+	if ( uri->scheme )
141
+		DBG ( " scheme \"%s\"", uri->scheme );
142
+	if ( uri->opaque )
143
+		DBG ( " opaque \"%s\"", uri->opaque );
144
+	if ( uri->user )
145
+		DBG ( " user \"%s\"", uri->user );
146
+	if ( uri->password )
147
+		DBG ( " password \"%s\"", uri->password );
148
+	if ( uri->host )
149
+		DBG ( " host \"%s\"", uri->host );
150
+	if ( uri->port )
151
+		DBG ( " port \"%s\"", uri->port );
152
+	if ( uri->path )
153
+		DBG ( " path \"%s\"", uri->path );
154
+	if ( uri->query )
155
+		DBG ( " query \"%s\"", uri->query );
156
+	if ( uri->fragment )
157
+		DBG ( " fragment \"%s\"", uri->fragment );
158
+	DBG ( "\n" );
159
+
160
+	return uri;
161
+}

+ 116
- 0
src/include/gpxe/uri.h View File

@@ -0,0 +1,116 @@
1
+#ifndef _GPXE_URI_H
2
+#define _GPXE_URI_H
3
+
4
+/** @file
5
+ *
6
+ * Uniform Resource Identifiers
7
+ *
8
+ */
9
+
10
+#include <stdlib.h>
11
+
12
/** A Uniform Resource Identifier
 *
 * Field names follow the terminology of uri(7), with one exception:
 * "path" keeps the leading '/' when the path is absolute.
 *
 * Every field is optional; a component that is absent from the URI
 * is represented by a NULL pointer.
 *
 * Worked examples:
 *
 *   http://www.etherboot.org/wiki
 *     scheme = "http", host = "www.etherboot.org", path = "/wiki"
 *
 *   /var/lib/tftpboot
 *     path = "/var/lib/tftpboot"
 *
 *   mailto:bob@nowhere.com
 *     scheme = "mailto", opaque = "bob@nowhere.com"
 *
 *   ftp://joe:secret@insecure.org:8081/hidden/path/to?what=is#this
 *     scheme = "ftp", user = "joe", password = "secret",
 *     host = "insecure.org", port = "8081", path = "/hidden/path/to",
 *     query = "what=is", fragment = "this"
 */
struct uri {
	/** Scheme, e.g. "http" */
	const char *scheme;
	/** Opaque part, e.g. "bob@nowhere.com" in a mailto: URI */
	const char *opaque;
	/** User name from the authority */
	const char *user;
	/** Password from the authority */
	const char *password;
	/** Host name from the authority */
	const char *host;
	/** Port number, kept as a string, e.g. "8081" */
	const char *port;
	/** Path, including any leading '/' */
	const char *path;
	/** Query, e.g. "what=is" */
	const char *query;
	/** Fragment, e.g. "this" */
	const char *fragment;
};
59
+
60
+/**
61
+ * URI is an absolute URI
62
+ *
63
+ * @v uri			URI
64
+ * @ret is_absolute		URI is absolute
65
+ *
66
+ * An absolute URI begins with a scheme, e.g. "http:" or "mailto:".
67
+ * Note that this is a separate concept from a URI with an absolute
68
+ * path.
69
+ */
70
+static inline int uri_is_absolute ( struct uri *uri ) {
71
+	return ( uri->scheme != NULL );
72
+}
73
+
74
+/**
75
+ * URI has an absolute path
76
+ *
77
+ * @v uri			URI
78
+ * @ret has_absolute_path	URI has an absolute path
79
+ *
80
+ * An absolute path begins with a '/'.  Note that this is a separate
81
+ * concept from an absolute URI.  Note also that a URI may not have a
82
+ * path at all.
83
+ */
84
+static inline int uri_has_absolute_path ( struct uri *uri ) {
85
+	return ( uri->path && ( uri->path[0] == '/' ) );
86
+}
87
+
88
+/**
89
+ * URI has a relative path
90
+ *
91
+ * @v uri			URI
92
+ * @ret has_relative_path	URI has a relative path
93
+ *
94
+ * An relative path begins with something other than a '/'.  Note that
95
+ * this is a separate concept from a relative URI.  Note also that a
96
+ * URI may not have a path at all.
97
+ */
98
+static inline int uri_has_relative_path ( struct uri *uri ) {
99
+	return ( uri->path && ( uri->path[0] != '/' ) );
100
+}
101
+
102
/**
 * Free URI structure
 *
 * @v uri		URI
 *
 * Releases the storage behind a URI structure by passing the pointer
 * to free().  No field is freed individually.
 */
static inline void free_uri ( struct uri *uri ) {
	free ( uri );
}
113
+
114
+extern struct uri * parse_uri ( const char *uri_string );
115
+
116
+#endif /* _GPXE_URI_H */

Loading…
Cancel
Save