00001 /* String functions which allocate strings on the pool. 00002 * By Richard W.M. Jones <rich@annexia.org> 00003 * 00004 * This library is free software; you can redistribute it and/or 00005 * modify it under the terms of the GNU Library General Public 00006 * License as published by the Free Software Foundation; either 00007 * version 2 of the License, or (at your option) any later version. 00008 * 00009 * This library is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00012 * Library General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU Library General Public 00015 * License along with this library; if not, write to the Free 00016 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 00017 * 00018 * $Id: pstring.h,v 1.1 2003/03/28 15:51:00 chappell Exp $ 00019 */ 00020 00021 #ifndef PSTRING_H 00022 #define PSTRING_H 00023 00024 #include <stdio.h> 00025 #include <stdarg.h> 00026 00027 #include <pcre.h> 00028 00029 #include <pool.h> 00030 #include <vector.h> 00031 00032 /* Function: pstrdup - duplicate a string or area of memory 00033 * Function: pstrndup 00034 * Function: pmemdup 00035 * 00036 * @code{pstrdup} duplicates string @code{s}, allocating new memory for the 00037 * string in pool @code{pool}. 00038 * 00039 * @code{pstrndup} duplicates just the first @code{n} characters of the 00040 * string. 00041 * 00042 * @code{pmemdup} duplicates an arbitrary area of memory of size 00043 * @code{size} bytes starting at address @code{data}. 00044 */ 00045 extern char *pstrdup (pool, const char *s); 00046 extern char *pstrndup (pool, const char *s, int n); 00047 extern void *pmemdup (pool, const void *data, size_t size); 00048 00049 /* Function: pstrsplit - split a string on a character, string or regexp. 00050 * Function: pstrcsplit 00051 * Function: pstrresplit 00052 * Function: pstrsplit2 00053 * Function: pstrcsplit2 00054 * Function: pstrresplit2 00055 * 00056 * These functions split string @code{str} on either a string 00057 * @code{sep}, a character @code{c} or a regular expression @code{re}. 00058 * 00059 * The result is a vector of newly created substrings. 00060 * 00061 * The @code{*2} variants split the string in the same way 00062 * on the regular expression, but keeps the matching splitting text as 00063 * separate elements in the vector. To illustrate this, imagine that 00064 * @code{pstrresplit} and @code{pstrresplit2} are called on the string 00065 * "This text is <b>bold</b>" with the regular expression @code{[<>]}. 00066 * 00067 * @code{pstrresplit} will return a vector containing: 00068 * 00069 * @code{ ( "This text is ", "b", "bold", "/b" ) } 00070 * 00071 * whereas @code{pstrcsplit2} will return: 00072 * 00073 * @code{ ( "This text is ", "<", "b", ">", "bold", "<", "/b", ">" ) } 00074 * 00075 * Note that the first element of the vector might be splitting 00076 * text, or might be ordinary text as in the example above. Also 00077 * the elements may not be interleaved like this (think about 00078 * what would happen if the original string contained @code{"<b></b>"}). 00079 * The only way to decide would be to call @code{prematch} on each element. 00080 * 00081 * This turns out to be very useful for certain sorts of simple 00082 * parsing, or if you need to reconstruct the original string (just 00083 * concatenate all of the elements together using @code{pconcat}). 00084 * 00085 * In common with Perl's @code{split} function, all of these functions 00086 * return a zero length vector if @code{str} is the empty string. 00087 * 00088 * See also: @ref{prematch(3)}, @ref{pconcat(3)}. 00089 */ 00090 extern vector pstrsplit (pool, const char *str, const char *sep); 00091 extern vector pstrcsplit (pool, const char *str, char c); 00092 extern vector pstrresplit (pool, const char *str, const pcre *re); 00093 extern vector pstrsplit2 (pool, const char *str, const char *sep); 00094 extern vector pstrcsplit2 (pool, const char *str, char c); 00095 extern vector pstrresplit2 (pool, const char *str, const pcre *re); 00096 00097 /* Function: pconcat - concatenate a vector of strings 00098 * Function: pjoin 00099 * 00100 * @code{pconcat} concatenates a vector of strings to form a string. 00101 * 00102 * @code{pjoin} is similar except that @code{sep} is inserted between 00103 * each concatenated string in the output. 00104 * 00105 * @code{pjoin} is kind of the opposite of @ref{pstrsplit(3)}. 00106 */ 00107 extern char *pconcat (pool, vector); 00108 extern char *pjoin (pool, vector, const char *sep); 00109 00110 /* Function: pchrs - generate a string of n repeated characters or strings 00111 * Function: pstrs 00112 * 00113 * @code{pchrs (pool, 'c', n)} is similar to the Perl expression 00114 * @code{'c' x n}. It generates a pool-allocated string of @code{n} copies 00115 * of character @code{'c'}. 00116 * 00117 * @code{pstrs (pool, str, n)} is similar to the Perl expression 00118 * @code{str x n}. It generates a pool-allocated string of @code{n} copies 00119 * of the string @code{str}. 00120 */ 00121 extern char *pchrs (pool, char c, int n); 00122 extern char *pstrs (pool, const char *str, int n); 00123 00124 /* Function: pvector - generate a vector from a list or array of strings 00125 * Function: pvectora 00126 * 00127 * @code{pvector} takes a NULL-terminated list of strings as arguments 00128 * and returns a vector of strings. @code{pvectora} takes a pointer to 00129 * an array of strings and the number of strings and returns a vector 00130 * of strings. 00131 * 00132 * A typical use of this is to quickly concatenate strings: 00133 * 00134 * @code{s = pconcat (pool, pvector (pool, s1, s2, s3, NULL));} 00135 * 00136 * which is roughly equivalent to: 00137 * 00138 * @code{s = psprintf (pool, "%s%s%s", s1, s2, s3);} 00139 * 00140 * See also: @ref{pconcat(3)}, @ref{psprintf(3)}. 00141 */ 00142 extern vector pvector (pool, ...); 00143 extern vector pvectora (pool, const char *array[], int n); 00144 00145 /* Function: psort - sort a vector of strings 00146 * 00147 * Sort a vector of strings, using @code{compare_fn} to compare 00148 * strings. The vector is sorted in-place. 00149 * 00150 * It is a common mistake to try to use @code{strcmp} directly 00151 * as your comparison function. This will not work. See the 00152 * C FAQ, section 12, question 12.2 00153 * (@code{http://www.lysator.liu.se/c/c-faq/c-12.html}). 00154 */ 00155 extern void psort (vector, int (*compare_fn) (const char **, const char **)); 00156 00157 /* Function: pchomp - remove line endings from a string 00158 * 00159 * Remove line endings (either CR, CRLF or LF) from the string argument. 00160 * The string is modified in-place and a pointer to the string 00161 * is also returned. 00162 */ 00163 extern char *pchomp (char *line); 00164 00165 /* Function: ptrim - remove whitespace from the ends of a string 00166 * Function: ptrimfront 00167 * Function: ptrimback 00168 * 00169 * @code{ptrim} modifies a string of text in place, removing any 00170 * whitespace characters from the beginning and end of the line. 00171 * 00172 * @code{ptrimfront} is the same as @code{ptrim} but only removes 00173 * whitespace from the beginning of a string. 00174 * 00175 * @code{ptrimback} is the same as @code{ptrim} but only removes 00176 * whitespace from the end of a string. 00177 */ 00178 extern char *ptrim (char *str); 00179 extern char *ptrimfront (char *str); 00180 extern char *ptrimback (char *str); 00181 00182 /* Function: psprintf - sprintf which allocates the result in a pool 00183 * Function: pvsprintf 00184 * 00185 * The @code{psprintf} function is equivalent to @code{sprintf} 00186 * but it allocates the result string in @code{pool}. 00187 * 00188 * @code{pvsprintf} works similarly to @code{vsprintf}. 00189 */ 00190 extern char *psprintf (pool, const char *format, ...) __attribute__ ((format (printf, 2, 3))); 00191 extern char *pvsprintf (pool, const char *format, va_list ap); 00192 00193 /* Function: pitoa - convert number types to strings 00194 * Function: pdtoa 00195 * Function: pxtoa 00196 * 00197 * These functions convert a decimal @code{int}, @code{double} or 00198 * hexadecimal @code{unsigned} into a string, which is allocated 00199 * in @code{pool}. 00200 * 00201 * @code{pitoa} is equivalent to @code{psprintf (pool, "%d", i)}, 00202 * and the other functions have similar equivalents. 00203 */ 00204 extern char *pitoa (pool, int); 00205 extern char *pdtoa (pool, double); 00206 extern char *pxtoa (pool, unsigned); 00207 00208 /* Function: pvitostr - convert vectors of numbers to vectors of strings 00209 * Function: pvdtostr 00210 * Function: pvxtostr 00211 * 00212 * Promote vector of numbers to vector of strings. 00213 * 00214 * @code{pvitostr} expects a vector of @code{int}. 00215 * 00216 * @code{pvdtostr} expects a vector of @code{double}. 00217 * 00218 * @code{pvxtostr} expects a vector of hexadecimal @code{unsigned}. 00219 * 00220 * All functions return a vector of @code{char *}. 00221 */ 00222 extern vector pvitostr (pool, vector); 00223 extern vector pvdtostr (pool, vector); 00224 extern vector pvxtostr (pool, vector); 00225 00226 /* Function: pstrcat - extend a string 00227 * Function: pstrncat 00228 * 00229 * @code{str} is a string allocated in @code{pool}. 00230 * Append @code{ending} to @code{str}, reallocating 00231 * @code{str} if necessary. 00232 * 00233 * Because @code{str} may be reallocated (ie. moved) you 00234 * must invoke this function as follows: 00235 * 00236 * @code{str = pstrcat (pool, str, ending);} 00237 * 00238 * @code{pstrncat} is similar to @code{pstrcat} except that 00239 * only the first @code{n} characters of @code{ending} 00240 * are appended to @code{str}. 00241 */ 00242 extern char *pstrcat (pool, char *str, const char *ending); 00243 extern char *pstrncat (pool, char *str, const char *ending, size_t n); 00244 00245 /* Function: psubstr - return a substring of a string 00246 * 00247 * Return the substring starting at @code{offset} and of length 00248 * @code{len} of @code{str}, allocated 00249 * as a new string. If @code{len} is negative, 00250 * everything up to the end of @code{str} 00251 * is returned. 00252 */ 00253 extern char *psubstr (pool, const char *str, int offset, int len); 00254 00255 /* Function: pstrupr - convert a string to upper- or lowercase 00256 * Function: pstrlwr 00257 * 00258 * Convert a string, in-place, to upper or lowercase by applying 00259 * @code{toupper} or @code{tolower} to each character in turn. 00260 */ 00261 extern char *pstrupr (char *str); 00262 extern char *pstrlwr (char *str); 00263 00264 /* Function: pgetline - read a line from a file, optionally removing comments 00265 * Function: pgetlinex 00266 * Function: pgetlinec 00267 * 00268 * @code{pgetline} reads a single line from a file and returns it. It 00269 * allocates enough space to read lines of arbitrary length. Line ending 00270 * characters ('\r' and '\n') are automatically removed from the end 00271 * of the line. 00272 * 00273 * The @code{pool} argument is a pool for allocating the line. The 00274 * @code{fp} argument is the C @code{FILE} pointer. The @code{line} 00275 * argument is a pointer to a string allocated in pool which will 00276 * be reallocated and filled with the contents of the line. You may 00277 * pass @code{line} as @code{NULL} to get a newly allocated buffer. 00278 * 00279 * Use @code{pgetline} in one of the following two ways: 00280 * 00281 * @code{line = pgetline (pool, fp, line);} 00282 * 00283 * or 00284 * 00285 * @code{line = pgetline (pool, fp, NULL);} 00286 * 00287 * @code{pgetlinex} is a more advanced function which reads a line 00288 * from a file, optionally removing comments, concatenating together 00289 * lines which have been split with a backslash, and ignoring blank 00290 * lines. @code{pgetlinex} (and the related macro @code{pgetlinec}) are 00291 * very useful for reading lines of input from a configuration file. 00292 * 00293 * The @code{pool} argument is a pool for allocating the line. The 00294 * @code{fp} argument is the C @code{FILE} pointer. The @code{line} 00295 * argument is a buffer allocated in pool which will be reallocated 00296 * and filled with the result. @code{comment_set} is the set of 00297 * possible comment characters -- eg. @code{"#!"} to allow either 00298 * @code{#} or @code{!} to be used to introduce comments. 00299 * @code{flags} is zero or more of the following flags OR-ed 00300 * together: 00301 * 00302 * @code{PGETL_NO_CONCAT}: Don't concatenate lines which have been 00303 * split with trailing backslash characters. 00304 * 00305 * @code{PGETL_INLINE_COMMENTS}: Treat everything following a comment 00306 * character as a comment. The default is to only allow comments which 00307 * appear on a line on their own. 00308 * 00309 * @code{pgetlinec} is a helper macro which calls @code{pgetlinex} 00310 * with @code{comment_set == "#"} and @code{flags == 0}. 00311 */ 00312 extern char *pgetline (pool, FILE *fp, char *line); 00313 extern char *pgetlinex (pool, FILE *fp, char *line, const char *comment_set, int flags); 00314 #define pgetlinec(p,fp,line) pgetlinex ((p), (fp), (line), "#", 0) 00315 00316 #define PGETL_NO_CONCAT 1 00317 #define PGETL_INLINE_COMMENTS 2 00318 00319 /* Function: pmap - map, search vectors of strings 00320 * Function: pgrep 00321 * 00322 * @code{pmap} takes a @code{vector} of strings (@code{char *}) and 00323 * transforms it into another @code{vector} of strings by applying 00324 * the function @code{char *map_fn (pool, const char *)} to each 00325 * string. 00326 * 00327 * @code{pgrep} applies the function @code{int grep_fn (pool, const char *)} 00328 * to each element in a @code{vector} of strings, and returns a 00329 * new vector of strings containing only those strings where 00330 * @code{grep_fn} returns true. 00331 * 00332 * See also: @ref{vector_map_pool(3)}, @ref{vector_grep_pool(3)}. 00333 */ 00334 vector pmap (pool, const vector v, char *(*map_fn) (pool, const char *)); 00335 vector pgrep (pool, const vector v, int (*grep_fn) (pool, const char *)); 00336 00337 #endif /* PSTRING_H */