Main Page | Data Structures | File List | Globals

include/pstring.h

00001 /* String functions which allocate strings on the pool.
00002  * By Richard W.M. Jones <rich@annexia.org>
00003  *
00004  * This library is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU Library General Public
00006  * License as published by the Free Software Foundation; either
00007  * version 2 of the License, or (at your option) any later version.
00008  *
00009  * This library is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012  * Library General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU Library General Public
00015  * License along with this library; if not, write to the Free
00016  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00017  *
00018  * $Id: pstring.h,v 1.1 2003/03/28 15:51:00 chappell Exp $
00019  */
00020 
00021 #ifndef PSTRING_H
00022 #define PSTRING_H
00023 
00024 #include <stdio.h>
00025 #include <stdarg.h>
00026 
00027 #include <pcre.h>
00028 
00029 #include <pool.h>
00030 #include <vector.h>
00031 
00032 /* Function: pstrdup - duplicate a string or area of memory
00033  * Function: pstrndup
00034  * Function: pmemdup
00035  *
00036  * @code{pstrdup} duplicates string @code{s}, allocating new memory for the
00037  * string in pool @code{pool}.
00038  *
00039  * @code{pstrndup} duplicates just the first @code{n} characters of the
00040  * string.
00041  *
00042  * @code{pmemdup} duplicates an arbitrary area of memory of size
00043  * @code{size} bytes starting at address @code{data}.
00044  */
00045 extern char *pstrdup (pool, const char *s);
00046 extern char *pstrndup (pool, const char *s, int n);
00047 extern void *pmemdup (pool, const void *data, size_t size);
00048 
00049 /* Function: pstrsplit - split a string on a character, string or regexp.
00050  * Function: pstrcsplit
00051  * Function: pstrresplit
00052  * Function: pstrsplit2
00053  * Function: pstrcsplit2
00054  * Function: pstrresplit2
00055  * 
00056  * These functions split string @code{str} on either a string
00057  * @code{sep}, a character @code{c} or a regular expression @code{re}.
00058  *
00059  * The result is a vector of newly created substrings.
00060  *
00061  * The @code{*2} variants split the string in the same way
00062  * on the separator, but keep the matching splitting text as
00063  * separate elements in the vector. To illustrate this, imagine that
00064  * @code{pstrresplit} and @code{pstrresplit2} are called on the string
00065  * "This text is <b>bold</b>" with the regular expression @code{[<>]}.
00066  *
00067  * @code{pstrresplit} will return a vector containing:
00068  *
00069  * @code{ ( "This text is ", "b", "bold", "/b" ) }
00070  *
00071  * whereas @code{pstrresplit2} will return:
00072  *
00073  * @code{ ( "This text is ", "<", "b", ">", "bold", "<", "/b", ">" ) }
00074  *
00075  * Note that the first element of the vector might be splitting
00076  * text, or might be ordinary text as in the example above. Also
00077  * the elements may not be interleaved like this (think about
00078  * what would happen if the original string contained @code{"<b></b>"}).
00079  * The only way to decide would be to call @code{prematch} on each element.
00080  *
00081  * This turns out to be very useful for certain sorts of simple
00082  * parsing, or if you need to reconstruct the original string (just
00083  * concatenate all of the elements together using @code{pconcat}).
00084  *
00085  * In common with Perl's @code{split} function, all of these functions
00086  * return a zero length vector if @code{str} is the empty string.
00087  *
00088  * See also: @ref{prematch(3)}, @ref{pconcat(3)}.
00089  */
00090 extern vector pstrsplit (pool, const char *str, const char *sep);
00091 extern vector pstrcsplit (pool, const char *str, char c);
00092 extern vector pstrresplit (pool, const char *str, const pcre *re);
00093 extern vector pstrsplit2 (pool, const char *str, const char *sep);
00094 extern vector pstrcsplit2 (pool, const char *str, char c);
00095 extern vector pstrresplit2 (pool, const char *str, const pcre *re);
00096 
00097 /* Function: pconcat - concatenate a vector of strings
00098  * Function: pjoin
00099  *
00100  * @code{pconcat} concatenates a vector of strings to form a string.
00101  *
00102  * @code{pjoin} is similar except that @code{sep} is inserted between
00103  * each concatenated string in the output.
00104  *
00105  * @code{pjoin} is roughly the inverse of @ref{pstrsplit(3)}.
00106  */
00107 extern char *pconcat (pool, vector);
00108 extern char *pjoin (pool, vector, const char *sep);
00109 
00110 /* Function: pchrs - generate a string of n repeated characters or strings
00111  * Function: pstrs
00112  *
00113  * @code{pchrs (pool, 'c', n)} is similar to the Perl expression
00114  * @code{'c' x n}. It generates a pool-allocated string of @code{n} copies
00115  * of character @code{'c'}.
00116  *
00117  * @code{pstrs (pool, str, n)} is similar to the Perl expression
00118  * @code{str x n}. It generates a pool-allocated string of @code{n} copies
00119  * of the string @code{str}.
00120  */
00121 extern char *pchrs (pool, char c, int n);
00122 extern char *pstrs (pool, const char *str, int n);
00123 
00124 /* Function: pvector - generate a vector from a list or array of strings
00125  * Function: pvectora
00126  *
00127  * @code{pvector} takes a NULL-terminated list of strings as arguments
00128  * and returns a vector of strings. @code{pvectora} takes a pointer to
00129  * an array of strings and the number of strings and returns a vector
00130  * of strings.
00131  *
00132  * A typical use of this is to quickly concatenate strings:
00133  *
00134  * @code{s = pconcat (pool, pvector (pool, s1, s2, s3, NULL));}
00135  *
00136  * which is roughly equivalent to:
00137  *
00138  * @code{s = psprintf (pool, "%s%s%s", s1, s2, s3);}
00139  *
00140  * See also: @ref{pconcat(3)}, @ref{psprintf(3)}.
00141  */
00142 extern vector pvector (pool, ...);
00143 extern vector pvectora (pool, const char *array[], int n);
00144 
00145 /* Function: psort - sort a vector of strings
00146  *
00147  * Sort a vector of strings, using @code{compare_fn} to compare
00148  * strings. The vector is sorted in-place.
00149  *
00150  * It is a common mistake to try to use @code{strcmp} directly
00151  * as your comparison function. This will not work, because
00152  * @code{compare_fn} is passed @code{const char **} arguments. See the C FAQ,
00153  * section 12, question 12.2 (@code{http://www.lysator.liu.se/c/c-faq/c-12.html}).
00154  */
00155 extern void psort (vector, int (*compare_fn) (const char **, const char **));
00156 
00157 /* Function: pchomp - remove line endings from a string
00158  *
00159  * Remove line endings (either CR, CRLF or LF) from the string argument.
00160  * The string is modified in-place and a pointer to the string
00161  * is also returned.
00162  */
00163 extern char *pchomp (char *line);
00164 
00165 /* Function: ptrim - remove whitespace from the ends of a string
00166  * Function: ptrimfront
00167  * Function: ptrimback
00168  *
00169  * @code{ptrim} modifies a string of text in place, removing any
00170  * whitespace characters from the beginning and end of the line.
00171  *
00172  * @code{ptrimfront} is the same as @code{ptrim} but only removes
00173  * whitespace from the beginning of a string.
00174  *
00175  * @code{ptrimback} is the same as @code{ptrim} but only removes
00176  * whitespace from the end of a string.
00177  */
00178 extern char *ptrim (char *str);
00179 extern char *ptrimfront (char *str);
00180 extern char *ptrimback (char *str);
00181 
00182 /* Function: psprintf - sprintf which allocates the result in a pool
00183  * Function: pvsprintf
00184  *
00185  * @code{psprintf} works like @code{sprintf} and @code{pvsprintf} works
00186  * like @code{vsprintf}, but the result string is allocated in @code{pool}.
00187  * Both carry the printf @code{format} attribute so the compiler can
00188  * check @code{format} (and the arguments passed to @code{psprintf}).
00189  */
00190 extern char *psprintf (pool, const char *format, ...) __attribute__ ((format (printf, 2, 3)));
00191 extern char *pvsprintf (pool, const char *format, va_list ap) __attribute__ ((format (printf, 2, 0)));
00192 
00193 /* Function: pitoa - convert number types to strings
00194  * Function: pdtoa
00195  * Function: pxtoa
00196  *
00197  * These functions convert an @code{int} (as decimal), a @code{double}
00198  * or an @code{unsigned} (as hexadecimal) into a string, which is
00199  * allocated in @code{pool}.
00200  *
00201  * @code{pitoa} is equivalent to @code{psprintf (pool, "%d", i)},
00202  * and the other functions have similar equivalents.
00203  */
00204 extern char *pitoa (pool, int);
00205 extern char *pdtoa (pool, double);
00206 extern char *pxtoa (pool, unsigned);
00207 
00208 /* Function: pvitostr - convert vectors of numbers to vectors of strings
00209  * Function: pvdtostr
00210  * Function: pvxtostr
00211  *
00212  * Promote vector of numbers to vector of strings.
00213  *
00214  * @code{pvitostr} expects a vector of @code{int}.
00215  *
00216  * @code{pvdtostr} expects a vector of @code{double}.
00217  *
00218  * @code{pvxtostr} expects a vector of hexadecimal @code{unsigned}.
00219  *
00220  * All functions return a vector of @code{char *}.
00221  */
00222 extern vector pvitostr (pool, vector);
00223 extern vector pvdtostr (pool, vector);
00224 extern vector pvxtostr (pool, vector);
00225 
00226 /* Function: pstrcat - extend a string
00227  * Function: pstrncat
00228  *
00229  * @code{str} is a string allocated in @code{pool}.
00230  * Append @code{ending} to @code{str}, reallocating
00231  * @code{str} if necessary.
00232  *
00233  * Because @code{str} may be reallocated (ie. moved) you
00234  * must invoke this function as follows:
00235  *
00236  * @code{str = pstrcat (pool, str, ending);}
00237  *
00238  * @code{pstrncat} is similar to @code{pstrcat} except that
00239  * only the first @code{n} characters of @code{ending}
00240  * are appended to @code{str}.
00241  */
00242 extern char *pstrcat (pool, char *str, const char *ending);
00243 extern char *pstrncat (pool, char *str, const char *ending, size_t n);
00244 
00245 /* Function: psubstr - return a substring of a string
00246  *
00247  * Return the substring starting at @code{offset} and of length
00248  * @code{len} of @code{str}, allocated
00249  * as a new string. If @code{len} is negative,
00250  * everything up to the end of @code{str}
00251  * is returned.
00252  */
00253 extern char *psubstr (pool, const char *str, int offset, int len);
00254 
00255 /* Function: pstrupr - convert a string to upper- or lowercase
00256  * Function: pstrlwr
00257  *
00258  * Convert a string, in-place, to upper or lowercase by applying
00259  * @code{toupper} or @code{tolower} to each character in turn.
00260  */
00261 extern char *pstrupr (char *str);
00262 extern char *pstrlwr (char *str);
00263 
00264 /* Function: pgetline - read a line from a file, optionally removing comments
00265  * Function: pgetlinex
00266  * Function: pgetlinec
00267  *
00268  * @code{pgetline} reads a single line from a file and returns it. It
00269  * allocates enough space to read lines of arbitrary length. Line ending
00270  * characters ('\r' and '\n') are automatically removed from the end
00271  * of the line.
00272  *
00273  * The @code{pool} argument is a pool for allocating the line. The
00274  * @code{fp} argument is the C @code{FILE} pointer. The @code{line}
00275  * argument is a pointer to a string allocated in pool which will
00276  * be reallocated and filled with the contents of the line. You may
00277  * pass @code{line} as @code{NULL} to get a newly allocated buffer.
00278  *
00279  * Use @code{pgetline} in one of the following two ways:
00280  *
00281  * @code{line = pgetline (pool, fp, line);}
00282  *
00283  * or
00284  *
00285  * @code{line = pgetline (pool, fp, NULL);}
00286  *
00287  * @code{pgetlinex} is a more advanced function which reads a line
00288  * from a file, optionally removing comments, concatenating together
00289  * lines which have been split with a backslash, and ignoring blank
00290  * lines. @code{pgetlinex} (and the related macro @code{pgetlinec}) are
00291  * very useful for reading lines of input from a configuration file.
00292  *
00293  * The @code{pool} argument is a pool for allocating the line. The
00294  * @code{fp} argument is the C @code{FILE} pointer. The @code{line}
00295  * argument is a buffer allocated in pool which will be reallocated
00296  * and filled with the result. @code{comment_set} is the set of
00297  * possible comment characters -- eg. @code{"#!"} to allow either
00298  * @code{#} or @code{!} to be used to introduce comments.
00299  * @code{flags} is zero or more of the following flags OR-ed
00300  * together:
00301  *
00302  * @code{PGETL_NO_CONCAT}: Don't concatenate lines which have been
00303  * split with trailing backslash characters.
00304  *
00305  * @code{PGETL_INLINE_COMMENTS}: Treat everything following a comment
00306  * character as a comment. The default is to only allow comments which
00307  * appear on a line on their own.
00308  *
00309  * @code{pgetlinec} is a helper macro which calls @code{pgetlinex}
00310  * with @code{comment_set == "#"} and @code{flags == 0}.
00311  */
00312 extern char *pgetline (pool, FILE *fp, char *line);
00313 extern char *pgetlinex (pool, FILE *fp, char *line, const char *comment_set, int flags);
00314 #define pgetlinec(p,fp,line) pgetlinex ((p), (fp), (line), "#", 0)
00315 /* Flag bits for the flags argument of pgetlinex; OR them together. */
00316 #define PGETL_NO_CONCAT 1
00317 #define PGETL_INLINE_COMMENTS 2
00318 
00319 /* Function: pmap - map or filter a vector of strings
00320  * Function: pgrep
00321  *
00322  * @code{pmap} takes a @code{vector} of strings (@code{char *}) and
00323  * transforms it into another @code{vector} of strings by applying
00324  * the function @code{char *map_fn (pool, const char *)} to each
00325  * string.
00326  *
00327  * @code{pgrep} applies the function @code{int grep_fn (pool, const char *)}
00328  * to each element in a @code{vector} of strings, and returns a
00329  * new vector of strings containing only those strings where
00330  * @code{grep_fn} returns true.
00331  *
00332  * See also: @ref{vector_map_pool(3)}, @ref{vector_grep_pool(3)}.
00333  */
00334 extern vector pmap (pool, const vector v, char *(*map_fn) (pool, const char *));
00335 extern vector pgrep (pool, const vector v, int (*grep_fn) (pool, const char *));
00336 
00337 #endif /* PSTRING_H */

Generated on Fri Feb 20 15:17:48 2004 for PPR Libraries by doxygen 1.3.5