/* * Copyright (c) 2003-2008 Hypertriton, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1987 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms are permitted * provided that the above copyright notice and this paragraph are * duplicated in all such forms and that any documentation, * advertising materials, and other materials related to such * distribution and use acknowledge that the software was developed * by the University of California, Berkeley. The name of the * University may not be used to endorse or promote products derived * from this software without specific prior written permission. 
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ /* * Copyright (c) 1998 Todd C. Miller * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND TODD C. MILLER DISCLAIMS ALL * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL TODD C. MILLER BE LIABLE * FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include /* * This array is designed for mapping upper and lower case letter * together for a case independent comparison. The mappings are * based upon ASCII character sequences. 
*/
/*
 * Table layout: 256 entries indexed by unsigned char value. Entries
 * 0101-0132 ('A'-'Z') hold 0141-0172 ('a'-'z'); entries 0301-0332 hold
 * 0341-0372 (the same pattern with the high bit set). Every other entry
 * maps to itself.
 */
const unsigned char agStrcasecmpMapASCII[] = {
	'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
	'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
	'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
	'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
	'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
	'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
	'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
	'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
	'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
	'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
	'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
	'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
	'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
	'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
	'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
	'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
	'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
	'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
	'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
	'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
	'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
	'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
	'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
	'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
	'\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
	'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
	'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
	'\370', '\371', '\372', '\333', '\334', '\335', '\336', '\337',
	'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
	'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
	'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
	'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};

/*
 * Copy src to string dst of size siz. At most siz-1 characters
 * will be copied. Always NUL terminates (unless siz == 0).
 * Returns strlen(src); if retval >= siz, truncation occurred.
 */
size_t
AG_Strlcpy(char *dst, const char *src, size_t siz)
{
	char *d = dst;
	const char *s = src;
	size_t n = siz;

	/* Copy as many bytes as will fit */
	if (n != 0 && --n != 0) {
		do {
			if ((*d++ = *s++) == 0) {
				break;
			}
		} while (--n != 0);
	}

	/* Not enough room in dst, add NUL and traverse rest of src */
	if (n == 0) {
		if (siz != 0) {
			*d = '\0';		/* NUL-terminate dst */
		}
		while (*s++)
			;
	}

	return (s - src - 1);	/* count does not include NUL */
}

/*
 * UCS-4 version of Strlcpy().
 *
 * The size of dst is given in bytes; the capacity in characters is
 * bytes / sizeof(Uint32), rounded down. At most capacity-1 characters
 * are copied and dst is always NUL-terminated unless the capacity is 0
 * (in which case dst is not written to at all).
 *
 * Returns the length of src in bytes, not counting the terminator.
 */
size_t
AG_StrlcpyUCS4(Uint32 *dst, const Uint32 *src, size_t bytes)
{
	Uint32 *d = dst;
	const Uint32 *s = src;
	size_t siz = bytes / sizeof(Uint32);	/* Capacity in characters */
	size_t n = siz;

	/* Copy as many characters as will fit. */
	if (n != 0 && --n != 0) {
		do {
			if ((*d++ = *s++) == 0) {
				break;
			}
		} while (--n != 0);
	}

	/* Not enough room in dst, add NUL and traverse rest of src. */
	if (n == 0) {
		/*
		 * Test the character capacity, not the byte count: a buffer
		 * of 1..3 bytes cannot hold even the terminator.
		 */
		if (siz != 0) {
			*d = '\0';		/* NUL-terminate dst */
		}
		while (*s++)
			;
	}

	return ((s - src - 1) * sizeof(Uint32)); /* Does not include NUL */
}

/*
 * Appends src to string dst of size siz (unlike strncat, siz is the
 * full size of dst, not space left). At most siz-1 characters
 * will be copied. Always NUL terminates (unless siz <= strlen(dst)).
 * Returns strlen(src) + MIN(siz, strlen(initial dst)).
 * If retval >= siz, truncation occurred.
*/
size_t
AG_Strlcat(char *dst, const char *src, size_t siz)
{
	size_t dstLen = 0;	/* Length of the initial dst, capped at siz */
	size_t room;		/* Bytes left in dst, terminator included */
	size_t srcPos;
	char *out;

	/* Measure dst without ever looking beyond its siz bytes. */
	while (dstLen < siz && dst[dstLen] != '\0') {
		dstLen++;
	}

	room = siz - dstLen;
	if (room == 0) {
		/* No terminator within siz bytes: dst is left untouched. */
		return (dstLen + strlen(src));
	}

	/*
	 * Walk all of src so its full length can be reported, but only
	 * store characters while more than the terminator's slot remains.
	 */
	out = dst + dstLen;
	for (srcPos = 0; src[srcPos] != '\0'; srcPos++) {
		if (room != 1) {
			*out++ = src[srcPos];
			room--;
		}
	}
	*out = '\0';

	return (dstLen + srcPos);	/* count does not include NUL */
}

/*
 * UCS-4 version of Strlcat(). The size of dst is given in bytes and the
 * return value is expressed in bytes as well (terminator not included).
 */
size_t
AG_StrlcatUCS4(Uint32 *dst, const Uint32 *src, size_t bytes)
{
	size_t capacity = bytes / sizeof(Uint32);	/* In characters */
	size_t dstLen = 0;
	size_t room;
	size_t srcPos;
	Uint32 *out;

	/* Measure dst without ever looking beyond its capacity. */
	while (dstLen < capacity && dst[dstLen] != '\0') {
		dstLen++;
	}

	room = capacity - dstLen;
	if (room == 0) {
		/* No terminator within the buffer: dst is left untouched. */
		return ((dstLen + AG_LengthUCS4(src))*sizeof(Uint32));
	}

	/* Append what fits while still traversing all of src. */
	out = dst + dstLen;
	for (srcPos = 0; src[srcPos] != '\0'; srcPos++) {
		if (room != 1) {
			*out++ = src[srcPos];
			room--;
		}
	}
	*out = '\0';

	return ((dstLen + srcPos)*sizeof(Uint32)); /* Does not include NUL */
}

/*
 * Get next token from string *stringp, where tokens are possibly-empty
 * strings separated by characters from delim.
 *
 * Writes NULs into the string at *stringp to end tokens.
 * delim need not remain constant from call to call.
 * On return, *stringp points past the last NUL written (if there might
 * be further tokens), or is NULL (if there are definitely no more tokens).
 *
 * If *stringp is NULL, AG_Strsep returns NULL.
*/ char * AG_Strsep(char **stringp, const char *delim) { char *s; const char *spanp; int c, sc; char *tok; if ((s = *stringp) == NULL) { return (NULL); } for (tok = s;;) { c = *s++; spanp = delim; do { if ((sc = *spanp++) == c) { if (c == 0) { s = NULL; } else { s[-1] = 0; } *stringp = s; return (tok); } } while (sc != 0); } } /* UCS-4 version of Strsep() */ Uint32 * AG_StrsepUCS4(Uint32 **stringp, const Uint32 *delim) { Uint32 *s; const Uint32 *spanp; Uint32 c, sc; Uint32 *tok; if ((s = *stringp) == NULL) { return (NULL); } for (tok = s;;) { c = *s++; spanp = delim; do { if ((sc = *spanp++) == c) { if (c == 0) { s = NULL; } else { s[-1] = 0; } *stringp = s; return (tok); } } while (sc != 0); } } /* Duplicate a string. */ char * AG_Strdup(const char *s) { size_t buflen; char *ns; buflen = strlen(s)+1; ns = Malloc(buflen); memcpy(ns, s, buflen); return (ns); } /* Duplicate a UCS-4 string. */ Uint32 * AG_StrdupUCS4(const Uint32 *ucs) { size_t buflen; Uint32 *ns; buflen = (AG_LengthUCS4(ucs) + 1)*sizeof(Uint32); ns = Malloc(buflen); memcpy(ns, ucs, buflen); return (ns); } /* * Returns a buffer containing a UCS-4 representation of the given * string/encoding. If len is 0, enough memory to hold the string is * allocated. Otherwise, a buffer of the specified size is allocated. */ Uint32 * AG_ImportUnicode(enum ag_unicode_conv conv, const char *s, size_t pLen) { Uint32 *ucs; size_t i, j; size_t sLen = strlen(s); size_t bufLen = (pLen != 0) ? 
pLen : (sLen+1); ucs = Malloc(bufLen*sizeof(Uint32)); switch (conv) { case AG_UNICODE_FROM_USASCII: for (i = 0; i < sLen; i++) { ucs[i] = ((const unsigned char *)s)[i]; } ucs[i] = '\0'; break; case AG_UNICODE_FROM_UTF8: for (i = 0, j = 0; i < sLen; i++, j++) { switch (AG_CharLengthUTF8(s[i])) { case 1: ucs[j] = (Uint32)s[i]; break; case 2: ucs[j] = (Uint32)(s[i] & 0x3f) << 6; ucs[j] |= (Uint32)(s[++i] & 0x3f); break; case 3: ucs[j] = (Uint32)(s[i] & 0x3f) << 12; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 6; ucs[j] |= (Uint32)(s[++i] & 0x3f); break; case 4: ucs[j] = (Uint32)(s[i] & 0x07) << 18; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 12; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 6; ucs[j] |= (Uint32)(s[++i] & 0x3f); break; case 5: ucs[j] = (Uint32)(s[i] & 0x03) << 24; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 18; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 12; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 6; ucs[j] |= (Uint32)(s[++i] & 0x3f); break; case 6: ucs[j] = (Uint32)(s[i] & 0x01) << 30; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 24; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 18; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 12; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 6; ucs[j] |= (Uint32)(s[++i] & 0x3f); break; case -1: ucs[j] = '?'; break; } } ucs[j] = '\0'; break; default: break; } return (ucs); } size_t AG_CopyUnicode(enum ag_unicode_conv conv, const char *s, Uint32 *ucs, size_t ucs_len) { size_t len; size_t i, j; len = strlen(s); switch (conv) { case AG_UNICODE_FROM_USASCII: if (len > ucs_len) { len = ucs_len; } for (i = 0; i < len; i++) { ucs[i] = ((const unsigned char *)s)[i]; } ucs[i] = '\0'; return (i); case AG_UNICODE_FROM_UTF8: for (i = 0, j = 0; i < len; i++, j++) { switch (AG_CharLengthUTF8(s[i])) { case 1: if (i+1 >= ucs_len) { break; } ucs[j] = (Uint32)s[i]; break; case 2: if (i+2 >= ucs_len) { break; } ucs[j] = (Uint32)(s[i] & 0x3f) << 6; ucs[j] |= (Uint32)(s[++i] & 0x3f); break; case 3: if (i+3 >= ucs_len) { break; } ucs[j] = (Uint32)(s[i] & 0x3f) << 12; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 6; ucs[j] 
|= (Uint32)(s[++i] & 0x3f); break; case 4: if (i+4 >= ucs_len) { break; } ucs[j] = (Uint32)(s[i] & 0x07) << 18; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 12; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 6; ucs[j] |= (Uint32)(s[++i] & 0x3f); break; case 5: if (i+5 >= ucs_len) { break; } ucs[j] = (Uint32)(s[i] & 0x03) << 24; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 18; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 12; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 6; ucs[j] |= (Uint32)(s[++i] & 0x3f); break; case 6: if (i+6 >= ucs_len) { break; } ucs[j] = (Uint32)(s[i] & 0x01) << 30; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 24; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 18; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 12; ucs[j] |= (Uint32)(s[++i] & 0x3f) << 6; ucs[j] |= (Uint32)(s[++i] & 0x3f); break; case -1: if (i+1 >= ucs_len) { break; } ucs[j] = '?'; break; } } ucs[j] = '\0'; return (j); default: break; } return (0); } /* * Convert a UCS-4 string to the given encoding. * At most dst_size-1 bytes will be copied. The string is NUL-terminated * unless dst_size == 0. * * If retval >= dst_size, truncation occurred. If retval == -1, a * conversion error has occurred. */ long AG_ExportUnicode(enum ag_unicode_conv conv, char *dst, const Uint32 *ucs, size_t dst_size) { size_t len; switch (conv) { case AG_UNICODE_TO_UTF8: for (len = 0; *ucs != '\0' && len < dst_size; ucs++) { Uint32 uch = *ucs; int chlen, ch1, i; if (uch < 0x80) { chlen = 1; ch1 = 0; } else if (uch < 0x800) { chlen = 2; ch1 = 0xc0; } else if (uch < 0x10000) { chlen = 3; ch1 = 0xe0; } else if (uch < 0x200000) { chlen = 4; ch1 = 0xf0; } else if (uch < 0x4000000) { chlen = 5; ch1 = 0xf8; } else if (uch <= 0x7fffffff) { chlen = 6; ch1 = 0xfc; } else { return (-1); } if (len+chlen+1 >= dst_size) { return ((long)len+chlen); } for (i = chlen - 1; i > 0; i--) { dst[i] = (uch & 0x3f) | 0x80; uch >>= 6; } dst[0] = uch | ch1; dst += chlen; len += chlen; } *dst = '\0'; return (long)len; default: return (-1); } }