avstring.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. /*
  2. * Copyright (c) 2007 Mans Rullgard
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #ifndef AVUTIL_AVSTRING_H
  21. #define AVUTIL_AVSTRING_H
  22. #include <stddef.h>
  23. #include <stdint.h>
  24. #include "attributes.h"
  25. /**
  26. * @addtogroup lavu_string
  27. * @{
  28. */
  29. /**
  30. * Return non-zero if pfx is a prefix of str. If it is, *ptr is set to
  31. * the address of the first character in str after the prefix.
  32. *
  33. * @param str input string
  34. * @param pfx prefix to test
  35. * @param ptr updated if the prefix is matched inside str
  36. * @return non-zero if the prefix matches, zero otherwise
  37. */
  38. int av_strstart(const char *str, const char *pfx, const char **ptr);
  39. /**
  40. * Return non-zero if pfx is a prefix of str independent of case. If
  41. * it is, *ptr is set to the address of the first character in str
  42. * after the prefix.
  43. *
  44. * @param str input string
  45. * @param pfx prefix to test
  46. * @param ptr updated if the prefix is matched inside str
  47. * @return non-zero if the prefix matches, zero otherwise
  48. */
  49. int av_stristart(const char *str, const char *pfx, const char **ptr);
  50. /**
  51. * Locate the first case-independent occurrence in the string haystack
  52. * of the string needle. A zero-length string needle is considered to
  53. * match at the start of haystack.
  54. *
  55. * This function is a case-insensitive version of the standard strstr().
  56. *
  57. * @param haystack string to search in
  58. * @param needle string to search for
  59. * @return pointer to the located match within haystack
  60. * or a null pointer if no match
  61. */
  62. char *av_stristr(const char *haystack, const char *needle);
  63. /**
  64. * Locate the first occurrence of the string needle in the string haystack
  65. * where not more than hay_length characters are searched. A zero-length
  66. * string needle is considered to match at the start of haystack.
  67. *
  68. * This function is a length-limited version of the standard strstr().
  69. *
  70. * @param haystack string to search in
  71. * @param needle string to search for
  72. * @param hay_length length of string to search in
  73. * @return pointer to the located match within haystack
  74. * or a null pointer if no match
  75. */
  76. char *av_strnstr(const char *haystack, const char *needle, size_t hay_length);
  77. /**
  78. * Copy the string src to dst, but no more than size - 1 bytes, and
  79. * null-terminate dst.
  80. *
  81. * This function is the same as BSD strlcpy().
  82. *
  83. * @param dst destination buffer
  84. * @param src source string
  85. * @param size size of destination buffer
  86. * @return the length of src
  87. *
  88. * @warning since the return value is the length of src, src absolutely
  89. * _must_ be a properly 0-terminated string, otherwise this will read beyond
  90. * the end of the buffer and possibly crash.
  91. */
  92. size_t av_strlcpy(char *dst, const char *src, size_t size);
  93. /**
  94. * Append the string src to the string dst, but to a total length of
  95. * no more than size - 1 bytes, and null-terminate dst.
  96. *
  97. * This function is similar to BSD strlcat(), but differs when
  98. * size <= strlen(dst).
  99. *
  100. * @param dst destination buffer
  101. * @param src source string
  102. * @param size size of destination buffer
  103. * @return the total length of src and dst
  104. *
  105. * @warning since the return value uses the lengths of src and dst, both
  106. * absolutely _must_ be properly 0-terminated strings, otherwise this
  107. * will read beyond the end of the buffer and possibly crash.
  108. */
  109. size_t av_strlcat(char *dst, const char *src, size_t size);
  110. /**
  111. * Append output to a string, according to a format. Never write out of
  112. * the destination buffer, and always put a terminating 0 within
  113. * the buffer.
  114. * @param dst destination buffer (string to which the output is
  115. * appended)
  116. * @param size total size of the destination buffer
  117. * @param fmt printf-compatible format string, specifying how the
  118. * following parameters are used
  119. * @return the length of the string that would have been generated
  120. * if enough space had been available
  121. */
  122. size_t av_strlcatf(char *dst, size_t size, const char *fmt, ...) av_printf_format(3, 4);
  /**
   * Count the leading non-zero characters of a string, looking at no more
   * than len characters.
   *
   * @param s   the string whose length to count
   * @param len maximum number of characters to examine; this is also the
   *            largest value the function can return
   * @return the number of characters preceding the first 0 character, or
   *         len if no 0 character occurs within the first len characters
   */
  static inline size_t av_strnlen(const char *s, size_t len)
  {
      size_t count = 0;

      /* The bound is tested first so s[count] is never read at or past len. */
      while (count < len && s[count] != '\0') {
          count++;
      }
      return count;
  }
  137. /**
  138. * Print arguments following specified format into a large enough auto
  139. * allocated buffer. It is similar to GNU asprintf().
  140. * @param fmt printf-compatible format string, specifying how the
  141. * following parameters are used.
  142. * @return the allocated string
  143. * @note You have to free the string yourself with av_free().
  144. */
  145. char *av_asprintf(const char *fmt, ...) av_printf_format(1, 2);
  146. /**
  147. * Unescape the given string until a non escaped terminating char,
  148. * and return the token corresponding to the unescaped string.
  149. *
  150. * The normal \ and ' escaping is supported. Leading and trailing
  151. * whitespace is removed, unless it is escaped with '\' or is
  152. * enclosed between ''.
  153. *
  154. * @param buf the buffer to parse, buf will be updated to point to the
  155. * terminating char
  156. * @param term a 0-terminated list of terminating chars
  157. * @return the malloced unescaped string, which must be av_freed by
  158. * the user, NULL in case of allocation failure
  159. */
  160. char *av_get_token(const char **buf, const char *term);
  161. /**
  162. * Split the string into several tokens which can be accessed by
  163. * successive calls to av_strtok().
  164. *
  165. * A token is defined as a sequence of characters not belonging to the
  166. * set specified in delim.
  167. *
  168. * On the first call to av_strtok(), s should point to the string to
  169. * parse, and the value of saveptr is ignored. In subsequent calls, s
  170. * should be NULL, and saveptr should be unchanged since the previous
  171. * call.
  172. *
  173. * This function is similar to strtok_r() defined in POSIX.1.
  174. *
  175. * @param s the string to parse, may be NULL
  176. * @param delim 0-terminated list of token delimiters, must be non-NULL
  177. * @param saveptr user-provided pointer which points to stored
  178. * information necessary for av_strtok() to continue scanning the same
  179. * string. saveptr is updated to point to the next character after the
  180. * first delimiter found, or to NULL if the string was terminated
  181. * @return the found token, or NULL when no token is found
  182. */
  183. char *av_strtok(char *s, const char *delim, char **saveptr);
  184. /**
  185. * Locale-independent conversion of ASCII isdigit.
  186. */
  187. static inline av_const int av_isdigit(int c)
  188. {
  189. return c >= '0' && c <= '9';
  190. }
  191. /**
  192. * Locale-independent conversion of ASCII isgraph.
  193. */
  194. static inline av_const int av_isgraph(int c)
  195. {
  196. return c > 32 && c < 127;
  197. }
  198. /**
  199. * Locale-independent conversion of ASCII isspace.
  200. */
  201. static inline av_const int av_isspace(int c)
  202. {
  203. return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' ||
  204. c == '\v';
  205. }
  206. /**
  207. * Locale-independent conversion of ASCII characters to uppercase.
  208. */
  209. static inline av_const int av_toupper(int c)
  210. {
  211. if (c >= 'a' && c <= 'z')
  212. c ^= 0x20;
  213. return c;
  214. }
  215. /**
  216. * Locale-independent conversion of ASCII characters to lowercase.
  217. */
  218. static inline av_const int av_tolower(int c)
  219. {
  220. if (c >= 'A' && c <= 'Z')
  221. c ^= 0x20;
  222. return c;
  223. }
  224. /**
  225. * Locale-independent conversion of ASCII isxdigit.
  226. */
  227. static inline av_const int av_isxdigit(int c)
  228. {
  229. c = av_tolower(c);
  230. return av_isdigit(c) || (c >= 'a' && c <= 'f');
  231. }
  232. /**
  233. * Locale-independent case-insensitive compare.
  234. * @note This means only ASCII-range characters are case-insensitive
  235. */
  236. int av_strcasecmp(const char *a, const char *b);
  237. /**
  238. * Locale-independent case-insensitive compare.
  239. * @note This means only ASCII-range characters are case-insensitive
  240. */
  241. int av_strncasecmp(const char *a, const char *b, size_t n);
  242. /**
  243. * Locale-independent string replacement.
  244. * @note This means only ASCII-range characters are replaced.
  245. */
  246. char *av_strireplace(const char *str, const char *from, const char *to);
  247. /**
  248. * Thread safe basename.
  249. * @param path the string to parse, on DOS both \ and / are considered separators.
  250. * @return pointer to the basename substring.
  251. * If path does not contain a slash, the function returns a copy of path.
  252. * If path is a NULL pointer or points to an empty string, a pointer
  253. * to a string "." is returned.
  254. */
  255. const char *av_basename(const char *path);
  256. /**
  257. * Thread safe dirname.
  258. * @param path the string to parse, on DOS both \ and / are considered separators.
  259. * @return A pointer to a string that's the parent directory of path.
  260. * If path is a NULL pointer or points to an empty string, a pointer
  261. * to a string "." is returned.
  262. * @note the function may modify the contents of the path, so copies should be passed.
  263. */
  264. const char *av_dirname(char *path);
  265. /**
  266. * Match instances of a name in a comma-separated list of names.
  267. * List entries are checked from the start to the end of the names list,
  268. * the first match ends further processing. If an entry prefixed with '-'
  269. * matches, then 0 is returned. The "ALL" list entry is considered to
  270. * match all names.
  271. *
  272. * @param name Name to look for.
  273. * @param names List of names.
  274. * @return 1 on match, 0 otherwise.
  275. */
  276. int av_match_name(const char *name, const char *names);
  277. /**
  278. * Append path component to the existing path.
  279. * Path separator '/' is placed between when needed.
  280. * The resulting string has to be freed with av_free().
  281. * @param path base path
  282. * @param component component to be appended
  283. * @return new path or NULL on error.
  284. */
  285. char *av_append_path_component(const char *path, const char *component);
  /**
   * Escaping modes, passed as the mode argument of av_escape().
   */
  enum AVEscapeMode {
      /** Use auto-selected escaping mode. */
      AV_ESCAPE_MODE_AUTO      = 0,
      /** Use backslash escaping. */
      AV_ESCAPE_MODE_BACKSLASH = 1,
      /** Use single-quote escaping. */
      AV_ESCAPE_MODE_QUOTE     = 2,
      /** Use XML non-markup character data escaping. */
      AV_ESCAPE_MODE_XML       = 3,
  };
  /**
   * Treat spaces as special characters and escape them even when they
   * occur in the middle of the string.
   *
   * The effect is the same as adding the whitespace characters to the
   * special characters list, with the guarantee that the exact same set
   * of whitespace characters is used as in the rest of libavutil.
   */
  #define AV_ESCAPE_FLAG_WHITESPACE        0x01

  /**
   * Escape only the special characters that were explicitly specified.
   * When this flag is absent, characters that av_get_token() may consider
   * special (such as the single quote) are escaped as well.
   */
  #define AV_ESCAPE_FLAG_STRICT            0x02

  /**
   * In AV_ESCAPE_MODE_XML, also escape single quotes, for use inside
   * single-quoted attributes.
   */
  #define AV_ESCAPE_FLAG_XML_SINGLE_QUOTES 0x04

  /**
   * In AV_ESCAPE_MODE_XML, also escape double quotes, for use inside
   * double-quoted attributes.
   */
  #define AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES 0x08
  317. /**
  318. * Escape string in src, and put the escaped string in an allocated
  319. * string in *dst, which must be freed with av_free().
  320. *
  321. * @param dst pointer where an allocated string is put
  322. * @param src string to escape, must be non-NULL
  323. * @param special_chars string containing the special characters which
  324. * need to be escaped, can be NULL
  325. * @param mode escape mode to employ, see the AV_ESCAPE_MODE_* values of enum AVEscapeMode.
  326. * Any unknown value for mode will be considered equivalent to
  327. * AV_ESCAPE_MODE_BACKSLASH, but this behaviour can change without
  328. * notice.
  329. * @param flags flags which control how to escape, see the AV_ESCAPE_FLAG_* macros
  330. * @return the length of the allocated string, or a negative error code in case of error
  331. * @see av_bprint_escape()
  332. */
  333. av_warn_unused_result
  334. int av_escape(char **dst, const char *src, const char *special_chars,
  335. enum AVEscapeMode mode, int flags);
  #define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES          1 ///< accept codepoints over 0x10FFFF
  #define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS             2 ///< accept non-characters - 0xFFFE and 0xFFFF
  #define AV_UTF8_FLAG_ACCEPT_SURROGATES                 4 ///< accept UTF-16 surrogates codes
  #define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML

  /**
   * Combination of all AV_UTF8_FLAG_ACCEPT_* flags.
   *
   * The replacement list is parenthesized so that the macro behaves as a
   * single value in any expression, e.g. `flags & AV_UTF8_FLAG_ACCEPT_ALL`
   * or `~AV_UTF8_FLAG_ACCEPT_ALL`; without the parentheses, operators that
   * bind tighter than `|` would apply to the first flag only.
   */
  #define AV_UTF8_FLAG_ACCEPT_ALL                \
      (AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES |   \
       AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS    |   \
       AV_UTF8_FLAG_ACCEPT_SURROGATES)
  342. /**
  343. * Read and decode a single UTF-8 code point (character) from the
  344. * buffer in *buf, and update *buf to point to the next byte to
  345. * decode.
  346. *
  347. * In case of an invalid byte sequence, the pointer will be updated to
  348. * the next byte after the invalid sequence and the function will
  349. * return an error code.
  350. *
  351. * Depending on the specified flags, the function will also fail in
  352. * case the decoded code point does not belong to a valid range.
  353. *
  354. * @note For speed-relevant code a carefully implemented use of
  355. * GET_UTF8() may be preferred.
  356. *
  357. * @param codep pointer used to return the parsed code in case of success.
  358. * The value in *codep is set even in case the range check fails.
  359. * @param bufp pointer to the address of the first byte of the sequence
  360. * to decode, updated by the function to point to the
  361. * byte next after the decoded sequence
  362. * @param buf_end pointer to the end of the buffer, points to the next
  363. * byte past the last in the buffer. This is used to
  364. * avoid buffer overreads (in case of an unfinished
  365. * UTF-8 sequence towards the end of the buffer).
  366. * @param flags a collection of AV_UTF8_FLAG_* flags
  367. * @return >= 0 in case a sequence was successfully read, a negative
  368. * value in case of invalid sequence
  369. */
  370. av_warn_unused_result
  371. int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
  372. unsigned int flags);
  373. /**
  374. * Check if a name is in a list.
  375. * @returns 0 if not found, or the 1 based index where it has been found in the
  376. * list.
  377. */
  378. int av_match_list(const char *name, const char *list, char separator);
  379. /**
  380. * See libc sscanf manual for more information.
  381. * Locale-independent sscanf implementation.
  382. */
  383. int av_sscanf(const char *string, const char *format, ...);
  384. /**
  385. * @}
  386. */
  387. #endif /* AVUTIL_AVSTRING_H */