rapidcsv.h 55 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821
  1. /*
  2. * rapidcsv.h
  3. *
  4. * URL: https://github.com/d99kris/rapidcsv
  5. * Version: 8.68
  6. *
  7. * Copyright (C) 2017-2022 Kristofer Berggren
  8. * All rights reserved.
  9. *
  10. * rapidcsv is distributed under the BSD 3-Clause license, see LICENSE for details.
  11. *
  12. */
  13. #pragma once
#include <algorithm>
#include <cassert>
#include <cmath>
#ifdef HAS_CODECVT
#include <codecvt>
#endif
#include <fstream>
#include <functional>
#include <iostream>
#include <limits>
#include <locale>
#include <map>
#include <sstream>
#include <string>
#include <typeinfo>
#include <vector>
  30. #if defined(_MSC_VER)
  31. #include <BaseTsd.h>
  32. typedef SSIZE_T ssize_t;
  33. #endif
  34. namespace rapidcsv
  35. {
  // Compile-time platform default for line endings: true only when building
  // with MSVC. Used as the default for SeparatorParams::mHasCR.
  static const bool sPlatformHasCR =
#if defined(_MSC_VER)
    true;
#else
    false;
#endif
  41. /**
  42. * @brief Datastructure holding parameters controlling how invalid numbers (including
  43. * empty strings) should be handled.
  44. */
  struct ConverterParams
  {
    /**
     * @brief   Constructor
     * @param   pHasDefaultConverter  when true, strings that cannot be converted to a
     *                                number yield a default value instead of an
     *                                exception (default false, i.e. exceptions).
     * @param   pDefaultFloat         default value used for invalid floating-point
     *                                numbers.
     * @param   pDefaultInteger       default value used for invalid integer numbers.
     * @param   pNumericLocale        whether to honor the LC_NUMERIC locale (default
     *                                true).
     */
    explicit ConverterParams(
      const bool pHasDefaultConverter = false,
      const long double pDefaultFloat = std::numeric_limits<long double>::signaling_NaN(),
      const long long pDefaultInteger = 0,
      const bool pNumericLocale = true)
      : mHasDefaultConverter{pHasDefaultConverter}
      , mDefaultFloat{pDefaultFloat}
      , mDefaultInteger{pDefaultInteger}
      , mNumericLocale{pNumericLocale}
    {
    }

    /**
     * @brief   when true, non-numerical strings are converted to a default numerical
     *          value; when false (default) such conversions throw.
     */
    bool mHasDefaultConverter;

    /**
     * @brief   default value standing in for invalid floating-point numbers.
     */
    long double mDefaultFloat;

    /**
     * @brief   default value standing in for invalid integer numbers.
     */
    long long mDefaultInteger;

    /**
     * @brief   whether the LC_NUMERIC locale is honored.
     */
    bool mNumericLocale;
  };
  85. /**
  86. * @brief Exception thrown when attempting to access Document data in a datatype which
  87. * is not supported by the Converter class.
  88. */
  class no_converter : public std::exception
  {
  public:
    /**
     * @brief   Provides details about the exception
     * @returns an explanatory string
     *
     * Declared public so the message can be read from a no_converter object
     * directly, not only through a std::exception reference.
     */
    const char* what() const noexcept override
    {
      return "unsupported conversion datatype";
    }
  };
  100. /**
  101. * @brief Class providing conversion to/from numerical datatypes and strings. Only
  102. * intended for rapidcsv internal usage, but exposed externally to allow
  103. * specialization for custom datatype conversions.
  104. */
  105. template<typename T>
  106. class Converter
  107. {
  108. public:
  109. /**
  110. * @brief Constructor
  111. * @param pConverterParams specifies how conversion of non-numerical values to
  112. * numerical datatype shall be handled.
  113. */
  114. Converter(const ConverterParams& pConverterParams)
  115. : mConverterParams(pConverterParams)
  116. {
  117. }
  118. /**
  119. * @brief Converts numerical value to string representation.
  120. * @param pVal numerical value
  121. * @param pStr output string
  122. */
  123. void ToStr(const T& pVal, std::string& pStr) const
  124. {
  125. if (typeid(T) == typeid(int) ||
  126. typeid(T) == typeid(long) ||
  127. typeid(T) == typeid(long long) ||
  128. typeid(T) == typeid(unsigned) ||
  129. typeid(T) == typeid(unsigned long) ||
  130. typeid(T) == typeid(unsigned long long) ||
  131. typeid(T) == typeid(float) ||
  132. typeid(T) == typeid(double) ||
  133. typeid(T) == typeid(long double) ||
  134. typeid(T) == typeid(char))
  135. {
  136. std::ostringstream out;
  137. out << pVal;
  138. pStr = out.str();
  139. }
  140. else
  141. {
  142. throw no_converter();
  143. }
  144. }
  145. /**
  146. * @brief Converts string holding a numerical value to numerical datatype representation.
  147. * @param pVal numerical value
  148. * @param pStr output string
  149. */
  150. void ToVal(const std::string& pStr, T& pVal) const
  151. {
  152. try
  153. {
  154. if (typeid(T) == typeid(int))
  155. {
  156. pVal = static_cast<T>(std::stoi(pStr));
  157. return;
  158. }
  159. else if (typeid(T) == typeid(long))
  160. {
  161. pVal = static_cast<T>(std::stol(pStr));
  162. return;
  163. }
  164. else if (typeid(T) == typeid(long long))
  165. {
  166. pVal = static_cast<T>(std::stoll(pStr));
  167. return;
  168. }
  169. else if (typeid(T) == typeid(unsigned))
  170. {
  171. pVal = static_cast<T>(std::stoul(pStr));
  172. return;
  173. }
  174. else if (typeid(T) == typeid(unsigned long))
  175. {
  176. pVal = static_cast<T>(std::stoul(pStr));
  177. return;
  178. }
  179. else if (typeid(T) == typeid(unsigned long long))
  180. {
  181. pVal = static_cast<T>(std::stoull(pStr));
  182. return;
  183. }
  184. }
  185. catch (...)
  186. {
  187. if (!mConverterParams.mHasDefaultConverter)
  188. {
  189. throw;
  190. }
  191. else
  192. {
  193. pVal = static_cast<T>(mConverterParams.mDefaultInteger);
  194. return;
  195. }
  196. }
  197. try
  198. {
  199. if (mConverterParams.mNumericLocale)
  200. {
  201. if (typeid(T) == typeid(float))
  202. {
  203. pVal = static_cast<T>(std::stof(pStr));
  204. return;
  205. }
  206. else if (typeid(T) == typeid(double))
  207. {
  208. pVal = static_cast<T>(std::stod(pStr));
  209. return;
  210. }
  211. else if (typeid(T) == typeid(long double))
  212. {
  213. pVal = static_cast<T>(std::stold(pStr));
  214. return;
  215. }
  216. }
  217. else
  218. {
  219. if ((typeid(T) == typeid(float)) ||
  220. (typeid(T) == typeid(double)) ||
  221. (typeid(T) == typeid(long double)))
  222. {
  223. std::istringstream iss(pStr);
  224. iss >> pVal;
  225. if (iss.fail() || iss.bad() || !iss.eof())
  226. {
  227. throw std::invalid_argument("istringstream: no conversion");
  228. }
  229. return;
  230. }
  231. }
  232. }
  233. catch (...)
  234. {
  235. if (!mConverterParams.mHasDefaultConverter)
  236. {
  237. throw;
  238. }
  239. else
  240. {
  241. pVal = static_cast<T>(mConverterParams.mDefaultFloat);
  242. return;
  243. }
  244. }
  245. if (typeid(T) == typeid(char))
  246. {
  247. pVal = static_cast<T>(pStr[0]);
  248. return;
  249. }
  250. else
  251. {
  252. throw no_converter();
  253. }
  254. }
  255. private:
  256. const ConverterParams& mConverterParams;
  257. };
  258. /**
  259. * @brief Specialized implementation handling string to string conversion.
  260. * @param pVal string
  261. * @param pStr string
  262. */
  263. template<>
  264. inline void Converter<std::string>::ToStr(const std::string& pVal, std::string& pStr) const
  265. {
  266. pStr = pVal;
  267. }
  268. /**
  269. * @brief Specialized implementation handling string to string conversion.
  270. * @param pVal string
  271. * @param pStr string
  272. */
  273. template<>
  274. inline void Converter<std::string>::ToVal(const std::string& pStr, std::string& pVal) const
  275. {
  276. pVal = pStr;
  277. }
  // Callable type for caller-supplied conversions: receives the cell text as
  // the first argument and writes the converted value through the second.
  template<typename T>
  using ConvFunc = std::function<void (const std::string&, T&)>;
  280. /**
  281. * @brief Datastructure holding parameters controlling which row and column should be
  282. * treated as labels.
  283. */
  struct LabelParams
  {
    /**
     * @brief   Constructor
     * @param   pColumnNameIdx  zero-based row index holding the column labels; -1
     *                          disables column lookup by label name and exposes all
     *                          rows as document data. Default: 0
     * @param   pRowNameIdx     zero-based column index holding the row labels; -1
     *                          disables row lookup by label name and exposes all
     *                          columns as document data. Default: -1
     * @throws  std::out_of_range if either index is below -1.
     */
    explicit LabelParams(const ssize_t pColumnNameIdx = 0, const ssize_t pRowNameIdx = -1)
      : mColumnNameIdx{pColumnNameIdx}
      , mRowNameIdx{pRowNameIdx}
    {
      // -1 is the only negative value with a meaning ("no labels").
      if (mColumnNameIdx < -1)
      {
        throw std::out_of_range("invalid column name index " +
                                std::to_string(mColumnNameIdx) + " < -1");
      }

      if (mRowNameIdx < -1)
      {
        throw std::out_of_range("invalid row name index " +
                                std::to_string(mRowNameIdx) + " < -1");
      }
    }

    /**
     * @brief   zero-based row index of the column labels.
     */
    ssize_t mColumnNameIdx;

    /**
     * @brief   zero-based column index of the row labels.
     */
    ssize_t mRowNameIdx;
  };
  321. /**
  322. * @brief Datastructure holding parameters controlling how the CSV data fields are separated.
  323. */
  324. struct SeparatorParams
  325. {
  326. /**
  327. * @brief Constructor
  328. * @param pSeparator specifies the column separator (default ',').
  329. * @param pTrim specifies whether to trim leading and trailing spaces from
  330. * cells read (default false).
  331. * @param pHasCR specifies whether a new document (i.e. not an existing document read)
  332. * should use CR/LF instead of only LF (default is to use standard
  333. * behavior of underlying platforms - CR/LF for Win, and LF for others).
  334. * @param pQuotedLinebreaks specifies whether to allow line breaks in quoted text (default false)
  335. * @param pAutoQuote specifies whether to automatically dequote data during read, and add
  336. * quotes during write (default true).
  337. * @param pQuoteChar specifies the quote character (default '\"').
  338. */
  339. explicit SeparatorParams(const char pSeparator = ',', const bool pTrim = false,
  340. const bool pHasCR = sPlatformHasCR, const bool pQuotedLinebreaks = false,
  341. const bool pAutoQuote = true, const char pQuoteChar = '"')
  342. : mSeparator(pSeparator)
  343. , mTrim(pTrim)
  344. , mHasCR(pHasCR)
  345. , mQuotedLinebreaks(pQuotedLinebreaks)
  346. , mAutoQuote(pAutoQuote)
  347. , mQuoteChar(pQuoteChar)
  348. {
  349. }
  350. /**
  351. * @brief specifies the column separator.
  352. */
  353. char mSeparator;
  354. /**
  355. * @brief specifies whether to trim leading and trailing spaces from cells read.
  356. */
  357. bool mTrim;
  358. /**
  359. * @brief specifies whether new documents should use CR/LF instead of LF.
  360. */
  361. bool mHasCR;
  362. /**
  363. * @brief specifies whether to allow line breaks in quoted text.
  364. */
  365. bool mQuotedLinebreaks;
  366. /**
  367. * @brief specifies whether to automatically dequote cell data.
  368. */
  369. bool mAutoQuote;
  370. /**
  371. * @brief specifies the quote character.
  372. */
  373. char mQuoteChar;
  374. };
  375. /**
  376. * @brief Datastructure holding parameters controlling how special line formats should be
  377. * treated.
  378. */
  struct LineReaderParams
  {
    /**
     * @brief   Constructor
     * @param   pSkipCommentLines  whether lines prefixed with mCommentPrefix are
     *                             skipped. Default: false
     * @param   pCommentPrefix     prefix character marking a comment line.
     *                             Default: #
     * @param   pSkipEmptyLines    whether empty lines are skipped. Default: false
     */
    explicit LineReaderParams(const bool pSkipCommentLines = false,
                              const char pCommentPrefix = '#',
                              const bool pSkipEmptyLines = false)
      : mSkipCommentLines{pSkipCommentLines}
      , mCommentPrefix{pCommentPrefix}
      , mSkipEmptyLines{pSkipEmptyLines}
    {
    }

    /**
     * @brief   whether lines prefixed with mCommentPrefix are skipped.
     */
    bool mSkipCommentLines;

    /**
     * @brief   prefix character marking a comment line.
     */
    char mCommentPrefix;

    /**
     * @brief   whether empty lines are skipped.
     */
    bool mSkipEmptyLines;
  };
  410. /**
  411. * @brief Class representing a CSV document.
  412. */
  413. class Document
  414. {
  415. public:
  416. /**
  417. * @brief Constructor
  418. * @param pPath specifies the path of an existing CSV-file to populate the Document
  419. * data with.
  420. * @param pLabelParams specifies which row and column should be treated as labels.
  421. * @param pSeparatorParams specifies which field and row separators should be used.
  422. * @param pConverterParams specifies how invalid numbers (including empty strings) should be
  423. * handled.
  424. * @param pLineReaderParams specifies how special line formats should be treated.
  425. */
  426. explicit Document(const std::string& pPath = std::string(),
  427. const LabelParams& pLabelParams = LabelParams(),
  428. const SeparatorParams& pSeparatorParams = SeparatorParams(),
  429. const ConverterParams& pConverterParams = ConverterParams(),
  430. const LineReaderParams& pLineReaderParams = LineReaderParams())
  431. : mPath(pPath)
  432. , mLabelParams(pLabelParams)
  433. , mSeparatorParams(pSeparatorParams)
  434. , mConverterParams(pConverterParams)
  435. , mLineReaderParams(pLineReaderParams)
  436. , mData()
  437. , mColumnNames()
  438. , mRowNames()
  439. {
  440. if (!mPath.empty())
  441. {
  442. ReadCsv();
  443. }
  444. }
  445. /**
  446. * @brief Constructor
  447. * @param pStream specifies a binary input stream to read CSV data from.
  448. * @param pLabelParams specifies which row and column should be treated as labels.
  449. * @param pSeparatorParams specifies which field and row separators should be used.
  450. * @param pConverterParams specifies how invalid numbers (including empty strings) should be
  451. * handled.
  452. * @param pLineReaderParams specifies how special line formats should be treated.
  453. */
  454. explicit Document(std::istream& pStream,
  455. const LabelParams& pLabelParams = LabelParams(),
  456. const SeparatorParams& pSeparatorParams = SeparatorParams(),
  457. const ConverterParams& pConverterParams = ConverterParams(),
  458. const LineReaderParams& pLineReaderParams = LineReaderParams())
  459. : mPath()
  460. , mLabelParams(pLabelParams)
  461. , mSeparatorParams(pSeparatorParams)
  462. , mConverterParams(pConverterParams)
  463. , mLineReaderParams(pLineReaderParams)
  464. , mData()
  465. , mColumnNames()
  466. , mRowNames()
  467. {
  468. ReadCsv(pStream);
  469. }
  470. /**
  471. * @brief Read Document data from file.
  472. * @param pPath specifies the path of an existing CSV-file to populate the Document
  473. * data with.
  474. * @param pLabelParams specifies which row and column should be treated as labels.
  475. * @param pSeparatorParams specifies which field and row separators should be used.
  476. * @param pConverterParams specifies how invalid numbers (including empty strings) should be
  477. * handled.
  478. * @param pLineReaderParams specifies how special line formats should be treated.
  479. */
  480. void Load(const std::string& pPath,
  481. const LabelParams& pLabelParams = LabelParams(),
  482. const SeparatorParams& pSeparatorParams = SeparatorParams(),
  483. const ConverterParams& pConverterParams = ConverterParams(),
  484. const LineReaderParams& pLineReaderParams = LineReaderParams())
  485. {
  486. mPath = pPath;
  487. mLabelParams = pLabelParams;
  488. mSeparatorParams = pSeparatorParams;
  489. mConverterParams = pConverterParams;
  490. mLineReaderParams = pLineReaderParams;
  491. ReadCsv();
  492. }
  493. /**
  494. * @brief Read Document data from stream.
  495. * @param pStream specifies a binary input stream to read CSV data from.
  496. * @param pLabelParams specifies which row and column should be treated as labels.
  497. * @param pSeparatorParams specifies which field and row separators should be used.
  498. * @param pConverterParams specifies how invalid numbers (including empty strings) should be
  499. * handled.
  500. * @param pLineReaderParams specifies how special line formats should be treated.
  501. */
  502. void Load(std::istream& pStream,
  503. const LabelParams& pLabelParams = LabelParams(),
  504. const SeparatorParams& pSeparatorParams = SeparatorParams(),
  505. const ConverterParams& pConverterParams = ConverterParams(),
  506. const LineReaderParams& pLineReaderParams = LineReaderParams())
  507. {
  508. mPath = "";
  509. mLabelParams = pLabelParams;
  510. mSeparatorParams = pSeparatorParams;
  511. mConverterParams = pConverterParams;
  512. mLineReaderParams = pLineReaderParams;
  513. ReadCsv(pStream);
  514. }
  515. /**
  516. * @brief Write Document data to file.
  517. * @param pPath optionally specifies the path where the CSV-file will be created
  518. * (if not specified, the original path provided when creating or
  519. * loading the Document data will be used).
  520. */
  521. void Save(const std::string& pPath = std::string())
  522. {
  523. if (!pPath.empty())
  524. {
  525. mPath = pPath;
  526. }
  527. WriteCsv();
  528. }
  529. /**
  530. * @brief Write Document data to stream.
  531. * @param pStream specifies a binary output stream to write the data to.
  532. */
  533. void Save(std::ostream& pStream)
  534. {
  535. WriteCsv(pStream);
  536. }
  537. /**
  538. * @brief Clears loaded Document data.
  539. *
  540. */
  541. void Clear()
  542. {
  543. mData.clear();
  544. mColumnNames.clear();
  545. mRowNames.clear();
  546. #ifdef HAS_CODECVT
  547. mIsUtf16 = false;
  548. mIsLE = false;
  549. #endif
  550. }
  551. /**
  552. * @brief Get column index by name.
  553. * @param pColumnName column label name.
  554. * @returns zero-based column index.
  555. */
  556. ssize_t GetColumnIdx(const std::string& pColumnName) const
  557. {
  558. if (mLabelParams.mColumnNameIdx >= 0)
  559. {
  560. if (mColumnNames.find(pColumnName) != mColumnNames.end())
  561. {
  562. return static_cast<ssize_t>(mColumnNames.at(pColumnName)) - (mLabelParams.mRowNameIdx + 1);
  563. }
  564. }
  565. return -1;
  566. }
  567. /**
  568. * @brief Get column by index.
  569. * @param pColumnIdx zero-based column index.
  570. * @returns vector of column data.
  571. */
  572. template<typename T>
  573. std::vector<T> GetColumn(const size_t pColumnIdx) const
  574. {
  575. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  576. std::vector<T> column;
  577. Converter<T> converter(mConverterParams);
  578. for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
  579. {
  580. if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx)
  581. {
  582. if (dataColumnIdx < itRow->size())
  583. {
  584. T val;
  585. converter.ToVal(itRow->at(dataColumnIdx), val);
  586. column.push_back(val);
  587. }
  588. else
  589. {
  590. const std::string errStr = "requested column index " +
  591. std::to_string(pColumnIdx) + " >= " +
  592. std::to_string(itRow->size() - GetDataColumnIndex(0)) +
  593. " (number of columns on row index " +
  594. std::to_string(std::distance(mData.begin(), itRow) -
  595. (mLabelParams.mColumnNameIdx + 1)) + ")";
  596. throw std::out_of_range(errStr);
  597. }
  598. }
  599. }
  600. return column;
  601. }
  602. /**
  603. * @brief Get column by index.
  604. * @param pColumnIdx zero-based column index.
  605. * @param pToVal conversion function.
  606. * @returns vector of column data.
  607. */
  608. template<typename T>
  609. std::vector<T> GetColumn(const size_t pColumnIdx, ConvFunc<T> pToVal) const
  610. {
  611. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  612. std::vector<T> column;
  613. for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
  614. {
  615. if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx)
  616. {
  617. T val;
  618. pToVal(itRow->at(dataColumnIdx), val);
  619. column.push_back(val);
  620. }
  621. }
  622. return column;
  623. }
  624. /**
  625. * @brief Get column by name.
  626. * @param pColumnName column label name.
  627. * @returns vector of column data.
  628. */
  629. template<typename T>
  630. std::vector<T> GetColumn(const std::string& pColumnName) const
  631. {
  632. const ssize_t columnIdx = GetColumnIdx(pColumnName);
  633. if (columnIdx < 0)
  634. {
  635. throw std::out_of_range("column not found: " + pColumnName);
  636. }
  637. return GetColumn<T>(static_cast<size_t>(columnIdx));
  638. }
  639. /**
  640. * @brief Get column by name.
  641. * @param pColumnName column label name.
  642. * @param pToVal conversion function.
  643. * @returns vector of column data.
  644. */
  645. template<typename T>
  646. std::vector<T> GetColumn(const std::string& pColumnName, ConvFunc<T> pToVal) const
  647. {
  648. const ssize_t columnIdx = GetColumnIdx(pColumnName);
  649. if (columnIdx < 0)
  650. {
  651. throw std::out_of_range("column not found: " + pColumnName);
  652. }
  653. return GetColumn<T>(static_cast<size_t>(columnIdx), pToVal);
  654. }
  655. /**
  656. * @brief Set column by index.
  657. * @param pColumnIdx zero-based column index.
  658. * @param pColumn vector of column data.
  659. */
  660. template<typename T>
  661. void SetColumn(const size_t pColumnIdx, const std::vector<T>& pColumn)
  662. {
  663. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  664. while (GetDataRowIndex(pColumn.size()) > GetDataRowCount())
  665. {
  666. std::vector<std::string> row;
  667. row.resize(GetDataColumnCount());
  668. mData.push_back(row);
  669. }
  670. if ((dataColumnIdx + 1) > GetDataColumnCount())
  671. {
  672. for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
  673. {
  674. itRow->resize(GetDataColumnIndex(dataColumnIdx + 1));
  675. }
  676. }
  677. Converter<T> converter(mConverterParams);
  678. for (auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow)
  679. {
  680. std::string str;
  681. converter.ToStr(*itRow, str);
  682. mData.at(static_cast<size_t>(std::distance(pColumn.begin(), itRow) + mLabelParams.mColumnNameIdx + 1)).at(
  683. dataColumnIdx) = str;
  684. }
  685. }
  686. /**
  687. * @brief Set column by name.
  688. * @param pColumnName column label name.
  689. * @param pColumn vector of column data.
  690. */
  691. template<typename T>
  692. void SetColumn(const std::string& pColumnName, const std::vector<T>& pColumn)
  693. {
  694. const ssize_t columnIdx = GetColumnIdx(pColumnName);
  695. if (columnIdx < 0)
  696. {
  697. throw std::out_of_range("column not found: " + pColumnName);
  698. }
  699. SetColumn<T>(static_cast<size_t>(columnIdx), pColumn);
  700. }
  701. /**
  702. * @brief Remove column by index.
  703. * @param pColumnIdx zero-based column index.
  704. */
  705. void RemoveColumn(const size_t pColumnIdx)
  706. {
  707. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  708. for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
  709. {
  710. itRow->erase(itRow->begin() + static_cast<ssize_t>(dataColumnIdx));
  711. }
  712. UpdateColumnNames();
  713. }
  714. /**
  715. * @brief Remove column by name.
  716. * @param pColumnName column label name.
  717. */
  718. void RemoveColumn(const std::string& pColumnName)
  719. {
  720. ssize_t columnIdx = GetColumnIdx(pColumnName);
  721. if (columnIdx < 0)
  722. {
  723. throw std::out_of_range("column not found: " + pColumnName);
  724. }
  725. RemoveColumn(static_cast<size_t>(columnIdx));
  726. }
  727. /**
  728. * @brief Insert column at specified index.
  729. * @param pColumnIdx zero-based column index.
  730. * @param pColumn vector of column data (optional argument).
  731. * @param pColumnName column label name (optional argument).
  732. */
  733. template<typename T>
  734. void InsertColumn(const size_t pColumnIdx, const std::vector<T>& pColumn = std::vector<T>(),
  735. const std::string& pColumnName = std::string())
  736. {
  737. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  738. std::vector<std::string> column;
  739. if (pColumn.empty())
  740. {
  741. column.resize(GetDataRowCount());
  742. }
  743. else
  744. {
  745. column.resize(GetDataRowIndex(pColumn.size()));
  746. Converter<T> converter(mConverterParams);
  747. for (auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow)
  748. {
  749. std::string str;
  750. converter.ToStr(*itRow, str);
  751. const size_t rowIdx =
  752. static_cast<size_t>(std::distance(pColumn.begin(), itRow) + (mLabelParams.mColumnNameIdx + 1));
  753. column.at(rowIdx) = str;
  754. }
  755. }
  756. while (column.size() > GetDataRowCount())
  757. {
  758. std::vector<std::string> row;
  759. const size_t columnCount = std::max<size_t>(static_cast<size_t>(mLabelParams.mColumnNameIdx + 1),
  760. GetDataColumnCount());
  761. row.resize(columnCount);
  762. mData.push_back(row);
  763. }
  764. for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
  765. {
  766. const size_t rowIdx = static_cast<size_t>(std::distance(mData.begin(), itRow));
  767. itRow->insert(itRow->begin() + static_cast<ssize_t>(dataColumnIdx), column.at(rowIdx));
  768. }
  769. if (!pColumnName.empty())
  770. {
  771. SetColumnName(pColumnIdx, pColumnName);
  772. }
  773. UpdateColumnNames();
  774. }
  775. /**
  776. * @brief Get number of data columns (excluding label columns).
  777. * @returns column count.
  778. */
  779. size_t GetColumnCount() const
  780. {
  781. const ssize_t count = static_cast<ssize_t>((mData.size() > 0) ? mData.at(0).size() : 0) -
  782. (mLabelParams.mRowNameIdx + 1);
  783. return (count >= 0) ? static_cast<size_t>(count) : 0;
  784. }
  785. /**
  786. * @brief Get row index by name.
  787. * @param pRowName row label name.
  788. * @returns zero-based row index.
  789. */
  790. ssize_t GetRowIdx(const std::string& pRowName) const
  791. {
  792. if (mLabelParams.mRowNameIdx >= 0)
  793. {
  794. if (mRowNames.find(pRowName) != mRowNames.end())
  795. {
  796. return static_cast<ssize_t>(mRowNames.at(pRowName)) - (mLabelParams.mColumnNameIdx + 1);
  797. }
  798. }
  799. return -1;
  800. }
  801. /**
  802. * @brief Get row by index.
  803. * @param pRowIdx zero-based row index.
  804. * @returns vector of row data.
  805. */
  806. template<typename T>
  807. std::vector<T> GetRow(const size_t pRowIdx) const
  808. {
  809. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  810. std::vector<T> row;
  811. Converter<T> converter(mConverterParams);
  812. for (auto itCol = mData.at(dataRowIdx).begin(); itCol != mData.at(dataRowIdx).end(); ++itCol)
  813. {
  814. if (std::distance(mData.at(dataRowIdx).begin(), itCol) > mLabelParams.mRowNameIdx)
  815. {
  816. T val;
  817. converter.ToVal(*itCol, val);
  818. row.push_back(val);
  819. }
  820. }
  821. return row;
  822. }
  823. /**
  824. * @brief Get row by index.
  825. * @param pRowIdx zero-based row index.
  826. * @param pToVal conversion function.
  827. * @returns vector of row data.
  828. */
  829. template<typename T>
  830. std::vector<T> GetRow(const size_t pRowIdx, ConvFunc<T> pToVal) const
  831. {
  832. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  833. std::vector<T> row;
  834. Converter<T> converter(mConverterParams);
  835. for (auto itCol = mData.at(dataRowIdx).begin(); itCol != mData.at(dataRowIdx).end(); ++itCol)
  836. {
  837. if (std::distance(mData.at(dataRowIdx).begin(), itCol) > mLabelParams.mRowNameIdx)
  838. {
  839. T val;
  840. pToVal(*itCol, val);
  841. row.push_back(val);
  842. }
  843. }
  844. return row;
  845. }
  846. /**
  847. * @brief Get row by name.
  848. * @param pRowName row label name.
  849. * @returns vector of row data.
  850. */
  851. template<typename T>
  852. std::vector<T> GetRow(const std::string& pRowName) const
  853. {
  854. ssize_t rowIdx = GetRowIdx(pRowName);
  855. if (rowIdx < 0)
  856. {
  857. throw std::out_of_range("row not found: " + pRowName);
  858. }
  859. return GetRow<T>(static_cast<size_t>(rowIdx));
  860. }
  861. /**
  862. * @brief Get row by name.
  863. * @param pRowName row label name.
  864. * @param pToVal conversion function.
  865. * @returns vector of row data.
  866. */
  867. template<typename T>
  868. std::vector<T> GetRow(const std::string& pRowName, ConvFunc<T> pToVal) const
  869. {
  870. ssize_t rowIdx = GetRowIdx(pRowName);
  871. if (rowIdx < 0)
  872. {
  873. throw std::out_of_range("row not found: " + pRowName);
  874. }
  875. return GetRow<T>(static_cast<size_t>(rowIdx), pToVal);
  876. }
  877. /**
  878. * @brief Set row by index.
  879. * @param pRowIdx zero-based row index.
  880. * @param pRow vector of row data.
  881. */
  882. template<typename T>
  883. void SetRow(const size_t pRowIdx, const std::vector<T>& pRow)
  884. {
  885. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  886. while ((dataRowIdx + 1) > GetDataRowCount())
  887. {
  888. std::vector<std::string> row;
  889. row.resize(GetDataColumnCount());
  890. mData.push_back(row);
  891. }
  892. if (pRow.size() > GetDataColumnCount())
  893. {
  894. for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
  895. {
  896. itRow->resize(GetDataColumnIndex(pRow.size()));
  897. }
  898. }
  899. Converter<T> converter(mConverterParams);
  900. for (auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol)
  901. {
  902. std::string str;
  903. converter.ToStr(*itCol, str);
  904. mData.at(dataRowIdx).at(static_cast<size_t>(std::distance(pRow.begin(),
  905. itCol) + mLabelParams.mRowNameIdx + 1)) = str;
  906. }
  907. }
  908. /**
  909. * @brief Set row by name.
  910. * @param pRowName row label name.
  911. * @param pRow vector of row data.
  912. */
  913. template<typename T>
  914. void SetRow(const std::string& pRowName, const std::vector<T>& pRow)
  915. {
  916. ssize_t rowIdx = GetRowIdx(pRowName);
  917. if (rowIdx < 0)
  918. {
  919. throw std::out_of_range("row not found: " + pRowName);
  920. }
  921. return SetRow<T>(static_cast<size_t>(rowIdx), pRow);
  922. }
  923. /**
  924. * @brief Remove row by index.
  925. * @param pRowIdx zero-based row index.
  926. */
  927. void RemoveRow(const size_t pRowIdx)
  928. {
  929. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  930. mData.erase(mData.begin() + static_cast<ssize_t>(dataRowIdx));
  931. UpdateRowNames();
  932. }
  933. /**
  934. * @brief Remove row by name.
  935. * @param pRowName row label name.
  936. */
  937. void RemoveRow(const std::string& pRowName)
  938. {
  939. ssize_t rowIdx = GetRowIdx(pRowName);
  940. if (rowIdx < 0)
  941. {
  942. throw std::out_of_range("row not found: " + pRowName);
  943. }
  944. RemoveRow(static_cast<size_t>(rowIdx));
  945. }
  946. /**
  947. * @brief Insert row at specified index.
  948. * @param pRowIdx zero-based row index.
  949. * @param pRow vector of row data (optional argument).
  950. * @param pRowName row label name (optional argument).
  951. */
  952. template<typename T>
  953. void InsertRow(const size_t pRowIdx, const std::vector<T>& pRow = std::vector<T>(),
  954. const std::string& pRowName = std::string())
  955. {
  956. const size_t rowIdx = GetDataRowIndex(pRowIdx);
  957. std::vector<std::string> row;
  958. if (pRow.empty())
  959. {
  960. row.resize(GetDataColumnCount());
  961. }
  962. else
  963. {
  964. row.resize(GetDataColumnIndex(pRow.size()));
  965. Converter<T> converter(mConverterParams);
  966. for (auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol)
  967. {
  968. std::string str;
  969. converter.ToStr(*itCol, str);
  970. row.at(static_cast<size_t>(std::distance(pRow.begin(), itCol) + mLabelParams.mRowNameIdx + 1)) = str;
  971. }
  972. }
  973. while (rowIdx > GetDataRowCount())
  974. {
  975. std::vector<std::string> tempRow;
  976. tempRow.resize(GetDataColumnCount());
  977. mData.push_back(tempRow);
  978. }
  979. mData.insert(mData.begin() + static_cast<ssize_t>(rowIdx), row);
  980. if (!pRowName.empty())
  981. {
  982. SetRowName(pRowIdx, pRowName);
  983. }
  984. UpdateRowNames();
  985. }
  986. /**
  987. * @brief Get number of data rows (excluding label rows).
  988. * @returns row count.
  989. */
  990. size_t GetRowCount() const
  991. {
  992. const ssize_t count = static_cast<ssize_t>(mData.size()) - (mLabelParams.mColumnNameIdx + 1);
  993. return (count >= 0) ? static_cast<size_t>(count) : 0;
  994. }
  995. /**
  996. * @brief Get cell by index.
  997. * @param pColumnIdx zero-based column index.
  998. * @param pRowIdx zero-based row index.
  999. * @returns cell data.
  1000. */
  1001. template<typename T>
  1002. T GetCell(const size_t pColumnIdx, const size_t pRowIdx) const
  1003. {
  1004. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  1005. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  1006. T val;
  1007. Converter<T> converter(mConverterParams);
  1008. converter.ToVal(mData.at(dataRowIdx).at(dataColumnIdx), val);
  1009. return val;
  1010. }
  1011. /**
  1012. * @brief Get cell by index.
  1013. * @param pColumnIdx zero-based column index.
  1014. * @param pRowIdx zero-based row index.
  1015. * @param pToVal conversion function.
  1016. * @returns cell data.
  1017. */
  1018. template<typename T>
  1019. T GetCell(const size_t pColumnIdx, const size_t pRowIdx, ConvFunc<T> pToVal) const
  1020. {
  1021. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  1022. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  1023. T val;
  1024. pToVal(mData.at(dataRowIdx).at(dataColumnIdx), val);
  1025. return val;
  1026. }
  1027. /**
  1028. * @brief Get cell by name.
  1029. * @param pColumnName column label name.
  1030. * @param pRowName row label name.
  1031. * @returns cell data.
  1032. */
  1033. template<typename T>
  1034. T GetCell(const std::string& pColumnName, const std::string& pRowName) const
  1035. {
  1036. const ssize_t columnIdx = GetColumnIdx(pColumnName);
  1037. if (columnIdx < 0)
  1038. {
  1039. throw std::out_of_range("column not found: " + pColumnName);
  1040. }
  1041. const ssize_t rowIdx = GetRowIdx(pRowName);
  1042. if (rowIdx < 0)
  1043. {
  1044. throw std::out_of_range("row not found: " + pRowName);
  1045. }
  1046. return GetCell<T>(static_cast<size_t>(columnIdx), static_cast<size_t>(rowIdx));
  1047. }
  1048. /**
  1049. * @brief Get cell by name.
  1050. * @param pColumnName column label name.
  1051. * @param pRowName row label name.
  1052. * @param pToVal conversion function.
  1053. * @returns cell data.
  1054. */
  1055. template<typename T>
  1056. T GetCell(const std::string& pColumnName, const std::string& pRowName, ConvFunc<T> pToVal) const
  1057. {
  1058. const ssize_t columnIdx = GetColumnIdx(pColumnName);
  1059. if (columnIdx < 0)
  1060. {
  1061. throw std::out_of_range("column not found: " + pColumnName);
  1062. }
  1063. const ssize_t rowIdx = GetRowIdx(pRowName);
  1064. if (rowIdx < 0)
  1065. {
  1066. throw std::out_of_range("row not found: " + pRowName);
  1067. }
  1068. return GetCell<T>(static_cast<size_t>(columnIdx), static_cast<size_t>(rowIdx), pToVal);
  1069. }
  1070. /**
  1071. * @brief Get cell by column name and row index.
  1072. * @param pColumnName column label name.
  1073. * @param pRowIdx zero-based row index.
  1074. * @returns cell data.
  1075. */
  1076. template<typename T>
  1077. T GetCell(const std::string& pColumnName, const size_t pRowIdx) const
  1078. {
  1079. const ssize_t columnIdx = GetColumnIdx(pColumnName);
  1080. if (columnIdx < 0)
  1081. {
  1082. throw std::out_of_range("column not found: " + pColumnName);
  1083. }
  1084. return GetCell<T>(static_cast<size_t>(columnIdx), pRowIdx);
  1085. }
  1086. /**
  1087. * @brief Get cell by column name and row index.
  1088. * @param pColumnName column label name.
  1089. * @param pRowIdx zero-based row index.
  1090. * @param pToVal conversion function.
  1091. * @returns cell data.
  1092. */
  1093. template<typename T>
  1094. T GetCell(const std::string& pColumnName, const size_t pRowIdx, ConvFunc<T> pToVal) const
  1095. {
  1096. const ssize_t columnIdx = GetColumnIdx(pColumnName);
  1097. if (columnIdx < 0)
  1098. {
  1099. throw std::out_of_range("column not found: " + pColumnName);
  1100. }
  1101. return GetCell<T>(static_cast<size_t>(columnIdx), pRowIdx, pToVal);
  1102. }
  1103. /**
  1104. * @brief Get cell by column index and row name.
  1105. * @param pColumnIdx zero-based column index.
  1106. * @param pRowName row label name.
  1107. * @returns cell data.
  1108. */
  1109. template<typename T>
  1110. T GetCell(const size_t pColumnIdx, const std::string& pRowName) const
  1111. {
  1112. const ssize_t rowIdx = GetRowIdx(pRowName);
  1113. if (rowIdx < 0)
  1114. {
  1115. throw std::out_of_range("row not found: " + pRowName);
  1116. }
  1117. return GetCell<T>(pColumnIdx, static_cast<size_t>(rowIdx));
  1118. }
  1119. /**
  1120. * @brief Get cell by column index and row name.
  1121. * @param pColumnIdx zero-based column index.
  1122. * @param pRowName row label name.
  1123. * @param pToVal conversion function.
  1124. * @returns cell data.
  1125. */
  1126. template<typename T>
  1127. T GetCell(const size_t pColumnIdx, const std::string& pRowName, ConvFunc<T> pToVal) const
  1128. {
  1129. const ssize_t rowIdx = GetRowIdx(pRowName);
  1130. if (rowIdx < 0)
  1131. {
  1132. throw std::out_of_range("row not found: " + pRowName);
  1133. }
  1134. return GetCell<T>(pColumnIdx, static_cast<size_t>(rowIdx), pToVal);
  1135. }
  1136. /**
  1137. * @brief Set cell by index.
  1138. * @param pRowIdx zero-based row index.
  1139. * @param pColumnIdx zero-based column index.
  1140. * @param pCell cell data.
  1141. */
  1142. template<typename T>
  1143. void SetCell(const size_t pColumnIdx, const size_t pRowIdx, const T& pCell)
  1144. {
  1145. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  1146. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  1147. while ((dataRowIdx + 1) > GetDataRowCount())
  1148. {
  1149. std::vector<std::string> row;
  1150. row.resize(GetDataColumnCount());
  1151. mData.push_back(row);
  1152. }
  1153. if ((dataColumnIdx + 1) > GetDataColumnCount())
  1154. {
  1155. for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
  1156. {
  1157. itRow->resize(dataColumnIdx + 1);
  1158. }
  1159. }
  1160. std::string str;
  1161. Converter<T> converter(mConverterParams);
  1162. converter.ToStr(pCell, str);
  1163. mData.at(dataRowIdx).at(dataColumnIdx) = str;
  1164. }
  1165. /**
  1166. * @brief Set cell by name.
  1167. * @param pColumnName column label name.
  1168. * @param pRowName row label name.
  1169. * @param pCell cell data.
  1170. */
  1171. template<typename T>
  1172. void SetCell(const std::string& pColumnName, const std::string& pRowName, const T& pCell)
  1173. {
  1174. const ssize_t columnIdx = GetColumnIdx(pColumnName);
  1175. if (columnIdx < 0)
  1176. {
  1177. throw std::out_of_range("column not found: " + pColumnName);
  1178. }
  1179. const ssize_t rowIdx = GetRowIdx(pRowName);
  1180. if (rowIdx < 0)
  1181. {
  1182. throw std::out_of_range("row not found: " + pRowName);
  1183. }
  1184. SetCell<T>(static_cast<size_t>(columnIdx), static_cast<size_t>(rowIdx), pCell);
  1185. }
  1186. /**
  1187. * @brief Get column name
  1188. * @param pColumnIdx zero-based column index.
  1189. * @returns column name.
  1190. */
  1191. std::string GetColumnName(const size_t pColumnIdx)
  1192. {
  1193. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  1194. if (mLabelParams.mColumnNameIdx < 0)
  1195. {
  1196. throw std::out_of_range("column name row index < 0: " + std::to_string(mLabelParams.mColumnNameIdx));
  1197. }
  1198. return mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).at(dataColumnIdx);
  1199. }
  1200. /**
  1201. * @brief Set column name
  1202. * @param pColumnIdx zero-based column index.
  1203. * @param pColumnName column name.
  1204. */
  1205. void SetColumnName(size_t pColumnIdx, const std::string& pColumnName)
  1206. {
  1207. if (mLabelParams.mColumnNameIdx < 0)
  1208. {
  1209. throw std::out_of_range("column name row index < 0: " + std::to_string(mLabelParams.mColumnNameIdx));
  1210. }
  1211. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  1212. mColumnNames[pColumnName] = dataColumnIdx;
  1213. // increase table size if necessary:
  1214. const size_t rowIdx = static_cast<size_t>(mLabelParams.mColumnNameIdx);
  1215. if (rowIdx >= mData.size())
  1216. {
  1217. mData.resize(rowIdx + 1);
  1218. }
  1219. auto& row = mData[rowIdx];
  1220. if (dataColumnIdx >= row.size())
  1221. {
  1222. row.resize(dataColumnIdx + 1);
  1223. }
  1224. mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).at(dataColumnIdx) = pColumnName;
  1225. }
  1226. /**
  1227. * @brief Get column names
  1228. * @returns vector of column names.
  1229. */
  1230. std::vector<std::string> GetColumnNames()
  1231. {
  1232. if (mLabelParams.mColumnNameIdx >= 0)
  1233. {
  1234. return std::vector<std::string>(mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).begin() +
  1235. (mLabelParams.mRowNameIdx + 1),
  1236. mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).end());
  1237. }
  1238. return std::vector<std::string>();
  1239. }
  1240. /**
  1241. * @brief Get row name
  1242. * @param pRowIdx zero-based column index.
  1243. * @returns row name.
  1244. */
  1245. std::string GetRowName(const size_t pRowIdx)
  1246. {
  1247. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  1248. if (mLabelParams.mRowNameIdx < 0)
  1249. {
  1250. throw std::out_of_range("row name column index < 0: " + std::to_string(mLabelParams.mRowNameIdx));
  1251. }
  1252. return mData.at(dataRowIdx).at(static_cast<size_t>(mLabelParams.mRowNameIdx));
  1253. }
  1254. /**
  1255. * @brief Set row name
  1256. * @param pRowIdx zero-based row index.
  1257. * @param pRowName row name.
  1258. */
  1259. void SetRowName(size_t pRowIdx, const std::string& pRowName)
  1260. {
  1261. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  1262. mRowNames[pRowName] = dataRowIdx;
  1263. if (mLabelParams.mRowNameIdx < 0)
  1264. {
  1265. throw std::out_of_range("row name column index < 0: " + std::to_string(mLabelParams.mRowNameIdx));
  1266. }
  1267. // increase table size if necessary:
  1268. if (dataRowIdx >= mData.size())
  1269. {
  1270. mData.resize(dataRowIdx + 1);
  1271. }
  1272. auto& row = mData[dataRowIdx];
  1273. if (mLabelParams.mRowNameIdx >= static_cast<ssize_t>(row.size()))
  1274. {
  1275. row.resize(static_cast<size_t>(mLabelParams.mRowNameIdx) + 1);
  1276. }
  1277. mData.at(dataRowIdx).at(static_cast<size_t>(mLabelParams.mRowNameIdx)) = pRowName;
  1278. }
  1279. /**
  1280. * @brief Get row names
  1281. * @returns vector of row names.
  1282. */
  1283. std::vector<std::string> GetRowNames()
  1284. {
  1285. std::vector<std::string> rownames;
  1286. if (mLabelParams.mRowNameIdx >= 0)
  1287. {
  1288. for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
  1289. {
  1290. if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx)
  1291. {
  1292. rownames.push_back(itRow->at(static_cast<size_t>(mLabelParams.mRowNameIdx)));
  1293. }
  1294. }
  1295. }
  1296. return rownames;
  1297. }
  1298. private:
  1299. void ReadCsv()
  1300. {
  1301. std::ifstream stream;
  1302. stream.exceptions(std::ifstream::failbit | std::ifstream::badbit);
  1303. stream.open(mPath, std::ios::binary);
  1304. ReadCsv(stream);
  1305. }
  1306. void ReadCsv(std::istream& pStream)
  1307. {
  1308. Clear();
  1309. pStream.seekg(0, std::ios::end);
  1310. std::streamsize length = pStream.tellg();
  1311. pStream.seekg(0, std::ios::beg);
  1312. #ifdef HAS_CODECVT
  1313. std::vector<char> bom2b(2, '\0');
  1314. if (length >= 2)
  1315. {
  1316. pStream.read(bom2b.data(), 2);
  1317. pStream.seekg(0, std::ios::beg);
  1318. }
  1319. static const std::vector<char> bomU16le = { '\xff', '\xfe' };
  1320. static const std::vector<char> bomU16be = { '\xfe', '\xff' };
  1321. if ((bom2b == bomU16le) || (bom2b == bomU16be))
  1322. {
  1323. mIsUtf16 = true;
  1324. mIsLE = (bom2b == bomU16le);
  1325. std::wifstream wstream;
  1326. wstream.exceptions(std::wifstream::failbit | std::wifstream::badbit);
  1327. wstream.open(mPath, std::ios::binary);
  1328. if (mIsLE)
  1329. {
  1330. wstream.imbue(std::locale(wstream.getloc(),
  1331. new std::codecvt_utf16<wchar_t, 0x10ffff,
  1332. static_cast<std::codecvt_mode>(std::consume_header |
  1333. std::little_endian)>));
  1334. }
  1335. else
  1336. {
  1337. wstream.imbue(std::locale(wstream.getloc(),
  1338. new std::codecvt_utf16<wchar_t, 0x10ffff,
  1339. std::consume_header>));
  1340. }
  1341. std::wstringstream wss;
  1342. wss << wstream.rdbuf();
  1343. std::string utf8 = ToString(wss.str());
  1344. std::stringstream ss(utf8);
  1345. ParseCsv(ss, static_cast<std::streamsize>(utf8.size()));
  1346. }
  1347. else
  1348. #endif
  1349. {
  1350. // check for UTF-8 Byte order mark and skip it when found
  1351. if (length >= 3)
  1352. {
  1353. std::vector<char> bom3b(3, '\0');
  1354. pStream.read(bom3b.data(), 3);
  1355. static const std::vector<char> bomU8 = { '\xef', '\xbb', '\xbf' };
  1356. if (bom3b != bomU8)
  1357. {
  1358. // file does not start with a UTF-8 Byte order mark
  1359. pStream.seekg(0, std::ios::beg);
  1360. }
  1361. else
  1362. {
  1363. // file did start with a UTF-8 Byte order mark, simply skip it
  1364. length -= 3;
  1365. }
  1366. }
  1367. ParseCsv(pStream, length);
  1368. }
  1369. }
  1370. void ParseCsv(std::istream& pStream, std::streamsize p_FileLength)
  1371. {
  1372. const std::streamsize bufLength = 64 * 1024;
  1373. std::vector<char> buffer(bufLength);
  1374. std::vector<std::string> row;
  1375. std::string cell;
  1376. bool quoted = false;
  1377. int cr = 0;
  1378. int lf = 0;
  1379. while (p_FileLength > 0)
  1380. {
  1381. const std::streamsize toReadLength = std::min<std::streamsize>(p_FileLength, bufLength);
  1382. pStream.read(buffer.data(), toReadLength);
  1383. // With user-specified istream opened in non-binary mode on windows, we may have a
  1384. // data length mismatch, so ensure we don't parse outside actual data length read.
  1385. const std::streamsize readLength = pStream.gcount();
  1386. if (readLength <= 0)
  1387. {
  1388. break;
  1389. }
  1390. for (size_t i = 0; i < static_cast<size_t>(readLength); ++i)
  1391. {
  1392. if (buffer[i] == mSeparatorParams.mQuoteChar)
  1393. {
  1394. if (cell.empty() || (cell[0] == mSeparatorParams.mQuoteChar))
  1395. {
  1396. quoted = !quoted;
  1397. }
  1398. cell += buffer[i];
  1399. }
  1400. else if (buffer[i] == mSeparatorParams.mSeparator)
  1401. {
  1402. if (!quoted)
  1403. {
  1404. row.push_back(Unquote(Trim(cell)));
  1405. cell.clear();
  1406. }
  1407. else
  1408. {
  1409. cell += buffer[i];
  1410. }
  1411. }
  1412. else if (buffer[i] == '\r')
  1413. {
  1414. if (mSeparatorParams.mQuotedLinebreaks && quoted)
  1415. {
  1416. cell += buffer[i];
  1417. }
  1418. else
  1419. {
  1420. ++cr;
  1421. }
  1422. }
  1423. else if (buffer[i] == '\n')
  1424. {
  1425. if (mSeparatorParams.mQuotedLinebreaks && quoted)
  1426. {
  1427. cell += buffer[i];
  1428. }
  1429. else
  1430. {
  1431. ++lf;
  1432. if (mLineReaderParams.mSkipEmptyLines && row.empty() && cell.empty())
  1433. {
  1434. // skip empty line
  1435. }
  1436. else
  1437. {
  1438. row.push_back(Unquote(Trim(cell)));
  1439. if (mLineReaderParams.mSkipCommentLines && !row.at(0).empty() &&
  1440. (row.at(0)[0] == mLineReaderParams.mCommentPrefix))
  1441. {
  1442. // skip comment line
  1443. }
  1444. else
  1445. {
  1446. mData.push_back(row);
  1447. }
  1448. cell.clear();
  1449. row.clear();
  1450. quoted = false;
  1451. }
  1452. }
  1453. }
  1454. else
  1455. {
  1456. cell += buffer[i];
  1457. }
  1458. }
  1459. p_FileLength -= readLength;
  1460. }
  1461. // Handle last line without linebreak
  1462. if (!cell.empty() || !row.empty())
  1463. {
  1464. row.push_back(Unquote(Trim(cell)));
  1465. cell.clear();
  1466. mData.push_back(row);
  1467. row.clear();
  1468. }
  1469. // Assume CR/LF if at least half the linebreaks have CR
  1470. mSeparatorParams.mHasCR = (cr > (lf / 2));
  1471. // Set up column labels
  1472. UpdateColumnNames();
  1473. // Set up row labels
  1474. UpdateRowNames();
  1475. }
  1476. void WriteCsv() const
  1477. {
  1478. #ifdef HAS_CODECVT
  1479. if (mIsUtf16)
  1480. {
  1481. std::stringstream ss;
  1482. WriteCsv(ss);
  1483. std::string utf8 = ss.str();
  1484. std::wstring wstr = ToWString(utf8);
  1485. std::wofstream wstream;
  1486. wstream.exceptions(std::wofstream::failbit | std::wofstream::badbit);
  1487. wstream.open(mPath, std::ios::binary | std::ios::trunc);
  1488. if (mIsLE)
  1489. {
  1490. wstream.imbue(std::locale(wstream.getloc(),
  1491. new std::codecvt_utf16<wchar_t, 0x10ffff,
  1492. static_cast<std::codecvt_mode>(std::little_endian)>));
  1493. }
  1494. else
  1495. {
  1496. wstream.imbue(std::locale(wstream.getloc(),
  1497. new std::codecvt_utf16<wchar_t, 0x10ffff>));
  1498. }
  1499. wstream << static_cast<wchar_t>(0xfeff);
  1500. wstream << wstr;
  1501. }
  1502. else
  1503. #endif
  1504. {
  1505. std::ofstream stream;
  1506. stream.exceptions(std::ofstream::failbit | std::ofstream::badbit);
  1507. stream.open(mPath, std::ios::binary | std::ios::trunc);
  1508. WriteCsv(stream);
  1509. }
  1510. }
  1511. void WriteCsv(std::ostream& pStream) const
  1512. {
  1513. for (auto itr = mData.begin(); itr != mData.end(); ++itr)
  1514. {
  1515. for (auto itc = itr->begin(); itc != itr->end(); ++itc)
  1516. {
  1517. if (mSeparatorParams.mAutoQuote &&
  1518. ((itc->find(mSeparatorParams.mSeparator) != std::string::npos) ||
  1519. (itc->find(' ') != std::string::npos)))
  1520. {
  1521. // escape quotes in string
  1522. std::string str = *itc;
  1523. const std::string quoteCharStr = std::string(1, mSeparatorParams.mQuoteChar);
  1524. ReplaceString(str, quoteCharStr, quoteCharStr + quoteCharStr);
  1525. pStream << quoteCharStr << str << quoteCharStr;
  1526. }
  1527. else
  1528. {
  1529. pStream << *itc;
  1530. }
  1531. if (std::distance(itc, itr->end()) > 1)
  1532. {
  1533. pStream << mSeparatorParams.mSeparator;
  1534. }
  1535. }
  1536. pStream << (mSeparatorParams.mHasCR ? "\r\n" : "\n");
  1537. }
  1538. }
  1539. size_t GetDataRowCount() const
  1540. {
  1541. return mData.size();
  1542. }
  1543. size_t GetDataColumnCount() const
  1544. {
  1545. return (mData.size() > 0) ? mData.at(0).size() : 0;
  1546. }
  1547. inline size_t GetDataRowIndex(const size_t pRowIdx) const
  1548. {
  1549. return pRowIdx + static_cast<size_t>(mLabelParams.mColumnNameIdx + 1);
  1550. }
  1551. inline size_t GetDataColumnIndex(const size_t pColumnIdx) const
  1552. {
  1553. return pColumnIdx + static_cast<size_t>(mLabelParams.mRowNameIdx + 1);
  1554. }
  1555. std::string Trim(const std::string& pStr)
  1556. {
  1557. if (mSeparatorParams.mTrim)
  1558. {
  1559. std::string str = pStr;
  1560. // ltrim
  1561. str.erase(str.begin(), std::find_if(str.begin(), str.end(), [](int ch) { return !isspace(ch); }));
  1562. // rtrim
  1563. str.erase(std::find_if(str.rbegin(), str.rend(), [](int ch) { return !isspace(ch); }).base(), str.end());
  1564. return str;
  1565. }
  1566. else
  1567. {
  1568. return pStr;
  1569. }
  1570. }
  1571. std::string Unquote(const std::string& pStr)
  1572. {
  1573. if (mSeparatorParams.mAutoQuote && (pStr.size() >= 2) &&
  1574. (pStr.front() == mSeparatorParams.mQuoteChar) &&
  1575. (pStr.back() == mSeparatorParams.mQuoteChar))
  1576. {
  1577. // remove start/end quotes
  1578. std::string str = pStr.substr(1, pStr.size() - 2);
  1579. // unescape quotes in string
  1580. const std::string quoteCharStr = std::string(1, mSeparatorParams.mQuoteChar);
  1581. ReplaceString(str, quoteCharStr + quoteCharStr, quoteCharStr);
  1582. return str;
  1583. }
  1584. else
  1585. {
  1586. return pStr;
  1587. }
  1588. }
  1589. void UpdateColumnNames()
  1590. {
  1591. mColumnNames.clear();
  1592. if ((mLabelParams.mColumnNameIdx >= 0) &&
  1593. (static_cast<ssize_t>(mData.size()) > mLabelParams.mColumnNameIdx))
  1594. {
  1595. size_t i = 0;
  1596. for (auto& columnName : mData[static_cast<size_t>(mLabelParams.mColumnNameIdx)])
  1597. {
  1598. mColumnNames[columnName] = i++;
  1599. }
  1600. }
  1601. }
  1602. void UpdateRowNames()
  1603. {
  1604. mRowNames.clear();
  1605. if ((mLabelParams.mRowNameIdx >= 0) &&
  1606. (static_cast<ssize_t>(mData.size()) >
  1607. (mLabelParams.mColumnNameIdx + 1)))
  1608. {
  1609. size_t i = 0;
  1610. for (auto& dataRow : mData)
  1611. {
  1612. if (static_cast<ssize_t>(dataRow.size()) > mLabelParams.mRowNameIdx)
  1613. {
  1614. mRowNames[dataRow[static_cast<size_t>(mLabelParams.mRowNameIdx)]] = i++;
  1615. }
  1616. }
  1617. }
  1618. }
  1619. #ifdef HAS_CODECVT
  1620. #if defined(_MSC_VER)
  1621. #pragma warning (push)
  1622. #pragma warning (disable: 4996)
  1623. #endif
  // Converts a wide string to its UTF-8 encoded byte string.
  static std::string ToString(const std::wstring& pWStr)
  {
    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf8Converter;
    return utf8Converter.to_bytes(pWStr);
  }
  // Converts a UTF-8 encoded byte string to a wide string.
  static std::wstring ToWString(const std::string& pStr)
  {
    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf8Converter;
    return utf8Converter.from_bytes(pStr);
  }
  1632. #if defined(_MSC_VER)
  1633. #pragma warning (pop)
  1634. #endif
  1635. #endif
  // Replaces every occurrence of pSearch in pStr with pReplace, in place.
  // Scanning resumes after each inserted replacement, so text introduced by
  // pReplace is never matched again. An empty pSearch leaves pStr unchanged.
  static void ReplaceString(std::string& pStr, const std::string& pSearch, const std::string& pReplace)
  {
    // An empty search string matches at every position and would never
    // terminate.
    if (pSearch.empty())
    {
      return;
    }

    size_t pos = 0;
    while ((pos = pStr.find(pSearch, pos)) != std::string::npos)
    {
      pStr.replace(pos, pSearch.size(), pReplace);
      pos += pReplace.size();
    }
  }
  1645. private:
  1646. std::string mPath;
  1647. LabelParams mLabelParams;
  1648. SeparatorParams mSeparatorParams;
  1649. ConverterParams mConverterParams;
  1650. LineReaderParams mLineReaderParams;
  1651. std::vector<std::vector<std::string>> mData;
  1652. std::map<std::string, size_t> mColumnNames;
  1653. std::map<std::string, size_t> mRowNames;
  1654. #ifdef HAS_CODECVT
  1655. bool mIsUtf16 = false;
  1656. bool mIsLE = false;
  1657. #endif
  1658. };
  1659. }