12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821 |
- /*
- * rapidcsv.h
- *
- * URL: https://github.com/d99kris/rapidcsv
- * Version: 8.68
- *
- * Copyright (C) 2017-2022 Kristofer Berggren
- * All rights reserved.
- *
- * rapidcsv is distributed under the BSD 3-Clause license, see LICENSE for details.
- *
- */
- #pragma once
#include <algorithm>
#include <cassert>
#include <cmath>
#ifdef HAS_CODECVT
#include <codecvt>
#include <locale>
#endif
#include <fstream>
#include <functional>
#include <iostream>
#include <limits>
#include <locale>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <typeinfo>
#include <vector>
- #if defined(_MSC_VER)
- #include <BaseTsd.h>
- typedef SSIZE_T ssize_t;
- #endif
- namespace rapidcsv
- {
// Compile-time platform default for line endings: true when building with
// MSVC, false otherwise. Used as the default for SeparatorParams::mHasCR.
static const bool sPlatformHasCR =
#ifdef _MSC_VER
  true;
#else
  false;
#endif
/**
 * @brief     Settings that decide how cells which cannot be parsed as a number
 *            (including empty strings) are handled.
 */
struct ConverterParams
{
  /**
   * @brief   Constructor
   * @param   pHasDefaultConverter  when true, a non-numerical string is converted to
   *                                one of the default values below; when false
   *                                (default) the conversion error is thrown instead.
   * @param   pDefaultFloat         default used for invalid floating-point numbers.
   * @param   pDefaultInteger       default used for invalid integer numbers.
   * @param   pNumericLocale        whether to honor the LC_NUMERIC locale (default
   *                                true).
   */
  explicit ConverterParams(const bool pHasDefaultConverter = false,
                           const long double pDefaultFloat = std::numeric_limits<long double>::signaling_NaN(),
                           const long long pDefaultInteger = 0,
                           const bool pNumericLocale = true)
    : mHasDefaultConverter(pHasDefaultConverter),
      mDefaultFloat(pDefaultFloat),
      mDefaultInteger(pDefaultInteger),
      mNumericLocale(pNumericLocale)
  {
  }

  /** @brief  true: substitute a default for non-numerical strings; false: throw. */
  bool mHasDefaultConverter;

  /** @brief  value substituted for invalid floating-point numbers. */
  long double mDefaultFloat;

  /** @brief  value substituted for invalid integer numbers. */
  long long mDefaultInteger;

  /** @brief  whether to honor the LC_NUMERIC locale. */
  bool mNumericLocale;
};
/**
 * @brief     Exception thrown when attempting to access Document data in a datatype which
 *            is not supported by the Converter class.
 */
class no_converter : public std::exception
{
public:
  // what() must be public: with the class-default (private) access it could only
  // be reached through a std::exception reference, not on a no_converter object.
  /**
   * @brief   Provides details about the exception
   * @returns an explanatory string
   */
  const char* what() const noexcept override
  {
    return "unsupported conversion datatype";
  }
};
- /**
- * @brief Class providing conversion to/from numerical datatypes and strings. Only
- * intended for rapidcsv internal usage, but exposed externally to allow
- * specialization for custom datatype conversions.
- */
- template<typename T>
- class Converter
- {
- public:
- /**
- * @brief Constructor
- * @param pConverterParams specifies how conversion of non-numerical values to
- * numerical datatype shall be handled.
- */
- Converter(const ConverterParams& pConverterParams)
- : mConverterParams(pConverterParams)
- {
- }
- /**
- * @brief Converts numerical value to string representation.
- * @param pVal numerical value
- * @param pStr output string
- */
- void ToStr(const T& pVal, std::string& pStr) const
- {
- if (typeid(T) == typeid(int) ||
- typeid(T) == typeid(long) ||
- typeid(T) == typeid(long long) ||
- typeid(T) == typeid(unsigned) ||
- typeid(T) == typeid(unsigned long) ||
- typeid(T) == typeid(unsigned long long) ||
- typeid(T) == typeid(float) ||
- typeid(T) == typeid(double) ||
- typeid(T) == typeid(long double) ||
- typeid(T) == typeid(char))
- {
- std::ostringstream out;
- out << pVal;
- pStr = out.str();
- }
- else
- {
- throw no_converter();
- }
- }
- /**
- * @brief Converts string holding a numerical value to numerical datatype representation.
- * @param pVal numerical value
- * @param pStr output string
- */
- void ToVal(const std::string& pStr, T& pVal) const
- {
- try
- {
- if (typeid(T) == typeid(int))
- {
- pVal = static_cast<T>(std::stoi(pStr));
- return;
- }
- else if (typeid(T) == typeid(long))
- {
- pVal = static_cast<T>(std::stol(pStr));
- return;
- }
- else if (typeid(T) == typeid(long long))
- {
- pVal = static_cast<T>(std::stoll(pStr));
- return;
- }
- else if (typeid(T) == typeid(unsigned))
- {
- pVal = static_cast<T>(std::stoul(pStr));
- return;
- }
- else if (typeid(T) == typeid(unsigned long))
- {
- pVal = static_cast<T>(std::stoul(pStr));
- return;
- }
- else if (typeid(T) == typeid(unsigned long long))
- {
- pVal = static_cast<T>(std::stoull(pStr));
- return;
- }
- }
- catch (...)
- {
- if (!mConverterParams.mHasDefaultConverter)
- {
- throw;
- }
- else
- {
- pVal = static_cast<T>(mConverterParams.mDefaultInteger);
- return;
- }
- }
- try
- {
- if (mConverterParams.mNumericLocale)
- {
- if (typeid(T) == typeid(float))
- {
- pVal = static_cast<T>(std::stof(pStr));
- return;
- }
- else if (typeid(T) == typeid(double))
- {
- pVal = static_cast<T>(std::stod(pStr));
- return;
- }
- else if (typeid(T) == typeid(long double))
- {
- pVal = static_cast<T>(std::stold(pStr));
- return;
- }
- }
- else
- {
- if ((typeid(T) == typeid(float)) ||
- (typeid(T) == typeid(double)) ||
- (typeid(T) == typeid(long double)))
- {
- std::istringstream iss(pStr);
- iss >> pVal;
- if (iss.fail() || iss.bad() || !iss.eof())
- {
- throw std::invalid_argument("istringstream: no conversion");
- }
- return;
- }
- }
- }
- catch (...)
- {
- if (!mConverterParams.mHasDefaultConverter)
- {
- throw;
- }
- else
- {
- pVal = static_cast<T>(mConverterParams.mDefaultFloat);
- return;
- }
- }
- if (typeid(T) == typeid(char))
- {
- pVal = static_cast<T>(pStr[0]);
- return;
- }
- else
- {
- throw no_converter();
- }
- }
- private:
- const ConverterParams& mConverterParams;
- };
- /**
- * @brief Specialized implementation handling string to string conversion.
- * @param pVal string
- * @param pStr string
- */
- template<>
- inline void Converter<std::string>::ToStr(const std::string& pVal, std::string& pStr) const
- {
- pStr = pVal;
- }
- /**
- * @brief Specialized implementation handling string to string conversion.
- * @param pVal string
- * @param pStr string
- */
- template<>
- inline void Converter<std::string>::ToVal(const std::string& pStr, std::string& pVal) const
- {
- pVal = pStr;
- }
/**
 * @brief     Signature of a caller-supplied conversion callback: receives the cell
 *            text and writes the converted value through the second argument.
 */
template<typename T>
using ConvFunc = std::function<void(const std::string&, T&)>;
/**
 * @brief     Parameters selecting which row and which column hold the labels.
 */
struct LabelParams
{
  /**
   * @brief   Constructor
   * @param   pColumnNameIdx        zero-based row index of the column labels; -1
   *                                disables column lookup by label name and exposes
   *                                all rows as document data. Default: 0
   * @param   pRowNameIdx           zero-based column index of the row labels; -1
   *                                disables row lookup by label name and exposes
   *                                all columns as document data. Default: -1
   * @throws  std::out_of_range     if either index is below -1.
   */
  explicit LabelParams(const ssize_t pColumnNameIdx = 0, const ssize_t pRowNameIdx = -1)
    : mColumnNameIdx(pColumnNameIdx),
      mRowNameIdx(pRowNameIdx)
  {
    // -1 is the "no labels" sentinel; anything lower is meaningless.
    const ssize_t lowestValid = -1;

    if (mColumnNameIdx < lowestValid)
    {
      std::string errStr = "invalid column name index ";
      errStr += std::to_string(mColumnNameIdx);
      errStr += " < -1";
      throw std::out_of_range(errStr);
    }

    if (mRowNameIdx < lowestValid)
    {
      std::string errStr = "invalid row name index ";
      errStr += std::to_string(mRowNameIdx);
      errStr += " < -1";
      throw std::out_of_range(errStr);
    }
  }

  /** @brief  zero-based row index of the column labels. */
  ssize_t mColumnNameIdx;

  /** @brief  zero-based column index of the row labels. */
  ssize_t mRowNameIdx;
};
- /**
- * @brief Datastructure holding parameters controlling how the CSV data fields are separated.
- */
- struct SeparatorParams
- {
- /**
- * @brief Constructor
- * @param pSeparator specifies the column separator (default ',').
- * @param pTrim specifies whether to trim leading and trailing spaces from
- * cells read (default false).
- * @param pHasCR specifies whether a new document (i.e. not an existing document read)
- * should use CR/LF instead of only LF (default is to use standard
- * behavior of underlying platforms - CR/LF for Win, and LF for others).
- * @param pQuotedLinebreaks specifies whether to allow line breaks in quoted text (default false)
- * @param pAutoQuote specifies whether to automatically dequote data during read, and add
- * quotes during write (default true).
- * @param pQuoteChar specifies the quote character (default '\"').
- */
- explicit SeparatorParams(const char pSeparator = ',', const bool pTrim = false,
- const bool pHasCR = sPlatformHasCR, const bool pQuotedLinebreaks = false,
- const bool pAutoQuote = true, const char pQuoteChar = '"')
- : mSeparator(pSeparator)
- , mTrim(pTrim)
- , mHasCR(pHasCR)
- , mQuotedLinebreaks(pQuotedLinebreaks)
- , mAutoQuote(pAutoQuote)
- , mQuoteChar(pQuoteChar)
- {
- }
- /**
- * @brief specifies the column separator.
- */
- char mSeparator;
- /**
- * @brief specifies whether to trim leading and trailing spaces from cells read.
- */
- bool mTrim;
- /**
- * @brief specifies whether new documents should use CR/LF instead of LF.
- */
- bool mHasCR;
- /**
- * @brief specifies whether to allow line breaks in quoted text.
- */
- bool mQuotedLinebreaks;
- /**
- * @brief specifies whether to automatically dequote cell data.
- */
- bool mAutoQuote;
- /**
- * @brief specifies the quote character.
- */
- char mQuoteChar;
- };
/**
 * @brief     Parameters describing which special lines are skipped while reading.
 */
struct LineReaderParams
{
  /**
   * @brief   Constructor
   * @param   pSkipCommentLines     whether to skip lines prefixed with
   *                                mCommentPrefix. Default: false
   * @param   pCommentPrefix        prefix character indicating a comment line.
   *                                Default: #
   * @param   pSkipEmptyLines       whether to skip empty lines. Default: false
   */
  explicit LineReaderParams(const bool pSkipCommentLines = false,
                            const char pCommentPrefix = '#',
                            const bool pSkipEmptyLines = false)
    : mSkipCommentLines(pSkipCommentLines),
      mCommentPrefix(pCommentPrefix),
      mSkipEmptyLines(pSkipEmptyLines)
  {
  }

  /** @brief  whether to skip lines prefixed with mCommentPrefix. */
  bool mSkipCommentLines;

  /** @brief  prefix character indicating a comment line. */
  char mCommentPrefix;

  /** @brief  whether to skip empty lines. */
  bool mSkipEmptyLines;
};
- /**
- * @brief Class representing a CSV document.
- */
- class Document
- {
- public:
- /**
- * @brief Constructor
- * @param pPath specifies the path of an existing CSV-file to populate the Document
- * data with.
- * @param pLabelParams specifies which row and column should be treated as labels.
- * @param pSeparatorParams specifies which field and row separators should be used.
- * @param pConverterParams specifies how invalid numbers (including empty strings) should be
- * handled.
- * @param pLineReaderParams specifies how special line formats should be treated.
- */
- explicit Document(const std::string& pPath = std::string(),
- const LabelParams& pLabelParams = LabelParams(),
- const SeparatorParams& pSeparatorParams = SeparatorParams(),
- const ConverterParams& pConverterParams = ConverterParams(),
- const LineReaderParams& pLineReaderParams = LineReaderParams())
- : mPath(pPath)
- , mLabelParams(pLabelParams)
- , mSeparatorParams(pSeparatorParams)
- , mConverterParams(pConverterParams)
- , mLineReaderParams(pLineReaderParams)
- , mData()
- , mColumnNames()
- , mRowNames()
- {
- if (!mPath.empty())
- {
- ReadCsv();
- }
- }
- /**
- * @brief Constructor
- * @param pStream specifies a binary input stream to read CSV data from.
- * @param pLabelParams specifies which row and column should be treated as labels.
- * @param pSeparatorParams specifies which field and row separators should be used.
- * @param pConverterParams specifies how invalid numbers (including empty strings) should be
- * handled.
- * @param pLineReaderParams specifies how special line formats should be treated.
- */
- explicit Document(std::istream& pStream,
- const LabelParams& pLabelParams = LabelParams(),
- const SeparatorParams& pSeparatorParams = SeparatorParams(),
- const ConverterParams& pConverterParams = ConverterParams(),
- const LineReaderParams& pLineReaderParams = LineReaderParams())
- : mPath()
- , mLabelParams(pLabelParams)
- , mSeparatorParams(pSeparatorParams)
- , mConverterParams(pConverterParams)
- , mLineReaderParams(pLineReaderParams)
- , mData()
- , mColumnNames()
- , mRowNames()
- {
- ReadCsv(pStream);
- }
- /**
- * @brief Read Document data from file.
- * @param pPath specifies the path of an existing CSV-file to populate the Document
- * data with.
- * @param pLabelParams specifies which row and column should be treated as labels.
- * @param pSeparatorParams specifies which field and row separators should be used.
- * @param pConverterParams specifies how invalid numbers (including empty strings) should be
- * handled.
- * @param pLineReaderParams specifies how special line formats should be treated.
- */
- void Load(const std::string& pPath,
- const LabelParams& pLabelParams = LabelParams(),
- const SeparatorParams& pSeparatorParams = SeparatorParams(),
- const ConverterParams& pConverterParams = ConverterParams(),
- const LineReaderParams& pLineReaderParams = LineReaderParams())
- {
- mPath = pPath;
- mLabelParams = pLabelParams;
- mSeparatorParams = pSeparatorParams;
- mConverterParams = pConverterParams;
- mLineReaderParams = pLineReaderParams;
- ReadCsv();
- }
- /**
- * @brief Read Document data from stream.
- * @param pStream specifies a binary input stream to read CSV data from.
- * @param pLabelParams specifies which row and column should be treated as labels.
- * @param pSeparatorParams specifies which field and row separators should be used.
- * @param pConverterParams specifies how invalid numbers (including empty strings) should be
- * handled.
- * @param pLineReaderParams specifies how special line formats should be treated.
- */
- void Load(std::istream& pStream,
- const LabelParams& pLabelParams = LabelParams(),
- const SeparatorParams& pSeparatorParams = SeparatorParams(),
- const ConverterParams& pConverterParams = ConverterParams(),
- const LineReaderParams& pLineReaderParams = LineReaderParams())
- {
- mPath = "";
- mLabelParams = pLabelParams;
- mSeparatorParams = pSeparatorParams;
- mConverterParams = pConverterParams;
- mLineReaderParams = pLineReaderParams;
- ReadCsv(pStream);
- }
- /**
- * @brief Write Document data to file.
- * @param pPath optionally specifies the path where the CSV-file will be created
- * (if not specified, the original path provided when creating or
- * loading the Document data will be used).
- */
- void Save(const std::string& pPath = std::string())
- {
- if (!pPath.empty())
- {
- mPath = pPath;
- }
- WriteCsv();
- }
- /**
- * @brief Write Document data to stream.
- * @param pStream specifies a binary output stream to write the data to.
- */
- void Save(std::ostream& pStream)
- {
- WriteCsv(pStream);
- }
- /**
- * @brief Clears loaded Document data.
- *
- */
- void Clear()
- {
- mData.clear();
- mColumnNames.clear();
- mRowNames.clear();
- #ifdef HAS_CODECVT
- mIsUtf16 = false;
- mIsLE = false;
- #endif
- }
- /**
- * @brief Get column index by name.
- * @param pColumnName column label name.
- * @returns zero-based column index.
- */
- ssize_t GetColumnIdx(const std::string& pColumnName) const
- {
- if (mLabelParams.mColumnNameIdx >= 0)
- {
- if (mColumnNames.find(pColumnName) != mColumnNames.end())
- {
- return static_cast<ssize_t>(mColumnNames.at(pColumnName)) - (mLabelParams.mRowNameIdx + 1);
- }
- }
- return -1;
- }
- /**
- * @brief Get column by index.
- * @param pColumnIdx zero-based column index.
- * @returns vector of column data.
- */
- template<typename T>
- std::vector<T> GetColumn(const size_t pColumnIdx) const
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- std::vector<T> column;
- Converter<T> converter(mConverterParams);
- for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
- {
- if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx)
- {
- if (dataColumnIdx < itRow->size())
- {
- T val;
- converter.ToVal(itRow->at(dataColumnIdx), val);
- column.push_back(val);
- }
- else
- {
- const std::string errStr = "requested column index " +
- std::to_string(pColumnIdx) + " >= " +
- std::to_string(itRow->size() - GetDataColumnIndex(0)) +
- " (number of columns on row index " +
- std::to_string(std::distance(mData.begin(), itRow) -
- (mLabelParams.mColumnNameIdx + 1)) + ")";
- throw std::out_of_range(errStr);
- }
- }
- }
- return column;
- }
- /**
- * @brief Get column by index.
- * @param pColumnIdx zero-based column index.
- * @param pToVal conversion function.
- * @returns vector of column data.
- */
- template<typename T>
- std::vector<T> GetColumn(const size_t pColumnIdx, ConvFunc<T> pToVal) const
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- std::vector<T> column;
- for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
- {
- if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx)
- {
- T val;
- pToVal(itRow->at(dataColumnIdx), val);
- column.push_back(val);
- }
- }
- return column;
- }
- /**
- * @brief Get column by name.
- * @param pColumnName column label name.
- * @returns vector of column data.
- */
- template<typename T>
- std::vector<T> GetColumn(const std::string& pColumnName) const
- {
- const ssize_t columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- return GetColumn<T>(static_cast<size_t>(columnIdx));
- }
- /**
- * @brief Get column by name.
- * @param pColumnName column label name.
- * @param pToVal conversion function.
- * @returns vector of column data.
- */
- template<typename T>
- std::vector<T> GetColumn(const std::string& pColumnName, ConvFunc<T> pToVal) const
- {
- const ssize_t columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- return GetColumn<T>(static_cast<size_t>(columnIdx), pToVal);
- }
- /**
- * @brief Set column by index.
- * @param pColumnIdx zero-based column index.
- * @param pColumn vector of column data.
- */
- template<typename T>
- void SetColumn(const size_t pColumnIdx, const std::vector<T>& pColumn)
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- while (GetDataRowIndex(pColumn.size()) > GetDataRowCount())
- {
- std::vector<std::string> row;
- row.resize(GetDataColumnCount());
- mData.push_back(row);
- }
- if ((dataColumnIdx + 1) > GetDataColumnCount())
- {
- for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
- {
- itRow->resize(GetDataColumnIndex(dataColumnIdx + 1));
- }
- }
- Converter<T> converter(mConverterParams);
- for (auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow)
- {
- std::string str;
- converter.ToStr(*itRow, str);
- mData.at(static_cast<size_t>(std::distance(pColumn.begin(), itRow) + mLabelParams.mColumnNameIdx + 1)).at(
- dataColumnIdx) = str;
- }
- }
- /**
- * @brief Set column by name.
- * @param pColumnName column label name.
- * @param pColumn vector of column data.
- */
- template<typename T>
- void SetColumn(const std::string& pColumnName, const std::vector<T>& pColumn)
- {
- const ssize_t columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- SetColumn<T>(static_cast<size_t>(columnIdx), pColumn);
- }
- /**
- * @brief Remove column by index.
- * @param pColumnIdx zero-based column index.
- */
- void RemoveColumn(const size_t pColumnIdx)
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
- {
- itRow->erase(itRow->begin() + static_cast<ssize_t>(dataColumnIdx));
- }
- UpdateColumnNames();
- }
- /**
- * @brief Remove column by name.
- * @param pColumnName column label name.
- */
- void RemoveColumn(const std::string& pColumnName)
- {
- ssize_t columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- RemoveColumn(static_cast<size_t>(columnIdx));
- }
- /**
- * @brief Insert column at specified index.
- * @param pColumnIdx zero-based column index.
- * @param pColumn vector of column data (optional argument).
- * @param pColumnName column label name (optional argument).
- */
- template<typename T>
- void InsertColumn(const size_t pColumnIdx, const std::vector<T>& pColumn = std::vector<T>(),
- const std::string& pColumnName = std::string())
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- std::vector<std::string> column;
- if (pColumn.empty())
- {
- column.resize(GetDataRowCount());
- }
- else
- {
- column.resize(GetDataRowIndex(pColumn.size()));
- Converter<T> converter(mConverterParams);
- for (auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow)
- {
- std::string str;
- converter.ToStr(*itRow, str);
- const size_t rowIdx =
- static_cast<size_t>(std::distance(pColumn.begin(), itRow) + (mLabelParams.mColumnNameIdx + 1));
- column.at(rowIdx) = str;
- }
- }
- while (column.size() > GetDataRowCount())
- {
- std::vector<std::string> row;
- const size_t columnCount = std::max<size_t>(static_cast<size_t>(mLabelParams.mColumnNameIdx + 1),
- GetDataColumnCount());
- row.resize(columnCount);
- mData.push_back(row);
- }
- for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
- {
- const size_t rowIdx = static_cast<size_t>(std::distance(mData.begin(), itRow));
- itRow->insert(itRow->begin() + static_cast<ssize_t>(dataColumnIdx), column.at(rowIdx));
- }
- if (!pColumnName.empty())
- {
- SetColumnName(pColumnIdx, pColumnName);
- }
- UpdateColumnNames();
- }
- /**
- * @brief Get number of data columns (excluding label columns).
- * @returns column count.
- */
- size_t GetColumnCount() const
- {
- const ssize_t count = static_cast<ssize_t>((mData.size() > 0) ? mData.at(0).size() : 0) -
- (mLabelParams.mRowNameIdx + 1);
- return (count >= 0) ? static_cast<size_t>(count) : 0;
- }
- /**
- * @brief Get row index by name.
- * @param pRowName row label name.
- * @returns zero-based row index.
- */
- ssize_t GetRowIdx(const std::string& pRowName) const
- {
- if (mLabelParams.mRowNameIdx >= 0)
- {
- if (mRowNames.find(pRowName) != mRowNames.end())
- {
- return static_cast<ssize_t>(mRowNames.at(pRowName)) - (mLabelParams.mColumnNameIdx + 1);
- }
- }
- return -1;
- }
- /**
- * @brief Get row by index.
- * @param pRowIdx zero-based row index.
- * @returns vector of row data.
- */
- template<typename T>
- std::vector<T> GetRow(const size_t pRowIdx) const
- {
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- std::vector<T> row;
- Converter<T> converter(mConverterParams);
- for (auto itCol = mData.at(dataRowIdx).begin(); itCol != mData.at(dataRowIdx).end(); ++itCol)
- {
- if (std::distance(mData.at(dataRowIdx).begin(), itCol) > mLabelParams.mRowNameIdx)
- {
- T val;
- converter.ToVal(*itCol, val);
- row.push_back(val);
- }
- }
- return row;
- }
- /**
- * @brief Get row by index.
- * @param pRowIdx zero-based row index.
- * @param pToVal conversion function.
- * @returns vector of row data.
- */
- template<typename T>
- std::vector<T> GetRow(const size_t pRowIdx, ConvFunc<T> pToVal) const
- {
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- std::vector<T> row;
- Converter<T> converter(mConverterParams);
- for (auto itCol = mData.at(dataRowIdx).begin(); itCol != mData.at(dataRowIdx).end(); ++itCol)
- {
- if (std::distance(mData.at(dataRowIdx).begin(), itCol) > mLabelParams.mRowNameIdx)
- {
- T val;
- pToVal(*itCol, val);
- row.push_back(val);
- }
- }
- return row;
- }
- /**
- * @brief Get row by name.
- * @param pRowName row label name.
- * @returns vector of row data.
- */
- template<typename T>
- std::vector<T> GetRow(const std::string& pRowName) const
- {
- ssize_t rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- return GetRow<T>(static_cast<size_t>(rowIdx));
- }
- /**
- * @brief Get row by name.
- * @param pRowName row label name.
- * @param pToVal conversion function.
- * @returns vector of row data.
- */
- template<typename T>
- std::vector<T> GetRow(const std::string& pRowName, ConvFunc<T> pToVal) const
- {
- ssize_t rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- return GetRow<T>(static_cast<size_t>(rowIdx), pToVal);
- }
- /**
- * @brief Set row by index.
- * @param pRowIdx zero-based row index.
- * @param pRow vector of row data.
- */
- template<typename T>
- void SetRow(const size_t pRowIdx, const std::vector<T>& pRow)
- {
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- while ((dataRowIdx + 1) > GetDataRowCount())
- {
- std::vector<std::string> row;
- row.resize(GetDataColumnCount());
- mData.push_back(row);
- }
- if (pRow.size() > GetDataColumnCount())
- {
- for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
- {
- itRow->resize(GetDataColumnIndex(pRow.size()));
- }
- }
- Converter<T> converter(mConverterParams);
- for (auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol)
- {
- std::string str;
- converter.ToStr(*itCol, str);
- mData.at(dataRowIdx).at(static_cast<size_t>(std::distance(pRow.begin(),
- itCol) + mLabelParams.mRowNameIdx + 1)) = str;
- }
- }
- /**
- * @brief Set row by name.
- * @param pRowName row label name.
- * @param pRow vector of row data.
- */
- template<typename T>
- void SetRow(const std::string& pRowName, const std::vector<T>& pRow)
- {
- ssize_t rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- return SetRow<T>(static_cast<size_t>(rowIdx), pRow);
- }
- /**
- * @brief Remove row by index.
- * @param pRowIdx zero-based row index.
- */
- void RemoveRow(const size_t pRowIdx)
- {
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- mData.erase(mData.begin() + static_cast<ssize_t>(dataRowIdx));
- UpdateRowNames();
- }
- /**
- * @brief Remove row by name.
- * @param pRowName row label name.
- */
- void RemoveRow(const std::string& pRowName)
- {
- ssize_t rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- RemoveRow(static_cast<size_t>(rowIdx));
- }
- /**
- * @brief Insert row at specified index.
- * @param pRowIdx zero-based row index.
- * @param pRow vector of row data (optional argument).
- * @param pRowName row label name (optional argument).
- */
- template<typename T>
- void InsertRow(const size_t pRowIdx, const std::vector<T>& pRow = std::vector<T>(),
- const std::string& pRowName = std::string())
- {
- const size_t rowIdx = GetDataRowIndex(pRowIdx);
- std::vector<std::string> row;
- if (pRow.empty())
- {
- row.resize(GetDataColumnCount());
- }
- else
- {
- row.resize(GetDataColumnIndex(pRow.size()));
- Converter<T> converter(mConverterParams);
- for (auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol)
- {
- std::string str;
- converter.ToStr(*itCol, str);
- row.at(static_cast<size_t>(std::distance(pRow.begin(), itCol) + mLabelParams.mRowNameIdx + 1)) = str;
- }
- }
- while (rowIdx > GetDataRowCount())
- {
- std::vector<std::string> tempRow;
- tempRow.resize(GetDataColumnCount());
- mData.push_back(tempRow);
- }
- mData.insert(mData.begin() + static_cast<ssize_t>(rowIdx), row);
- if (!pRowName.empty())
- {
- SetRowName(pRowIdx, pRowName);
- }
- UpdateRowNames();
- }
- /**
- * @brief Get number of data rows (excluding label rows).
- * @returns row count.
- */
- size_t GetRowCount() const
- {
- const ssize_t count = static_cast<ssize_t>(mData.size()) - (mLabelParams.mColumnNameIdx + 1);
- return (count >= 0) ? static_cast<size_t>(count) : 0;
- }
- /**
- * @brief Get cell by index.
- * @param pColumnIdx zero-based column index.
- * @param pRowIdx zero-based row index.
- * @returns cell data.
- */
- template<typename T>
- T GetCell(const size_t pColumnIdx, const size_t pRowIdx) const
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- T val;
- Converter<T> converter(mConverterParams);
- converter.ToVal(mData.at(dataRowIdx).at(dataColumnIdx), val);
- return val;
- }
- /**
- * @brief Get cell by index.
- * @param pColumnIdx zero-based column index.
- * @param pRowIdx zero-based row index.
- * @param pToVal conversion function.
- * @returns cell data.
- */
- template<typename T>
- T GetCell(const size_t pColumnIdx, const size_t pRowIdx, ConvFunc<T> pToVal) const
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- T val;
- pToVal(mData.at(dataRowIdx).at(dataColumnIdx), val);
- return val;
- }
- /**
- * @brief Get cell by name.
- * @param pColumnName column label name.
- * @param pRowName row label name.
- * @returns cell data.
- */
- template<typename T>
- T GetCell(const std::string& pColumnName, const std::string& pRowName) const
- {
- const ssize_t columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- const ssize_t rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- return GetCell<T>(static_cast<size_t>(columnIdx), static_cast<size_t>(rowIdx));
- }
- /**
- * @brief Get cell by name.
- * @param pColumnName column label name.
- * @param pRowName row label name.
- * @param pToVal conversion function.
- * @returns cell data.
- */
- template<typename T>
- T GetCell(const std::string& pColumnName, const std::string& pRowName, ConvFunc<T> pToVal) const
- {
- const ssize_t columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- const ssize_t rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- return GetCell<T>(static_cast<size_t>(columnIdx), static_cast<size_t>(rowIdx), pToVal);
- }
- /**
- * @brief Get cell by column name and row index.
- * @param pColumnName column label name.
- * @param pRowIdx zero-based row index.
- * @returns cell data.
- */
- template<typename T>
- T GetCell(const std::string& pColumnName, const size_t pRowIdx) const
- {
- const ssize_t columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- return GetCell<T>(static_cast<size_t>(columnIdx), pRowIdx);
- }
- /**
- * @brief Get cell by column name and row index.
- * @param pColumnName column label name.
- * @param pRowIdx zero-based row index.
- * @param pToVal conversion function.
- * @returns cell data.
- */
- template<typename T>
- T GetCell(const std::string& pColumnName, const size_t pRowIdx, ConvFunc<T> pToVal) const
- {
- const ssize_t columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- return GetCell<T>(static_cast<size_t>(columnIdx), pRowIdx, pToVal);
- }
- /**
- * @brief Get cell by column index and row name.
- * @param pColumnIdx zero-based column index.
- * @param pRowName row label name.
- * @returns cell data.
- */
- template<typename T>
- T GetCell(const size_t pColumnIdx, const std::string& pRowName) const
- {
- const ssize_t rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- return GetCell<T>(pColumnIdx, static_cast<size_t>(rowIdx));
- }
- /**
- * @brief Get cell by column index and row name.
- * @param pColumnIdx zero-based column index.
- * @param pRowName row label name.
- * @param pToVal conversion function.
- * @returns cell data.
- */
- template<typename T>
- T GetCell(const size_t pColumnIdx, const std::string& pRowName, ConvFunc<T> pToVal) const
- {
- const ssize_t rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- return GetCell<T>(pColumnIdx, static_cast<size_t>(rowIdx), pToVal);
- }
- /**
- * @brief Set cell by index.
- * @param pRowIdx zero-based row index.
- * @param pColumnIdx zero-based column index.
- * @param pCell cell data.
- */
- template<typename T>
- void SetCell(const size_t pColumnIdx, const size_t pRowIdx, const T& pCell)
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- while ((dataRowIdx + 1) > GetDataRowCount())
- {
- std::vector<std::string> row;
- row.resize(GetDataColumnCount());
- mData.push_back(row);
- }
- if ((dataColumnIdx + 1) > GetDataColumnCount())
- {
- for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
- {
- itRow->resize(dataColumnIdx + 1);
- }
- }
- std::string str;
- Converter<T> converter(mConverterParams);
- converter.ToStr(pCell, str);
- mData.at(dataRowIdx).at(dataColumnIdx) = str;
- }
- /**
- * @brief Set cell by name.
- * @param pColumnName column label name.
- * @param pRowName row label name.
- * @param pCell cell data.
- */
- template<typename T>
- void SetCell(const std::string& pColumnName, const std::string& pRowName, const T& pCell)
- {
- const ssize_t columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- const ssize_t rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- SetCell<T>(static_cast<size_t>(columnIdx), static_cast<size_t>(rowIdx), pCell);
- }
- /**
- * @brief Get column name
- * @param pColumnIdx zero-based column index.
- * @returns column name.
- */
- std::string GetColumnName(const size_t pColumnIdx)
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- if (mLabelParams.mColumnNameIdx < 0)
- {
- throw std::out_of_range("column name row index < 0: " + std::to_string(mLabelParams.mColumnNameIdx));
- }
- return mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).at(dataColumnIdx);
- }
- /**
- * @brief Set column name
- * @param pColumnIdx zero-based column index.
- * @param pColumnName column name.
- */
- void SetColumnName(size_t pColumnIdx, const std::string& pColumnName)
- {
- if (mLabelParams.mColumnNameIdx < 0)
- {
- throw std::out_of_range("column name row index < 0: " + std::to_string(mLabelParams.mColumnNameIdx));
- }
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- mColumnNames[pColumnName] = dataColumnIdx;
- // increase table size if necessary:
- const size_t rowIdx = static_cast<size_t>(mLabelParams.mColumnNameIdx);
- if (rowIdx >= mData.size())
- {
- mData.resize(rowIdx + 1);
- }
- auto& row = mData[rowIdx];
- if (dataColumnIdx >= row.size())
- {
- row.resize(dataColumnIdx + 1);
- }
- mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).at(dataColumnIdx) = pColumnName;
- }
- /**
- * @brief Get column names
- * @returns vector of column names.
- */
- std::vector<std::string> GetColumnNames()
- {
- if (mLabelParams.mColumnNameIdx >= 0)
- {
- return std::vector<std::string>(mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).begin() +
- (mLabelParams.mRowNameIdx + 1),
- mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).end());
- }
- return std::vector<std::string>();
- }
- /**
- * @brief Get row name
- * @param pRowIdx zero-based column index.
- * @returns row name.
- */
- std::string GetRowName(const size_t pRowIdx)
- {
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- if (mLabelParams.mRowNameIdx < 0)
- {
- throw std::out_of_range("row name column index < 0: " + std::to_string(mLabelParams.mRowNameIdx));
- }
- return mData.at(dataRowIdx).at(static_cast<size_t>(mLabelParams.mRowNameIdx));
- }
- /**
- * @brief Set row name
- * @param pRowIdx zero-based row index.
- * @param pRowName row name.
- */
- void SetRowName(size_t pRowIdx, const std::string& pRowName)
- {
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- mRowNames[pRowName] = dataRowIdx;
- if (mLabelParams.mRowNameIdx < 0)
- {
- throw std::out_of_range("row name column index < 0: " + std::to_string(mLabelParams.mRowNameIdx));
- }
- // increase table size if necessary:
- if (dataRowIdx >= mData.size())
- {
- mData.resize(dataRowIdx + 1);
- }
- auto& row = mData[dataRowIdx];
- if (mLabelParams.mRowNameIdx >= static_cast<ssize_t>(row.size()))
- {
- row.resize(static_cast<size_t>(mLabelParams.mRowNameIdx) + 1);
- }
- mData.at(dataRowIdx).at(static_cast<size_t>(mLabelParams.mRowNameIdx)) = pRowName;
- }
- /**
- * @brief Get row names
- * @returns vector of row names.
- */
- std::vector<std::string> GetRowNames()
- {
- std::vector<std::string> rownames;
- if (mLabelParams.mRowNameIdx >= 0)
- {
- for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
- {
- if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx)
- {
- rownames.push_back(itRow->at(static_cast<size_t>(mLabelParams.mRowNameIdx)));
- }
- }
- }
- return rownames;
- }
- private:
- void ReadCsv()
- {
- std::ifstream stream;
- stream.exceptions(std::ifstream::failbit | std::ifstream::badbit);
- stream.open(mPath, std::ios::binary);
- ReadCsv(stream);
- }
- void ReadCsv(std::istream& pStream)
- {
- Clear();
- pStream.seekg(0, std::ios::end);
- std::streamsize length = pStream.tellg();
- pStream.seekg(0, std::ios::beg);
- #ifdef HAS_CODECVT
- std::vector<char> bom2b(2, '\0');
- if (length >= 2)
- {
- pStream.read(bom2b.data(), 2);
- pStream.seekg(0, std::ios::beg);
- }
- static const std::vector<char> bomU16le = { '\xff', '\xfe' };
- static const std::vector<char> bomU16be = { '\xfe', '\xff' };
- if ((bom2b == bomU16le) || (bom2b == bomU16be))
- {
- mIsUtf16 = true;
- mIsLE = (bom2b == bomU16le);
- std::wifstream wstream;
- wstream.exceptions(std::wifstream::failbit | std::wifstream::badbit);
- wstream.open(mPath, std::ios::binary);
- if (mIsLE)
- {
- wstream.imbue(std::locale(wstream.getloc(),
- new std::codecvt_utf16<wchar_t, 0x10ffff,
- static_cast<std::codecvt_mode>(std::consume_header |
- std::little_endian)>));
- }
- else
- {
- wstream.imbue(std::locale(wstream.getloc(),
- new std::codecvt_utf16<wchar_t, 0x10ffff,
- std::consume_header>));
- }
- std::wstringstream wss;
- wss << wstream.rdbuf();
- std::string utf8 = ToString(wss.str());
- std::stringstream ss(utf8);
- ParseCsv(ss, static_cast<std::streamsize>(utf8.size()));
- }
- else
- #endif
- {
- // check for UTF-8 Byte order mark and skip it when found
- if (length >= 3)
- {
- std::vector<char> bom3b(3, '\0');
- pStream.read(bom3b.data(), 3);
- static const std::vector<char> bomU8 = { '\xef', '\xbb', '\xbf' };
- if (bom3b != bomU8)
- {
- // file does not start with a UTF-8 Byte order mark
- pStream.seekg(0, std::ios::beg);
- }
- else
- {
- // file did start with a UTF-8 Byte order mark, simply skip it
- length -= 3;
- }
- }
- ParseCsv(pStream, length);
- }
- }
- void ParseCsv(std::istream& pStream, std::streamsize p_FileLength)
- {
- const std::streamsize bufLength = 64 * 1024;
- std::vector<char> buffer(bufLength);
- std::vector<std::string> row;
- std::string cell;
- bool quoted = false;
- int cr = 0;
- int lf = 0;
- while (p_FileLength > 0)
- {
- const std::streamsize toReadLength = std::min<std::streamsize>(p_FileLength, bufLength);
- pStream.read(buffer.data(), toReadLength);
- // With user-specified istream opened in non-binary mode on windows, we may have a
- // data length mismatch, so ensure we don't parse outside actual data length read.
- const std::streamsize readLength = pStream.gcount();
- if (readLength <= 0)
- {
- break;
- }
- for (size_t i = 0; i < static_cast<size_t>(readLength); ++i)
- {
- if (buffer[i] == mSeparatorParams.mQuoteChar)
- {
- if (cell.empty() || (cell[0] == mSeparatorParams.mQuoteChar))
- {
- quoted = !quoted;
- }
- cell += buffer[i];
- }
- else if (buffer[i] == mSeparatorParams.mSeparator)
- {
- if (!quoted)
- {
- row.push_back(Unquote(Trim(cell)));
- cell.clear();
- }
- else
- {
- cell += buffer[i];
- }
- }
- else if (buffer[i] == '\r')
- {
- if (mSeparatorParams.mQuotedLinebreaks && quoted)
- {
- cell += buffer[i];
- }
- else
- {
- ++cr;
- }
- }
- else if (buffer[i] == '\n')
- {
- if (mSeparatorParams.mQuotedLinebreaks && quoted)
- {
- cell += buffer[i];
- }
- else
- {
- ++lf;
- if (mLineReaderParams.mSkipEmptyLines && row.empty() && cell.empty())
- {
- // skip empty line
- }
- else
- {
- row.push_back(Unquote(Trim(cell)));
- if (mLineReaderParams.mSkipCommentLines && !row.at(0).empty() &&
- (row.at(0)[0] == mLineReaderParams.mCommentPrefix))
- {
- // skip comment line
- }
- else
- {
- mData.push_back(row);
- }
- cell.clear();
- row.clear();
- quoted = false;
- }
- }
- }
- else
- {
- cell += buffer[i];
- }
- }
- p_FileLength -= readLength;
- }
- // Handle last line without linebreak
- if (!cell.empty() || !row.empty())
- {
- row.push_back(Unquote(Trim(cell)));
- cell.clear();
- mData.push_back(row);
- row.clear();
- }
- // Assume CR/LF if at least half the linebreaks have CR
- mSeparatorParams.mHasCR = (cr > (lf / 2));
- // Set up column labels
- UpdateColumnNames();
- // Set up row labels
- UpdateRowNames();
- }
- void WriteCsv() const
- {
- #ifdef HAS_CODECVT
- if (mIsUtf16)
- {
- std::stringstream ss;
- WriteCsv(ss);
- std::string utf8 = ss.str();
- std::wstring wstr = ToWString(utf8);
- std::wofstream wstream;
- wstream.exceptions(std::wofstream::failbit | std::wofstream::badbit);
- wstream.open(mPath, std::ios::binary | std::ios::trunc);
- if (mIsLE)
- {
- wstream.imbue(std::locale(wstream.getloc(),
- new std::codecvt_utf16<wchar_t, 0x10ffff,
- static_cast<std::codecvt_mode>(std::little_endian)>));
- }
- else
- {
- wstream.imbue(std::locale(wstream.getloc(),
- new std::codecvt_utf16<wchar_t, 0x10ffff>));
- }
- wstream << static_cast<wchar_t>(0xfeff);
- wstream << wstr;
- }
- else
- #endif
- {
- std::ofstream stream;
- stream.exceptions(std::ofstream::failbit | std::ofstream::badbit);
- stream.open(mPath, std::ios::binary | std::ios::trunc);
- WriteCsv(stream);
- }
- }
- void WriteCsv(std::ostream& pStream) const
- {
- for (auto itr = mData.begin(); itr != mData.end(); ++itr)
- {
- for (auto itc = itr->begin(); itc != itr->end(); ++itc)
- {
- if (mSeparatorParams.mAutoQuote &&
- ((itc->find(mSeparatorParams.mSeparator) != std::string::npos) ||
- (itc->find(' ') != std::string::npos)))
- {
- // escape quotes in string
- std::string str = *itc;
- const std::string quoteCharStr = std::string(1, mSeparatorParams.mQuoteChar);
- ReplaceString(str, quoteCharStr, quoteCharStr + quoteCharStr);
- pStream << quoteCharStr << str << quoteCharStr;
- }
- else
- {
- pStream << *itc;
- }
- if (std::distance(itc, itr->end()) > 1)
- {
- pStream << mSeparatorParams.mSeparator;
- }
- }
- pStream << (mSeparatorParams.mHasCR ? "\r\n" : "\n");
- }
- }
- size_t GetDataRowCount() const
- {
- return mData.size();
- }
- size_t GetDataColumnCount() const
- {
- return (mData.size() > 0) ? mData.at(0).size() : 0;
- }
- inline size_t GetDataRowIndex(const size_t pRowIdx) const
- {
- return pRowIdx + static_cast<size_t>(mLabelParams.mColumnNameIdx + 1);
- }
- inline size_t GetDataColumnIndex(const size_t pColumnIdx) const
- {
- return pColumnIdx + static_cast<size_t>(mLabelParams.mRowNameIdx + 1);
- }
- std::string Trim(const std::string& pStr)
- {
- if (mSeparatorParams.mTrim)
- {
- std::string str = pStr;
- // ltrim
- str.erase(str.begin(), std::find_if(str.begin(), str.end(), [](int ch) { return !isspace(ch); }));
- // rtrim
- str.erase(std::find_if(str.rbegin(), str.rend(), [](int ch) { return !isspace(ch); }).base(), str.end());
- return str;
- }
- else
- {
- return pStr;
- }
- }
- std::string Unquote(const std::string& pStr)
- {
- if (mSeparatorParams.mAutoQuote && (pStr.size() >= 2) &&
- (pStr.front() == mSeparatorParams.mQuoteChar) &&
- (pStr.back() == mSeparatorParams.mQuoteChar))
- {
- // remove start/end quotes
- std::string str = pStr.substr(1, pStr.size() - 2);
- // unescape quotes in string
- const std::string quoteCharStr = std::string(1, mSeparatorParams.mQuoteChar);
- ReplaceString(str, quoteCharStr + quoteCharStr, quoteCharStr);
- return str;
- }
- else
- {
- return pStr;
- }
- }
- void UpdateColumnNames()
- {
- mColumnNames.clear();
- if ((mLabelParams.mColumnNameIdx >= 0) &&
- (static_cast<ssize_t>(mData.size()) > mLabelParams.mColumnNameIdx))
- {
- size_t i = 0;
- for (auto& columnName : mData[static_cast<size_t>(mLabelParams.mColumnNameIdx)])
- {
- mColumnNames[columnName] = i++;
- }
- }
- }
- void UpdateRowNames()
- {
- mRowNames.clear();
- if ((mLabelParams.mRowNameIdx >= 0) &&
- (static_cast<ssize_t>(mData.size()) >
- (mLabelParams.mColumnNameIdx + 1)))
- {
- size_t i = 0;
- for (auto& dataRow : mData)
- {
- if (static_cast<ssize_t>(dataRow.size()) > mLabelParams.mRowNameIdx)
- {
- mRowNames[dataRow[static_cast<size_t>(mLabelParams.mRowNameIdx)]] = i++;
- }
- }
- }
- }
- #ifdef HAS_CODECVT
- #if defined(_MSC_VER)
- #pragma warning (push)
- #pragma warning (disable: 4996)
- #endif
/**
 * @brief   Convert a wide string to a UTF-8 encoded byte string.
 */
static std::string ToString(const std::wstring& pWStr)
{
  std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf8Converter;
  return utf8Converter.to_bytes(pWStr);
}
/**
 * @brief   Convert a UTF-8 encoded byte string to a wide string.
 */
static std::wstring ToWString(const std::string& pStr)
{
  std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf8Converter;
  return utf8Converter.from_bytes(pStr);
}
- #if defined(_MSC_VER)
- #pragma warning (pop)
- #endif
- #endif
/**
 * @brief   Replace every occurrence of pSearch in pStr with pReplace,
 *          in place, scanning left to right without re-examining
 *          replaced text.
 * @param   pStr                  string to modify.
 * @param   pSearch               text to look for; an empty search string
 *                                leaves pStr unchanged.
 * @param   pReplace              text to substitute.
 */
static void ReplaceString(std::string& pStr, const std::string& pSearch, const std::string& pReplace)
{
  // An empty needle matches at every position, which would make the loop
  // below insert pReplace forever.
  if (pSearch.empty())
  {
    return;
  }

  size_t pos = 0;
  while ((pos = pStr.find(pSearch, pos)) != std::string::npos)
  {
    pStr.replace(pos, pSearch.size(), pReplace);

    // Continue after the inserted text so it is not matched again.
    pos += pReplace.size();
  }
}
- private:
// Path of the file opened by ReadCsv() and WriteCsv().
std::string mPath;
// Positions of the column-label row (mColumnNameIdx) and the row-label
// column (mRowNameIdx); the accessors treat negative values as "no labels".
LabelParams mLabelParams;
// Separator, quote character, trimming, auto-quoting, quoted-linebreak
// handling and the CR/LF flag detected while parsing.
SeparatorParams mSeparatorParams;
// Settings handed to every Converter<T> that is constructed.
ConverterParams mConverterParams;
// Empty-line and comment-line skipping settings used while parsing.
LineReaderParams mLineReaderParams;
// All cells as strings, row by row, including the label row and column.
std::vector<std::vector<std::string>> mData;
// Column label -> position within the label row (label column included).
std::map<std::string, size_t> mColumnNames;
// Row label -> position within mData (label row included).
std::map<std::string, size_t> mRowNames;
#ifdef HAS_CODECVT
// Set by ReadCsv() when a UTF-16 byte order mark was found; WriteCsv()
// then writes UTF-16 again, little endian when mIsLE is set.
bool mIsUtf16 = false;
bool mIsLE = false;
#endif
- };
- }
|