Skip to content

Commit

Permalink
Refactor string parsing logic from CommandLineParser into string_util
Browse files Browse the repository at this point in the history
  • Loading branch information
angela28chen committed Aug 15, 2023
1 parent 5ae59a4 commit 4b2440b
Show file tree
Hide file tree
Showing 6 changed files with 596 additions and 123 deletions.
62 changes: 17 additions & 45 deletions include/ppx/command_line_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#define ppx_command_line_parser_h

#include "nlohmann/json.hpp"
#include "ppx/log.h"
#include "ppx/string_util.h"

#include <cstdint>
#include <ios>
Expand All @@ -36,7 +38,7 @@ namespace ppx {

// All commandline flags are stored as key-value pairs (string, list of strings)
// Value syntax:
// - strings cannot contain "="
// - strings cannot contain "=" or ","
// - boolean values stored as "0" "false" "1" "true"
//
// GetOptionValueOrDefault() can be used to access value of specified type.
Expand Down Expand Up @@ -64,11 +66,15 @@ class CliOptions
return defaultValue;
}
auto valueStr = it->second.back();
return GetParsedOrDefault<T>(valueStr, defaultValue);
auto result = ppx::string_util::ParseOrDefault(valueStr, defaultValue);
if (result.second != std::nullopt) {
PPX_LOG_ERROR(result.second->errorMsg);
}
return result.first;
}

// Same as above, but intended for list flags that are specified on the command line
// with multiple instances of the same flag
// with multiple instances of the same flag, or with comma-separated values
template <typename T>
std::vector<T> GetOptionValueOrDefault(const std::string& optionName, const std::vector<T>& defaultValues) const
{
Expand All @@ -79,15 +85,15 @@ class CliOptions
std::vector<T> parsedValues;
T nullValue{};
for (size_t i = 0; i < it->second.size(); ++i) {
parsedValues.emplace_back(GetParsedOrDefault<T>(it->second.at(i), nullValue));
auto result = ppx::string_util::ParseOrDefault(it->second.at(i), nullValue);
if (result.second != std::nullopt) {
PPX_LOG_ERROR(result.second->errorMsg);
}
parsedValues.emplace_back(result.first);
}
return parsedValues;
}

// Same as above, but intended for resolution flags that are specified on command line
// with <Width>x<Height>
std::pair<int, int> GetOptionValueOrDefault(const std::string& optionName, const std::pair<int, int>& defaultValue) const;

// (WILL BE DEPRECATED, USE KNOBS INSTEAD)
// Get the parameter value after converting it into the desired integral,
// floating-point, or boolean type. If the value fails to be converted,
Expand All @@ -108,33 +114,6 @@ class CliOptions
// Same as above, but appends an array of values at the same key
void AddOption(std::string_view optionName, const std::vector<std::string>& valueArray);

// Converts valueStr into a T by forwarding to Parse().
// T is restricted at compile time to integral (which includes bool),
// floating-point, or std::string types.
template <typename T>
T GetParsedOrDefault(std::string_view valueStr, const T& defaultValue) const
{
    constexpr bool kIsSupportedType = std::is_integral_v<T> || std::is_floating_point_v<T> || std::is_same_v<T, std::string>;
    static_assert(kIsSupportedType, "GetParsedOrDefault must be called with an integral, floating-point, boolean, or std::string type");
    return Parse(valueStr, defaultValue);
}

// For boolean parameters
// interpreted as true: "true", 1, ""
// interpreted as false: "false", 0
bool Parse(std::string_view valueStr, bool defaultValue) const;

// Generic conversion of valueStr into a T.
// - std::string: the text is returned as-is (copied into a std::string).
// - any other T: the text is read with stream extraction; if extraction
//   fails, defaultValue is returned instead.
template <typename T>
T Parse(std::string_view valueStr, const T defaultValue) const
{
    if constexpr (std::is_same_v<T, std::string>) {
        return std::string(valueStr);
    }
    else {
        std::stringstream stream{std::string(valueStr)};
        T                 parsed;
        if (stream >> parsed) {
            return parsed;
        }
        return defaultValue;
    }
}

private:
// All flag names (string) and parameters (vector of strings) specified on the command line
std::unordered_map<std::string, std::vector<std::string>> mAllOptions;
Expand All @@ -148,20 +127,13 @@ class CliOptions
class CommandLineParser
{
public:
// Describes a parsing failure through a human-readable message.
struct ParsingError
{
    // Text explaining what went wrong.
    std::string errorMsg;

    // Stores a copy of `error` as the message.
    ParsingError(const std::string& error)
        : errorMsg{error}
    {
    }
};

// Parse the given arguments into options. Returns std::nullopt if parsing
// succeeded. Otherwise, returns a ParsingError whose errorMsg describes
// the problem.
std::optional<ParsingError> Parse(int argc, const char* argv[]);
std::optional<ppx::string_util::ParsingError> Parse(int argc, const char* argv[]);

// Adds all options specified within jsonConfig to mOpts.
std::optional<ParsingError> AddJsonOptions(const nlohmann::json& jsonConfig);
std::optional<ppx::string_util::ParsingError> AddJsonOptions(const nlohmann::json& jsonConfig);

std::string GetJsonConfigFlagName() const { return mJsonConfigFlagName; }
const CliOptions& GetOptions() const { return mOpts; }
Expand All @@ -172,7 +144,7 @@ class CommandLineParser
private:
// Adds an option to mOpts and handles the special --no-flag-name case.
// Expects option names without the "--" prefix.
std::optional<ParsingError> AddOption(std::string_view optionName, std::string_view valueStr);
std::optional<ppx::string_util::ParsingError> AddOption(std::string_view optionName, std::string_view valueStr);

CliOptions mOpts;
std::string mJsonConfigFlagName = "config-json-path";
Expand Down
90 changes: 89 additions & 1 deletion include/ppx/string_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
namespace ppx {
namespace string_util {

// -------------------------------------------------------------------------------------------------
// Misc
// -------------------------------------------------------------------------------------------------

void TrimLeft(std::string& s);
void TrimRight(std::string& s);

Expand All @@ -31,10 +35,18 @@ std::string TrimCopy(const std::string& s);
// Trims all characters specified in c from both the left and right sides of s
std::string_view TrimBothEnds(std::string_view s, std::string_view c = " \t");

// Splits s at every instance of delimiter and returns a vector of substrings
// Returns std::nullopt if s contains: leading/trailing/consecutive delimiters
std::optional<std::vector<std::string_view>> Split(std::string_view s, char delimiter);

// Splits s at the first instance of delimiter and returns two substrings
// Returns std::nullopt if s does not contain the delimiter
// Returns std::nullopt if s is not in expected format of string-delimiter-string
std::optional<std::pair<std::string_view, std::string_view>> SplitInTwo(std::string_view s, char delimiter);

// -------------------------------------------------------------------------------------------------
// Formatting Strings
// -------------------------------------------------------------------------------------------------

// Formats string for printing with the specified width and left indent.
// Words will be pushed to the subsequent line to avoid line breaks in the
// middle of a word if possible.
Expand Down Expand Up @@ -74,6 +86,82 @@ std::string ToString(std::pair<T, T> values)
return ss.str();
}

// -------------------------------------------------------------------------------------------------
// Parsing Strings
// -------------------------------------------------------------------------------------------------

// Describes a parsing failure through a human-readable message.
struct ParsingError
{
    // Text explaining what went wrong.
    std::string errorMsg;

    // Stores a copy of `error` as the message.
    ParsingError(const std::string& error)
        : errorMsg{error}
    {
    }
};

// ParseOrDefault() attempts to convert valueStr into a value of the same type as defaultValue.
// On success, the result is (parsed value, std::nullopt).
// On failure, the result is (defaultValue, ParsingError).

// String overloads
// e.g. "a string" -> "a string"
std::pair<std::string, std::optional<ParsingError>> ParseOrDefault(std::string_view valueStr, const std::string& defaultValue);
std::pair<std::string, std::optional<ParsingError>> ParseOrDefault(std::string_view valueStr, std::string_view defaultValue);

// Bool overload
// e.g. "true", "1", "" -> true
// e.g. "false", "0" -> false
std::pair<bool, std::optional<ParsingError>> ParseOrDefault(std::string_view valueStr, bool defaultValue);

// Integral (including char) and floating-point overload
// e.g. "1.0" -> 1.0f
// e.g. "-20" -> -20
// e.g. "c" -> 'c'
//
// The text is read with stream extraction into a T; a failed extraction
// yields defaultValue together with a ParsingError.
// NOTE(review): only the extraction itself is checked, so characters left
// over after a successfully extracted prefix are not reported as an error.
template <typename T>
std::pair<T, std::optional<ParsingError>> ParseOrDefault(std::string_view valueStr, T defaultValue)
{
    static_assert(std::is_integral_v<T> || std::is_floating_point_v<T>, "Attempted to parse invalid type for ParseOrDefault");

    using Result = std::pair<T, std::optional<ParsingError>>;

    std::stringstream stream{std::string(valueStr)};
    T                 parsed;
    if (stream >> parsed) {
        return Result{parsed, std::nullopt};
    }
    return Result{defaultValue, ParsingError("could not be parsed as integral or float: " + std::string(valueStr))};
}

// For lists with comma-separated string representation
// e.g. "i1,i2,i3 with spaces,i4" -> {"i1", "i2", "i3 with spaces", "i4"}
// Parses a comma-separated list, converting every element with the scalar
// ParseOrDefault() overload selected for T.
// On success, the result is (parsed elements, std::nullopt).
// If any element fails to parse, the result is (defaultValues, that element's ParsingError).
template <typename T>
std::pair<typename std::vector<T>, std::optional<ParsingError>> ParseOrDefault(std::string_view valueStr, const std::vector<T>& defaultValues)
{
    // Break the input into its comma-separated pieces.
    std::vector<std::string> elements;
    if (const auto pieces = Split(valueStr, ','); pieces.has_value()) {
        for (const auto piece : *pieces) {
            elements.emplace_back(std::string(piece));
        }
    }
    else {
        // Split() did not produce a list; treat the whole input as a single element.
        elements.emplace_back(valueStr);
    }

    // Convert element by element, stopping at the first failure.
    std::vector<T> parsedValues;
    T              fallback{};
    for (const auto& element : elements) {
        auto parsed = ParseOrDefault(element, fallback);
        if (parsed.second.has_value()) {
            return std::make_pair(defaultValues, parsed.second);
        }
        parsedValues.emplace_back(parsed.first);
    }
    return std::make_pair(parsedValues, std::nullopt);
}

// For resolution with x-separated string representation
// e.g. "600x800" -> (600, 800)
std::pair<std::pair<int, int>, std::optional<ParsingError>> ParseOrDefault(std::string_view valueStr, const std::pair<int, int>& defaultValue);

} // namespace string_util
} // namespace ppx

Expand Down
95 changes: 32 additions & 63 deletions src/ppx/command_line_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
#include <cctype>

#include "ppx/command_line_parser.h"
#include "ppx/log.h"
#include "ppx/string_util.h"

namespace {

Expand All @@ -38,23 +36,6 @@ bool StartsWithDoubleDash(std::string_view s)

namespace ppx {

// Resolution-style lookup: reads the last value stored for optionName and
// interprets it as <Width>x<Height>.
// Returns defaultValue when the option is absent or the value cannot be split
// at 'x' (the latter is also logged as an error). Each half is converted with
// GetParsedOrDefault(), using the matching half of defaultValue as its default.
std::pair<int, int> CliOptions::GetOptionValueOrDefault(const std::string& optionName, const std::pair<int, int>& defaultValue) const
{
    const auto found = mAllOptions.find(optionName);
    if (found == mAllOptions.cend()) {
        return defaultValue;
    }

    const auto& valueStr   = found->second.back();
    const auto  dimensions = ppx::string_util::SplitInTwo(valueStr, 'x');
    if (!dimensions.has_value()) {
        PPX_LOG_ERROR("resolution flag must be in format <Width>x<Height>: " << valueStr);
        return defaultValue;
    }

    const int width  = GetParsedOrDefault(dimensions->first, defaultValue.first);
    const int height = GetParsedOrDefault(dimensions->second, defaultValue.second);
    return {width, height};
}

void CliOptions::AddOption(std::string_view optionName, std::string_view value)
{
std::string optionNameStr(optionName);
Expand All @@ -80,26 +61,7 @@ void CliOptions::AddOption(std::string_view optionName, const std::vector<std::s
storedValueArray.insert(storedValueArray.end(), valueArray.cbegin(), valueArray.cend());
}

// Boolean conversion of valueStr.
// - An empty string is treated as true.
// - Otherwise the text is first read as a plain bool; if that fails it is
//   re-read in std::boolalpha mode.
// - If both reads fail, an error is logged and defaultValue is returned.
bool CliOptions::Parse(std::string_view valueStr, bool defaultValue) const
{
    if (valueStr.empty()) {
        return true;
    }

    std::stringstream stream{std::string(valueStr)};
    bool              parsed;

    // First attempt: default (non-boolalpha) extraction.
    if (stream >> parsed) {
        return parsed;
    }

    // Second attempt: reset the error state and retry in boolalpha mode.
    stream.clear();
    if (stream >> std::boolalpha >> parsed) {
        return parsed;
    }

    PPX_LOG_ERROR("could not be parsed as bool: " << valueStr);
    return defaultValue;
}

std::optional<CommandLineParser::ParsingError> CommandLineParser::Parse(int argc, const char* argv[])
std::optional<ppx::string_util::ParsingError> CommandLineParser::Parse(int argc, const char* argv[])
{
// argc should be >= 1 and argv[0] the name of the executable.
if (argc < 2) {
Expand All @@ -112,31 +74,28 @@ std::optional<CommandLineParser::ParsingError> CommandLineParser::Parse(int argc
std::vector<std::string_view> args;
for (size_t i = 1; i < argc; ++i) {
std::string_view argString(argv[i]);
auto res = ppx::string_util::SplitInTwo(argString, '=');
if (res == std::nullopt) {
if (StartsWithDoubleDash(argString) &&
argString == "--" + mJsonConfigFlagName &&
i + 1 < argc &&
!StartsWithDoubleDash(argv[i + 1])) {
mOpts.AddOption(mJsonConfigFlagName, ppx::string_util::TrimBothEnds(argv[i + 1]));
++i;
if (argString.find('=') != std::string_view::npos) {
auto res = ppx::string_util::SplitInTwo(argString, '=');
if (res == std::nullopt) {
return "Malformed flag with '=': \"" + std::string(argString) + "\"";
}
if (StartsWithDoubleDash(res->first) && res->first == "--" + mJsonConfigFlagName) {
mOpts.AddOption(mJsonConfigFlagName, ppx::string_util::TrimBothEnds(res->second));
continue;
}
args.emplace_back(argString);
args.emplace_back(res->first);
args.emplace_back(res->second);
continue;
}
if (res->first.empty() || res->second.empty()) {
return "Malformed flag with '=': \"" + std::string(argString) + "\"";
}
if (res->second.find('=') != std::string_view::npos) {
return "Unexpected number of '=' symbols in the following string: \"" + std::string(argString) + "\"";
}
if (StartsWithDoubleDash(res->first) && res->first == "--" + mJsonConfigFlagName) {
mOpts.AddOption(mJsonConfigFlagName, ppx::string_util::TrimBothEnds(res->second));
if (StartsWithDoubleDash(argString) &&
argString == "--" + mJsonConfigFlagName &&
i + 1 < argc &&
!StartsWithDoubleDash(argv[i + 1])) {
mOpts.AddOption(mJsonConfigFlagName, ppx::string_util::TrimBothEnds(argv[i + 1]));
++i;
continue;
}
args.emplace_back(res->first);
args.emplace_back(res->second);
args.emplace_back(argString);
}

// Flags inside JSON files are processed first
Expand Down Expand Up @@ -194,7 +153,7 @@ std::optional<CommandLineParser::ParsingError> CommandLineParser::Parse(int argc
return std::nullopt;
}

std::optional<CommandLineParser::ParsingError> CommandLineParser::AddJsonOptions(const nlohmann::json& jsonConfig)
std::optional<ppx::string_util::ParsingError> CommandLineParser::AddJsonOptions(const nlohmann::json& jsonConfig)
{
std::stringstream ss;
for (auto it = jsonConfig.cbegin(); it != jsonConfig.cend(); ++it) {
Expand All @@ -213,14 +172,12 @@ std::optional<CommandLineParser::ParsingError> CommandLineParser::AddJsonOptions
ss << it.value();
std::string value = ss.str();
ss.str("");
if (auto error = AddOption(it.key(), ppx::string_util::TrimBothEnds(value, " \t\""))) {
return error;
}
mOpts.AddOption(it.key(), ppx::string_util::TrimBothEnds(value, " \t\""));
}
return std::nullopt;
}

std::optional<CommandLineParser::ParsingError> CommandLineParser::AddOption(std::string_view optionName, std::string_view valueStr)
std::optional<ppx::string_util::ParsingError> CommandLineParser::AddOption(std::string_view optionName, std::string_view valueStr)
{
if (optionName.length() > 2 && optionName.substr(0, 3) == "no-") {
if (valueStr.length() > 0) {
Expand All @@ -229,6 +186,18 @@ std::optional<CommandLineParser::ParsingError> CommandLineParser::AddOption(std:
optionName = optionName.substr(3);
valueStr = "0";
}

if (valueStr.find(',') != std::string_view::npos) {
auto substringArray = ppx::string_util::Split(valueStr, ',');
if (substringArray == std::nullopt) {
return "invalid comma use for option \"" + std::string(optionName) + "\" and value \"" + std::string(valueStr) + "\"";
}
// Special case, comma-separated value lists specified on the commandline are added directly to mOpts to avoid inserting element by element
std::vector<std::string> substringStringArray(substringArray->cbegin(), substringArray->cend());
mOpts.AddOption(optionName, substringStringArray);
return std::nullopt;
}

mOpts.AddOption(optionName, valueStr);
return std::nullopt;
}
Expand Down
Loading

0 comments on commit 4b2440b

Please sign in to comment.