diff --git a/components/core/src/clp/TimestampPattern.cpp b/components/core/src/clp/TimestampPattern.cpp index 93f9b9638..c7a8bf78f 100644 --- a/components/core/src/clp/TimestampPattern.cpp +++ b/components/core/src/clp/TimestampPattern.cpp @@ -124,28 +124,45 @@ void TimestampPattern::init() { patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S.%3"); // E.g. 2015-01-31T15:50:45,392 patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S,%3"); - // E.g. [2015-01-31T15:50:45 - patterns.emplace_back(0, "[%Y-%m-%dT%H:%M:%S"); - // E.g. [20170106-16:56:41] - patterns.emplace_back(0, "[%Y%m%d-%H:%M:%S]"); - // E.g. 2015-01-31 15:50:45,392 - patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S,%3"); // E.g. 2015-01-31 15:50:45.392 patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S.%3"); + // E.g. 2015-01-31 15:50:45,392 + patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S,%3"); + // E.g. 2015/01/31T15:50:45.123 + patterns.emplace_back(0, "%Y/%m/%dT%H:%M:%S.%3"); + // E.g. 2015/01/31T15:50:45,123 + patterns.emplace_back(0, "%Y/%m/%dT%H:%M:%S,%3"); + // E.g. 2015/01/31 15:50:45.123 + patterns.emplace_back(0, "%Y/%m/%d %H:%M:%S.%3"); + // E.g. 2015/01/31 15:50:45,123 + patterns.emplace_back(0, "%Y/%m/%d %H:%M:%S,%3"); // E.g. [2015-01-31 15:50:45,085] patterns.emplace_back(0, "[%Y-%m-%d %H:%M:%S,%3]"); + // E.g. INFO [main] 2015-01-31 15:50:45,085 + patterns.emplace_back(2, "%Y-%m-%d %H:%M:%S,%3"); + // E.g. <<<2016-11-10 03:02:29:936 + patterns.emplace_back(0, "<<<%Y-%m-%d %H:%M:%S:%3"); + // E.g. 01 Jan 2016 15:50:17,085 + patterns.emplace_back(0, "%d %b %Y %H:%M:%S,%3"); + + // E.g. 2015-01-31T15:50:45 + patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S"); // E.g. 2015-01-31 15:50:45 patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S"); - // E.g. Start-Date: 2015-01-31 15:50:45 - patterns.emplace_back(1, "%Y-%m-%d %H:%M:%S"); + // E.g. 2015/01/31T15:50:45 + patterns.emplace_back(0, "%Y/%m/%dT%H:%M:%S"); // E.g. 2015/01/31 15:50:45 patterns.emplace_back(0, "%Y/%m/%d %H:%M:%S"); + // E.g. 
[2015-01-31T15:50:45 + patterns.emplace_back(0, "[%Y-%m-%dT%H:%M:%S"); + // E.g. [20170106-16:56:41] + patterns.emplace_back(0, "[%Y%m%d-%H:%M:%S]"); + // E.g. Start-Date: 2015-01-31 15:50:45 + patterns.emplace_back(1, "%Y-%m-%d %H:%M:%S"); // E.g. 15/01/31 15:50:45 patterns.emplace_back(0, "%y/%m/%d %H:%M:%S"); // E.g. 150131 9:50:45 patterns.emplace_back(0, "%y%m%d %k:%M:%S"); - // E.g. 01 Jan 2016 15:50:17,085 - patterns.emplace_back(0, "%d %b %Y %H:%M:%S,%3"); // E.g. Jan 01, 2016 3:50:17 PM patterns.emplace_back(0, "%b %d, %Y %l:%M:%S %p"); // E.g. January 31, 2015 15:50 @@ -157,16 +174,12 @@ void TimestampPattern::init() { patterns.emplace_back(3, "[%d/%b/%Y:%H:%M:%S"); // E.g. 192.168.4.5 - - [01/01/2016:15:50:17 patterns.emplace_back(3, "[%d/%m/%Y:%H:%M:%S"); - // E.g. INFO [main] 2015-01-31 15:50:45,085 - patterns.emplace_back(2, "%Y-%m-%d %H:%M:%S,%3"); // E.g. Started POST "/api/v3/internal/allowed" for 127.0.0.1 at 2017-06-18 00:20:44 patterns.emplace_back(6, "%Y-%m-%d %H:%M:%S"); // E.g. update-alternatives 2015-01-31 15:50:45 patterns.emplace_back(1, "%Y-%m-%d %H:%M:%S"); // E.g. ERROR: apport (pid 4557) Sun Jan 1 15:50:45 2015 patterns.emplace_back(4, "%a %b %e %H:%M:%S %Y"); - // E.g. <<<2016-11-10 03:02:29:936 - patterns.emplace_back(0, "<<<%Y-%m-%d %H:%M:%S:%3"); // E.g. 
Sun Jan 1 15:50:45 2015 patterns.emplace_back(0, "%a %b %e %H:%M:%S %Y"); diff --git a/components/core/src/clp_s/ArchiveWriter.cpp b/components/core/src/clp_s/ArchiveWriter.cpp index a9aa328e8..d337c20d4 100644 --- a/components/core/src/clp_s/ArchiveWriter.cpp +++ b/components/core/src/clp_s/ArchiveWriter.cpp @@ -90,37 +90,27 @@ size_t ArchiveWriter::get_data_size() { void ArchiveWriter::initialize_schema_writer(SchemaWriter* writer, Schema const& schema) { for (int32_t id : schema) { auto node = m_schema_tree->get_node(id); - std::string key_name = node->get_key_name(); switch (node->get_type()) { case NodeType::INTEGER: - writer->append_column(new Int64ColumnWriter(key_name, id)); + writer->append_column(new Int64ColumnWriter(id)); break; case NodeType::FLOAT: - writer->append_column(new FloatColumnWriter(key_name, id)); + writer->append_column(new FloatColumnWriter(id)); break; case NodeType::CLPSTRING: - writer->append_column( - new ClpStringColumnWriter(key_name, id, m_var_dict, m_log_dict) - ); + writer->append_column(new ClpStringColumnWriter(id, m_var_dict, m_log_dict)); break; case NodeType::VARSTRING: - writer->append_column(new VariableStringColumnWriter(key_name, id, m_var_dict)); + writer->append_column(new VariableStringColumnWriter(id, m_var_dict)); break; case NodeType::BOOLEAN: - writer->append_column(new BooleanColumnWriter(key_name, id)); + writer->append_column(new BooleanColumnWriter(id)); break; case NodeType::ARRAY: - writer->append_column( - new ClpStringColumnWriter(key_name, id, m_var_dict, m_array_dict) - ); + writer->append_column(new ClpStringColumnWriter(id, m_var_dict, m_array_dict)); break; case NodeType::DATESTRING: - writer->append_column(new DateStringColumnWriter(key_name, id, m_timestamp_dict)); - break; - case NodeType::FLOATDATESTRING: - writer->append_column( - new FloatDateStringColumnWriter(key_name, id, m_timestamp_dict) - ); + writer->append_column(new DateStringColumnWriter(id)); break; case NodeType::OBJECT: case 
NodeType::NULLVALUE: diff --git a/components/core/src/clp_s/ColumnReader.cpp b/components/core/src/clp_s/ColumnReader.cpp index c45104422..4c9bd5e42 100644 --- a/components/core/src/clp_s/ColumnReader.cpp +++ b/components/core/src/clp_s/ColumnReader.cpp @@ -156,22 +156,4 @@ std::variant DateStringColumnReader::extr epochtime_t DateStringColumnReader::get_encoded_time(uint64_t cur_message) { return m_timestamps[cur_message]; } - -void FloatDateStringColumnReader::load(ZstdDecompressor& decompressor, uint64_t num_messages) { - m_timestamps = std::make_unique(num_messages); - decompressor.try_read_exact_length( - reinterpret_cast(m_timestamps.get()), - num_messages * sizeof(double) - ); -} - -std::variant FloatDateStringColumnReader::extract_value( - uint64_t cur_message -) { - return std::to_string(m_timestamps[cur_message]); -} - -double FloatDateStringColumnReader::get_encoded_time(uint64_t cur_message) { - return m_timestamps[cur_message]; -} } // namespace clp_s diff --git a/components/core/src/clp_s/ColumnReader.hpp b/components/core/src/clp_s/ColumnReader.hpp index 98d24f728..58ee1072a 100644 --- a/components/core/src/clp_s/ColumnReader.hpp +++ b/components/core/src/clp_s/ColumnReader.hpp @@ -235,32 +235,6 @@ class DateStringColumnReader : public BaseColumnReader { std::unique_ptr m_timestamps; std::unique_ptr m_timestamp_encodings; }; - -class FloatDateStringColumnReader : public BaseColumnReader { -public: - // Constructor - FloatDateStringColumnReader(std::string const& name, int32_t id) : BaseColumnReader(name, id) {} - - // Destructor - ~FloatDateStringColumnReader() override = default; - - // Methods inherited from BaseColumnReader - void load(ZstdDecompressor& decompressor, uint64_t num_messages) override; - - NodeType get_type() override { return NodeType::FLOATDATESTRING; } - - std::variant extract_value(uint64_t cur_message - ) override; - - /** - * @param cur_message - * @return The encoded time in float epoch time - */ - double 
get_encoded_time(uint64_t cur_message); - -private: - std::unique_ptr m_timestamps; -}; } // namespace clp_s #endif // CLP_S_COLUMNREADER_HPP diff --git a/components/core/src/clp_s/ColumnWriter.cpp b/components/core/src/clp_s/ColumnWriter.cpp index 932dfcba7..23e548a3c 100644 --- a/components/core/src/clp_s/ColumnWriter.cpp +++ b/components/core/src/clp_s/ColumnWriter.cpp @@ -1,10 +1,7 @@ #include "ColumnWriter.hpp" namespace clp_s { -void Int64ColumnWriter::add_value( - std::variant& value, - size_t& size -) { +void Int64ColumnWriter::add_value(ParsedMessage::variable_t& value, size_t& size) { size = sizeof(int64_t); m_values.push_back(std::get(value)); } @@ -16,10 +13,7 @@ void Int64ColumnWriter::store(ZstdCompressor& compressor) { ); } -void FloatColumnWriter::add_value( - std::variant& value, - size_t& size -) { +void FloatColumnWriter::add_value(ParsedMessage::variable_t& value, size_t& size) { size = sizeof(double); m_values.push_back(std::get(value)); } @@ -31,10 +25,7 @@ void FloatColumnWriter::store(ZstdCompressor& compressor) { ); } -void BooleanColumnWriter::add_value( - std::variant& value, - size_t& size -) { +void BooleanColumnWriter::add_value(ParsedMessage::variable_t& value, size_t& size) { size = sizeof(uint8_t); m_values.push_back(std::get(value) ? 
1 : 0); } @@ -46,10 +37,7 @@ void BooleanColumnWriter::store(ZstdCompressor& compressor) { ); } -void ClpStringColumnWriter::add_value( - std::variant& value, - size_t& size -) { +void ClpStringColumnWriter::add_value(ParsedMessage::variable_t& value, size_t& size) { size = sizeof(int64_t); std::string string_var = std::get(value); uint64_t id; @@ -78,10 +66,7 @@ void ClpStringColumnWriter::store(ZstdCompressor& compressor) { ); } -void VariableStringColumnWriter::add_value( - std::variant& value, - size_t& size -) { +void VariableStringColumnWriter::add_value(ParsedMessage::variable_t& value, size_t& size) { size = sizeof(int64_t); std::string string_var = std::get(value); uint64_t id; @@ -96,19 +81,11 @@ void VariableStringColumnWriter::store(ZstdCompressor& compressor) { ); } -void DateStringColumnWriter::add_value( - std::variant& value, - size_t& size -) { +void DateStringColumnWriter::add_value(ParsedMessage::variable_t& value, size_t& size) { size = 2 * sizeof(int64_t); - std::string string_timestamp = std::get(value); - - uint64_t encoding_id; - epochtime_t timestamp - = m_timestamp_dict->ingest_entry(m_name, m_id, string_timestamp, encoding_id); - - m_timestamps.push_back(timestamp); - m_timestamp_encodings.push_back(encoding_id); + auto encoded_timestamp = std::get>(value); + m_timestamps.push_back(encoded_timestamp.second); + m_timestamp_encodings.push_back(encoded_timestamp.first); } void DateStringColumnWriter::store(ZstdCompressor& compressor) { @@ -121,23 +98,4 @@ void DateStringColumnWriter::store(ZstdCompressor& compressor) { m_timestamp_encodings.size() * sizeof(int64_t) ); } - -void FloatDateStringColumnWriter::add_value( - std::variant& value, - size_t& size -) { - size = sizeof(double); - double timestamp = std::get(value); - - m_timestamp_dict->ingest_entry(m_name, m_id, timestamp); - - m_timestamps.push_back(timestamp); -} - -void FloatDateStringColumnWriter::store(ZstdCompressor& compressor) { - compressor.write( - 
reinterpret_cast(m_timestamps.data()), - m_timestamps.size() * sizeof(double) - ); -} } // namespace clp_s diff --git a/components/core/src/clp_s/ColumnWriter.hpp b/components/core/src/clp_s/ColumnWriter.hpp index ae7c9b3ba..5546bf282 100644 --- a/components/core/src/clp_s/ColumnWriter.hpp +++ b/components/core/src/clp_s/ColumnWriter.hpp @@ -8,6 +8,7 @@ #include "DictionaryWriter.hpp" #include "FileWriter.hpp" +#include "ParsedMessage.hpp" #include "TimestampDictionaryWriter.hpp" #include "VariableEncoder.hpp" #include "ZstdCompressor.hpp" @@ -18,7 +19,7 @@ namespace clp_s { class BaseColumnWriter { public: // Constructor - explicit BaseColumnWriter(std::string name, int32_t id) : m_name(std::move(name)), m_id(id) {} + explicit BaseColumnWriter(int32_t id) : m_id(id) {} // Destructor virtual ~BaseColumnWriter() = default; @@ -28,8 +29,7 @@ class BaseColumnWriter { * @param value * @param size */ - virtual void add_value(std::variant& value, size_t& size) - = 0; + virtual void add_value(ParsedMessage::variable_t& value, size_t& size) = 0; /** * Stores the column to a compressed file @@ -37,27 +37,20 @@ class BaseColumnWriter { */ virtual void store(ZstdCompressor& compressor) = 0; - /** - * @return Name of the column - */ - std::string get_name() { return m_name; } - protected: - std::string m_name; int32_t m_id; }; class Int64ColumnWriter : public BaseColumnWriter { public: // Constructor - explicit Int64ColumnWriter(std::string name, int32_t id) - : BaseColumnWriter(std::move(name), id) {} + explicit Int64ColumnWriter(int32_t id) : BaseColumnWriter(id) {} // Destructor ~Int64ColumnWriter() override = default; // Methods inherited from BaseColumnWriter - void add_value(std::variant& value, size_t& size) override; + void add_value(ParsedMessage::variable_t& value, size_t& size) override; void store(ZstdCompressor& compressor) override; @@ -68,14 +61,13 @@ class Int64ColumnWriter : public BaseColumnWriter { class FloatColumnWriter : public BaseColumnWriter { public: 
// Constructor - explicit FloatColumnWriter(std::string name, int32_t id) - : BaseColumnWriter(std::move(name), id) {} + explicit FloatColumnWriter(int32_t id) : BaseColumnWriter(id) {} // Destructor ~FloatColumnWriter() override = default; // Methods inherited from BaseColumnWriter - void add_value(std::variant& value, size_t& size) override; + void add_value(ParsedMessage::variable_t& value, size_t& size) override; void store(ZstdCompressor& compressor) override; @@ -86,14 +78,13 @@ class FloatColumnWriter : public BaseColumnWriter { class BooleanColumnWriter : public BaseColumnWriter { public: // Constructor - explicit BooleanColumnWriter(std::string name, int32_t id) - : BaseColumnWriter(std::move(name), id) {} + explicit BooleanColumnWriter(int32_t id) : BaseColumnWriter(id) {} // Destructor ~BooleanColumnWriter() override = default; // Methods inherited from BaseColumnWriter - void add_value(std::variant& value, size_t& size) override; + void add_value(ParsedMessage::variable_t& value, size_t& size) override; void store(ZstdCompressor& compressor) override; @@ -105,12 +96,11 @@ class ClpStringColumnWriter : public BaseColumnWriter { public: // Constructor ClpStringColumnWriter( - std::string const& name, int32_t id, std::shared_ptr var_dict, std::shared_ptr log_dict ) - : BaseColumnWriter(name, id), + : BaseColumnWriter(id), m_var_dict(std::move(var_dict)), m_log_dict(std::move(log_dict)) {} @@ -118,7 +108,7 @@ class ClpStringColumnWriter : public BaseColumnWriter { ~ClpStringColumnWriter() override = default; // Methods inherited from BaseColumnWriter - void add_value(std::variant& value, size_t& size) override; + void add_value(ParsedMessage::variable_t& value, size_t& size) override; void store(ZstdCompressor& compressor) override; @@ -164,19 +154,15 @@ class ClpStringColumnWriter : public BaseColumnWriter { class VariableStringColumnWriter : public BaseColumnWriter { public: // Constructor - VariableStringColumnWriter( - std::string const& name, - int32_t 
id, - std::shared_ptr var_dict - ) - : BaseColumnWriter(name, id), + VariableStringColumnWriter(int32_t id, std::shared_ptr var_dict) + : BaseColumnWriter(id), m_var_dict(std::move(var_dict)) {} // Destructor ~VariableStringColumnWriter() override = default; // Methods inherited from BaseColumnWriter - void add_value(std::variant& value, size_t& size) override; + void add_value(ParsedMessage::variable_t& value, size_t& size) override; void store(ZstdCompressor& compressor) override; @@ -188,54 +174,20 @@ class VariableStringColumnWriter : public BaseColumnWriter { class DateStringColumnWriter : public BaseColumnWriter { public: // Constructor - DateStringColumnWriter( - std::string const& name, - int32_t id, - std::shared_ptr timestamp_dict - ) - : BaseColumnWriter(name, id), - m_timestamp_dict(std::move(timestamp_dict)) {} + explicit DateStringColumnWriter(int32_t id) : BaseColumnWriter(id) {} // Destructor ~DateStringColumnWriter() override = default; // Methods inherited from BaseColumnWriter - void add_value(std::variant& value, size_t& size) override; + void add_value(ParsedMessage::variable_t& value, size_t& size) override; void store(ZstdCompressor& compressor) override; private: - std::shared_ptr m_timestamp_dict; - std::vector m_timestamps; std::vector m_timestamp_encodings; }; - -class FloatDateStringColumnWriter : public BaseColumnWriter { -public: - // Constructor - FloatDateStringColumnWriter( - std::string const& name, - int32_t id, - std::shared_ptr timestamp_dict - ) - : BaseColumnWriter(name, id), - - m_timestamp_dict(std::move(timestamp_dict)) {} - - // Destructor - ~FloatDateStringColumnWriter() override = default; - - // Methods inherited from BaseColumnWriter - void add_value(std::variant& value, size_t& size) override; - - void store(ZstdCompressor& compressor) override; - -private: - std::shared_ptr m_timestamp_dict; - - std::vector m_timestamps; -}; } // namespace clp_s #endif // CLP_S_COLUMNWRITER_HPP diff --git 
a/components/core/src/clp_s/JsonParser.cpp b/components/core/src/clp_s/JsonParser.cpp index 96627c1b0..b4f722164 100644 --- a/components/core/src/clp_s/JsonParser.cpp +++ b/components/core/src/clp_s/JsonParser.cpp @@ -150,22 +150,16 @@ void JsonParser::parse_line(ondemand::value line, int32_t parent_node_id, std::s line.raw_json_token().substr(1, line.raw_json_token().size() - 2) ); if (matches_timestamp) { - double ret_double; - if (StringUtils::convert_string_to_double(value, ret_double)) { - node_id = m_schema_tree->add_node( - node_id_stack.top(), - NodeType::FLOATDATESTRING, - m_timestamp_key - ); - m_current_parsed_message.add_value(node_id, ret_double); - } else { - node_id = m_schema_tree->add_node( - node_id_stack.top(), - NodeType::DATESTRING, - m_timestamp_key - ); - m_current_parsed_message.add_value(node_id, value); - } + node_id = m_schema_tree->add_node( + node_id_stack.top(), + NodeType::DATESTRING, + cur_key + ); + uint64_t encoding_id{0}; + epochtime_t timestamp + = m_timestamp_dictionary + ->ingest_entry(m_timestamp_key, node_id, value, encoding_id); + m_current_parsed_message.add_value(node_id, encoding_id, timestamp); matches_timestamp = may_match_timestamp = can_match_timestamp = false; } else if (value.find(' ') != std::string::npos) { node_id = m_schema_tree diff --git a/components/core/src/clp_s/ParsedMessage.hpp b/components/core/src/clp_s/ParsedMessage.hpp index 769440778..4715441f1 100644 --- a/components/core/src/clp_s/ParsedMessage.hpp +++ b/components/core/src/clp_s/ParsedMessage.hpp @@ -6,9 +6,15 @@ #include #include +#include "Defs.hpp" + namespace clp_s { class ParsedMessage { public: + // Types + using variable_t + = std::variant>; + // Constructor ParsedMessage() : m_schema_id(-1) {} @@ -18,17 +24,43 @@ class ParsedMessage { void set_id(int32_t schema_id) { m_schema_id = schema_id; } /** - * Adds a value with different types to the message + * Adds an int64_t value to the message for a given MST node ID. 
* @param node_id * @param value */ - inline void add_value(int32_t node_id, int64_t value) { m_message[node_id] = value; } + inline void add_value(int32_t node_id, int64_t value) { m_message.emplace(node_id, value); } - inline void add_value(int32_t node_id, double value) { m_message[node_id] = value; } + /** + * Adds a double value to the message for a given MST node ID. + * @param node_id + * @param value + */ + inline void add_value(int32_t node_id, double value) { m_message.emplace(node_id, value); } + + /** + * Adds a string value to the message for a given MST node ID. + * @param node_id + * @param value + */ + inline void add_value(int32_t node_id, std::string const& value) { + m_message.emplace(node_id, value); + } - inline void add_value(int32_t node_id, std::string const& value) { m_message[node_id] = value; } + /** + * Adds a boolean value to the message for a given MST node ID. + * @param node_id + * @param value + */ + inline void add_value(int32_t node_id, bool value) { m_message.emplace(node_id, value); } - inline void add_value(int32_t node_id, bool value) { m_message[node_id] = value; } + /** + * Adds a timestamp value and its encoding to the message for a given MST node ID. 
+ * @param node_id + * @param value + */ + inline void add_value(int32_t node_id, uint64_t encoding_id, epochtime_t value) { + m_message.emplace(node_id, std::make_pair(encoding_id, value)); + } /** * Clears the message @@ -41,13 +73,11 @@ class ParsedMessage { /** * @return The content of the message */ - std::map>& get_content() { - return m_message; - } + std::map& get_content() { return m_message; } private: int32_t m_schema_id; - std::map> m_message; + std::map m_message; }; } // namespace clp_s diff --git a/components/core/src/clp_s/ReaderUtils.cpp b/components/core/src/clp_s/ReaderUtils.cpp index 0a35e3cfc..c1b284955 100644 --- a/components/core/src/clp_s/ReaderUtils.cpp +++ b/components/core/src/clp_s/ReaderUtils.cpp @@ -218,9 +218,6 @@ BaseColumnReader* ReaderUtils::append_reader_column( case NodeType::DATESTRING: column_reader = new DateStringColumnReader(key_name, column_id, timestamp_dict); break; - case NodeType::FLOATDATESTRING: - column_reader = new FloatDateStringColumnReader(key_name, column_id); - break; case NodeType::OBJECT: case NodeType::NULLVALUE: reader->append_column(column_id); diff --git a/components/core/src/clp_s/SchemaReader.cpp b/components/core/src/clp_s/SchemaReader.cpp index 43bcf3042..8d62b648a 100644 --- a/components/core/src/clp_s/SchemaReader.cpp +++ b/components/core/src/clp_s/SchemaReader.cpp @@ -25,12 +25,6 @@ void SchemaReader::mark_column_as_timestamp(BaseColumnReader* column_reader) { return static_cast(m_timestamp_column) ->get_encoded_time(m_cur_message); }; - } else if (m_timestamp_column->get_type() == NodeType::FLOATDATESTRING) { - m_get_timestamp = [this]() { - double timestamp = static_cast(m_timestamp_column) - ->get_encoded_time(m_cur_message); - return static_cast(timestamp); - }; } else if (m_timestamp_column->get_type() == NodeType::INTEGER) { m_get_timestamp = [this]() { return std::get(m_extracted_values[m_timestamp_column->get_id()]); @@ -262,8 +256,7 @@ void SchemaReader::generate_json_template(int32_t id) 
{ } case NodeType::CLPSTRING: case NodeType::VARSTRING: - case NodeType::DATESTRING: - case NodeType::FLOATDATESTRING: { + case NodeType::DATESTRING: { m_json_serializer->add_op(JsonSerializer::Op::AddStringField); m_reordered_columns.push_back(m_column_map[child_global_id]); break; diff --git a/components/core/src/clp_s/SchemaTree.hpp b/components/core/src/clp_s/SchemaTree.hpp index dbf6f0796..a3c2b0f88 100644 --- a/components/core/src/clp_s/SchemaTree.hpp +++ b/components/core/src/clp_s/SchemaTree.hpp @@ -20,7 +20,6 @@ enum class NodeType : uint8_t { ARRAY, NULLVALUE, DATESTRING, - FLOATDATESTRING, UNKNOWN }; diff --git a/components/core/src/clp_s/TimestampDictionaryReader.cpp b/components/core/src/clp_s/TimestampDictionaryReader.cpp index fdec53ce2..38f12627c 100644 --- a/components/core/src/clp_s/TimestampDictionaryReader.cpp +++ b/components/core/src/clp_s/TimestampDictionaryReader.cpp @@ -46,14 +46,11 @@ void TimestampDictionaryReader::read_new_entries(bool local) { for (uint64_t i = 0; i < range_index_size; ++i) { TimestampEntry entry; + std::vector tokens; entry.try_read_from_file(m_dictionary_decompressor); + StringUtils::tokenize_column_descriptor(entry.get_key_name(), tokens); m_entries.emplace_back(std::move(entry)); - std::string column_name = entry.get_key_name(); - std::vector tokens; - StringUtils::tokenize_column_descriptor(column_name, tokens); - m_tokenized_column_to_range.emplace_back(std::move(tokens), &m_entries.back()); - // TODO: Currently, we only allow a single authoritative timestamp column at ingestion time, // but the timestamp dictionary is designed to store the ranges of several timestamp // columns. 
We should enforce a convention that the first entry in the timestamp dictionary @@ -62,6 +59,8 @@ void TimestampDictionaryReader::read_new_entries(bool local) { m_authoritative_timestamp_column_ids = m_entries.back().get_column_ids(); m_authoritative_timestamp_tokenized_column = tokens; } + + m_tokenized_column_to_range.emplace_back(std::move(tokens), &m_entries.back()); } // Local timestamp dictionaries only contain range indices, and diff --git a/components/core/src/clp_s/TimestampDictionaryWriter.cpp b/components/core/src/clp_s/TimestampDictionaryWriter.cpp index af0f3eacf..d03724abe 100644 --- a/components/core/src/clp_s/TimestampDictionaryWriter.cpp +++ b/components/core/src/clp_s/TimestampDictionaryWriter.cpp @@ -101,14 +101,13 @@ size_t TimestampDictionaryWriter::close_local() { } uint64_t TimestampDictionaryWriter::get_pattern_id(TimestampPattern const* pattern) { - if (0 == m_pattern_to_id.count(pattern)) { + auto it = m_pattern_to_id.find(pattern); + if (m_pattern_to_id.end() == it) { uint64_t id = m_next_id++; - m_pattern_to_id[pattern] = id; - + m_pattern_to_id.emplace(pattern, id); return id; } - - return m_pattern_to_id.at(pattern); + return it->second; } epochtime_t TimestampDictionaryWriter::ingest_entry( @@ -119,18 +118,32 @@ epochtime_t TimestampDictionaryWriter::ingest_entry( ) { epochtime_t ret; size_t timestamp_begin_pos = 0, timestamp_end_pos = 0; - TimestampPattern const* pattern = TimestampPattern::search_known_ts_patterns( - timestamp, - ret, - timestamp_begin_pos, - timestamp_end_pos - ); - - if (pattern == nullptr) { + TimestampPattern const* pattern{nullptr}; + + // Try parsing the timestamp as one of the previously seen timestamp patterns + for (auto it : m_pattern_to_id) { + if (it.first->parse_timestamp(timestamp, ret, timestamp_begin_pos, timestamp_end_pos)) { + pattern = it.first; + pattern_id = it.second; + break; + } + } + + // Fall back to consulting all known timestamp patterns + if (nullptr == pattern) { + pattern = 
TimestampPattern::search_known_ts_patterns( + timestamp, + ret, + timestamp_begin_pos, + timestamp_end_pos + ); + } + + // Reject unparseable timestamps before assigning an ID so nullptr never enters m_pattern_to_id + if (nullptr == pattern) { throw OperationFailed(ErrorCodeFailure, __FILE__, __LINE__); } pattern_id = get_pattern_id(pattern); auto entry = m_local_column_id_to_range.find(node_id); if (entry == m_local_column_id_to_range.end()) { TimestampEntry new_entry(key); diff --git a/components/core/src/clp_s/TimestampPattern.cpp b/components/core/src/clp_s/TimestampPattern.cpp index 9457d5cda..4ddb5648e 100644 --- a/components/core/src/clp_s/TimestampPattern.cpp +++ b/components/core/src/clp_s/TimestampPattern.cpp @@ -8,7 +8,9 @@ #include #include +#include +using clp::string_utils::convert_string_to_int; using std::string; using std::to_string; using std::vector; @@ -91,7 +93,6 @@ static bool convert_string_to_number_notz( size_t max_digits, size_t begin_ix, size_t& end_ix, - char padding_character, int& value ); @@ -206,36 +207,70 @@ static bool convert_string_to_number_notz( void TimestampPattern::init() { // First create vector of observed patterns so that it's easy to maintain vector patterns; + // E.g. 1706980946603 + patterns.emplace_back(0, "%E"); + // E.g. 1679711330.789032462 + patterns.emplace_back(0, "%F"); + // E.g. 2022-04-06T03:33:23.476Z ...47, ...4 ...() patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S.%TZ"); // E.g. 2022-04-06T03:33:23Z patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%SZ"); + // E.g. 2022-04-06 03:33:23.476Z ...47, ...4 ...() + patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S.%TZ"); + // E.g. 2022-04-06 03:33:23Z + patterns.emplace_back(0, "%Y-%m-%d %H:%M:%SZ"); + // E.g. 2022/04/06T03:33:23.476Z ...47, ...4 ...() + patterns.emplace_back(0, "%Y/%m/%dT%H:%M:%S.%TZ"); + // E.g. 2022/04/06T03:33:23Z + patterns.emplace_back(0, "%Y/%m/%dT%H:%M:%SZ"); + // E.g. 2022/04/06 03:33:23.476Z ...47, ...4 ...() + patterns.emplace_back(0, "%Y/%m/%d %H:%M:%S.%TZ"); + // E.g. 
2022/04/06 03:33:23Z + patterns.emplace_back(0, "%Y/%m/%d %H:%M:%SZ"); + // E.g. 2015-01-31T15:50:45.392 patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S.%3"); // E.g. 2015-01-31T15:50:45,392 patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S,%3"); - // E.g. [2015-01-31T15:50:45 - patterns.emplace_back(0, "[%Y-%m-%dT%H:%M:%S"); - // E.g. [20170106-16:56:41] - patterns.emplace_back(0, "[%Y%m%d-%H:%M:%S]"); - // E.g. 2015-01-31 15:50:45,392 - patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S,%3"); // E.g. 2015-01-31 15:50:45.392 patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S.%3"); + // E.g. 2015-01-31 15:50:45,392 + patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S,%3"); + // E.g. 2015/01/31T15:50:45.123 + patterns.emplace_back(0, "%Y/%m/%dT%H:%M:%S.%3"); + // E.g. 2015/01/31T15:50:45,123 + patterns.emplace_back(0, "%Y/%m/%dT%H:%M:%S,%3"); + // E.g. 2015/01/31 15:50:45.123 + patterns.emplace_back(0, "%Y/%m/%d %H:%M:%S.%3"); + // E.g. 2015/01/31 15:50:45,123 + patterns.emplace_back(0, "%Y/%m/%d %H:%M:%S,%3"); // E.g. [2015-01-31 15:50:45,085] patterns.emplace_back(0, "[%Y-%m-%d %H:%M:%S,%3]"); + // E.g. INFO [main] 2015-01-31 15:50:45,085 + patterns.emplace_back(2, "%Y-%m-%d %H:%M:%S,%3"); + // E.g. <<<2016-11-10 03:02:29:936 + patterns.emplace_back(0, "<<<%Y-%m-%d %H:%M:%S:%3"); + // E.g. 01 Jan 2016 15:50:17,085 + patterns.emplace_back(0, "%d %b %Y %H:%M:%S,%3"); + // E.g. 2015-01-31T15:50:45 + patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S"); // E.g. 2015-01-31 15:50:45 patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S"); - // E.g. Start-Date: 2015-01-31 15:50:45 - patterns.emplace_back(1, "%Y-%m-%d %H:%M:%S"); + // E.g. 2015/01/31T15:50:45 + patterns.emplace_back(0, "%Y/%m/%dT%H:%M:%S"); // E.g. 2015/01/31 15:50:45 patterns.emplace_back(0, "%Y/%m/%d %H:%M:%S"); + // E.g. [2015-01-31T15:50:45 + patterns.emplace_back(0, "[%Y-%m-%dT%H:%M:%S"); + // E.g. [20170106-16:56:41] + patterns.emplace_back(0, "[%Y%m%d-%H:%M:%S]"); + // E.g. 
Start-Date: 2015-01-31 15:50:45 + patterns.emplace_back(1, "%Y-%m-%d %H:%M:%S"); // E.g. 15/01/31 15:50:45 patterns.emplace_back(0, "%y/%m/%d %H:%M:%S"); // E.g. 150131 9:50:45 patterns.emplace_back(0, "%y%m%d %k:%M:%S"); - // E.g. 01 Jan 2016 15:50:17,085 - patterns.emplace_back(0, "%d %b %Y %H:%M:%S,%3"); // E.g. Jan 01, 2016 3:50:17 PM patterns.emplace_back(0, "%b %d, %Y %l:%M:%S %p"); // E.g. January 31, 2015 15:50 @@ -247,16 +282,14 @@ void TimestampPattern::init() { patterns.emplace_back(3, "[%d/%b/%Y:%H:%M:%S"); // E.g. 192.168.4.5 - - [01/01/2016:15:50:17 patterns.emplace_back(3, "[%d/%m/%Y:%H:%M:%S"); - // E.g. INFO [main] 2015-01-31 15:50:45,085 - patterns.emplace_back(2, "%Y-%m-%d %H:%M:%S,%3"); // E.g. Started POST "/api/v3/internal/allowed" for 127.0.0.1 at 2017-06-18 00:20:44 patterns.emplace_back(6, "%Y-%m-%d %H:%M:%S"); // E.g. update-alternatives 2015-01-31 15:50:45 patterns.emplace_back(1, "%Y-%m-%d %H:%M:%S"); // E.g. ERROR: apport (pid 4557) Sun Jan 1 15:50:45 2015 patterns.emplace_back(4, "%a %b %e %H:%M:%S %Y"); - // E.g. <<<2016-11-10 03:02:29:936 - patterns.emplace_back(0, "<<<%Y-%m-%d %H:%M:%S:%3"); + // E.g. Sun Jan 1 15:50:45 2015 + patterns.emplace_back(0, "%a %b %e %H:%M:%S %Y"); // TODO These patterns are imprecise and will prevent searching by timestamp; but for now, // it's no worse than not parsing a timestamp E.g. 
Jan 21 11:56:42 @@ -772,6 +805,57 @@ bool TimestampPattern::parse_timestamp( break; } + case 'E': { // Millisecond-precision UNIX epoch timestamp + // Only allow consuming entire timestamp string + // Note: "timestamp" is how the result is returned by reference + // Note: this format will also accept any integer timestamp (including UNIX + // epoch seconds and nanoseconds as well) + if (line_ix > 0 || false == convert_string_to_int(line, timestamp)) { + return false; + } + timestamp_begin_pos = 0; + timestamp_end_pos = line.length(); + return true; + } + + case 'F': { // Nanosecond-precision floating-point UNIX epoch timestamp + constexpr auto cNanosecondDigits = 9; + constexpr auto cNanosecondMultiplier = 1'000'000'000; + // Only allow consuming entire timestamp string + if (line_ix > 0) { + return false; + } + auto dot_position = line.find('.'); + auto nanosecond_start = dot_position + 1; + if (std::string::npos == dot_position || 0 == dot_position + || cNanosecondDigits != (line.length() - nanosecond_start)) + { + return false; + } + + auto timestamp_view = std::string_view(line); + if (false + == convert_string_to_int(timestamp_view.substr(0, dot_position), timestamp)) + { + return false; + } + + epochtime_t timestamp_nanoseconds; + if (false + == convert_string_to_int( + timestamp_view.substr(nanosecond_start, cNanosecondDigits), + timestamp_nanoseconds + )) + { + return false; + } + + timestamp = timestamp * cNanosecondMultiplier + timestamp_nanoseconds; + timestamp_begin_pos = 0; + timestamp_end_pos = line.length(); + return true; + } + default: return false; } @@ -983,6 +1067,28 @@ void TimestampPattern::insert_formatted_timestamp(epochtime_t timestamp, string& append_padded_value_notz(millisecond, '0', 3, new_msg); break; + case 'E': // UNIX epoch milliseconds + // Note: this timestamp format is required to make up the entire timestamp, so + // this is safe + new_msg = std::to_string(timestamp); + break; + + case 'F': { // Nanosecond precision floating
point UNIX epoch timestamp + constexpr size_t cNanosecondDigits{9}; + // Note: this timestamp format is required to make up the entire timestamp, so + // this is safe + new_msg = std::to_string(timestamp); + // Zero-pad after any sign so that at least one digit precedes the '.'; + // otherwise sub-second values (e.g. 5 -> "0.000000005") index out of range + size_t const sign_length{timestamp < 0 ? size_t{1} : size_t{0}}; + size_t const min_length{sign_length + 1 + cNanosecondDigits}; + if (new_msg.length() < min_length) { + new_msg.insert(sign_length, min_length - new_msg.length(), '0'); + } + new_msg.insert(new_msg.length() - cNanosecondDigits, 1, '.'); + break; + } + default: { throw OperationFailed(ErrorCodeUnsupported, __FILENAME__, __LINE__); } diff --git a/components/core/src/clp_s/TimestampPattern.hpp b/components/core/src/clp_s/TimestampPattern.hpp index f500df868..9219d33bb 100644 --- a/components/core/src/clp_s/TimestampPattern.hpp +++ b/components/core/src/clp_s/TimestampPattern.hpp @@ -42,7 +42,10 @@ namespace clp_s { * - M 2-digit 0-padded minute (00-59) * - S 2-digit 0-padded second (00-60) (60 to account for leap seconds) * - 3 0-padded millisecond (000-999) - * - T 0-padded millisecond no trailing 0 (000)-999) e.g. (000), 9(00), 99(0), 099 + * - T 0-padded millisecond no trailing 0 (000-999) e.g. (000), 9(00), 99(0), 099 + * - E N-digit millisecond-precision UNIX epoch timestamp + * - F N-digit nanosecond-precision UNIX epoch timestamp in floating-point format with 9 digits + * trailing the decimal */ class TimestampPattern { public: diff --git a/components/core/src/clp_s/search/DateLiteral.cpp b/components/core/src/clp_s/search/DateLiteral.cpp index 6296baa64..2f303f4d7 100644 --- a/components/core/src/clp_s/search/DateLiteral.cpp +++ b/components/core/src/clp_s/search/DateLiteral.cpp @@ -25,27 +25,13 @@ std::shared_ptr DateLiteral::create_from_int(epochtime_t v) { } std::shared_ptr DateLiteral::create_from_string(std::string const& v) { - std::istringstream ss(v); - epochtime_t tmp_int_epoch; - double tmp_double_epoch; - - ss >> std::noskipws >> tmp_int_epoch; - if (false == ss.fail() && ss.eof()) { - return std::shared_ptr(static_cast(new DateLiteral(tmp_int_epoch, v))); - } - - ss = std::istringstream(v); - ss >> std::noskipws >> tmp_double_epoch; - if (false == ss.fail() && ss.eof()) { - return std::shared_ptr(static_cast(new 
DateLiteral(tmp_double_epoch, v)) - ); - } - // begin end arguments are returned only -- their value doesn't matter - size_t timestamp_begin_pos = 0, timestamp_end_pos = 0; + size_t timestamp_begin_pos{0}; + size_t timestamp_end_pos{0}; + epochtime_t timestamp; auto pattern = TimestampPattern::search_known_ts_patterns( v, - tmp_int_epoch, + timestamp, timestamp_begin_pos, timestamp_end_pos ); @@ -53,7 +39,7 @@ std::shared_ptr DateLiteral::create_from_string(std::string const& v) { return std::shared_ptr(nullptr); } - return std::shared_ptr(static_cast(new DateLiteral(tmp_int_epoch, v))); + return std::shared_ptr(static_cast(new DateLiteral(timestamp, v))); } void DateLiteral::print() { diff --git a/components/core/src/clp_s/search/DateLiteral.hpp b/components/core/src/clp_s/search/DateLiteral.hpp index 09df3fb03..69eb56187 100644 --- a/components/core/src/clp_s/search/DateLiteral.hpp +++ b/components/core/src/clp_s/search/DateLiteral.hpp @@ -7,7 +7,7 @@ #include "Integral.hpp" namespace clp_s::search { -constexpr LiteralTypeBitmask cDateLiteralTypes = EpochDateT | FloatDateT; +constexpr LiteralTypeBitmask cDateLiteralTypes = EpochDateT; /** * Class for Date literal in the search AST. 
Represents time @@ -46,8 +46,6 @@ class DateLiteral : public Integral { bool as_epoch_date() override { return true; } - bool as_float_date() override { return true; } - bool as_clp_string(std::string& ret, FilterOperation op) override; bool as_var_string(std::string& ret, FilterOperation op) override; diff --git a/components/core/src/clp_s/search/EvaluateTimestampIndex.cpp b/components/core/src/clp_s/search/EvaluateTimestampIndex.cpp index c958e9787..157001671 100644 --- a/components/core/src/clp_s/search/EvaluateTimestampIndex.cpp +++ b/components/core/src/clp_s/search/EvaluateTimestampIndex.cpp @@ -6,7 +6,7 @@ #include "OrExpr.hpp" namespace clp_s::search { -constexpr LiteralTypeBitmask cDateTypes = cIntegralTypes | EpochDateT | FloatDateT; +constexpr LiteralTypeBitmask cDateTypes = cIntegralTypes | EpochDateT; EvaluatedValue EvaluateTimestampIndex::run(std::shared_ptr const& expr) { if (std::dynamic_pointer_cast(expr)) { diff --git a/components/core/src/clp_s/search/Integral.hpp b/components/core/src/clp_s/search/Integral.hpp index eb619deed..0f54aff80 100644 --- a/components/core/src/clp_s/search/Integral.hpp +++ b/components/core/src/clp_s/search/Integral.hpp @@ -62,8 +62,6 @@ class Integral : public Literal { bool as_epoch_date() override { return true; } - bool as_float_date() override { return true; } - bool as_var_string(std::string& ret, FilterOperation op) override; bool as_float(double& ret, FilterOperation op) override; diff --git a/components/core/src/clp_s/search/Literal.hpp b/components/core/src/clp_s/search/Literal.hpp index 5e06e2a49..ecea62418 100644 --- a/components/core/src/clp_s/search/Literal.hpp +++ b/components/core/src/clp_s/search/Literal.hpp @@ -20,7 +20,6 @@ enum LiteralType : uint32_t { ArrayT = 1 << 5, NullT = 1 << 6, EpochDateT = 1 << 7, - FloatDateT = 1 << 8, TypesEnd = 1 << 9, UnknownT = ((uint32_t)1) << 31 }; @@ -74,8 +73,6 @@ class Literal : public Value { return "null"; case LiteralType::EpochDateT: return "epochdate"; - case 
LiteralType::FloatDateT: - return "floatdate"; default: return "errtype"; } @@ -106,8 +103,6 @@ class Literal : public Value { virtual bool as_epoch_date() { return false; } - virtual bool as_float_date() { return false; } - virtual bool as_any(FilterOperation op) { return false; } }; } // namespace clp_s::search diff --git a/components/core/src/clp_s/search/NarrowTypes.cpp b/components/core/src/clp_s/search/NarrowTypes.cpp index 82b8e7e5b..177f04c3e 100644 --- a/components/core/src/clp_s/search/NarrowTypes.cpp +++ b/components/core/src/clp_s/search/NarrowTypes.cpp @@ -62,9 +62,6 @@ std::shared_ptr NarrowTypes::narrow(std::shared_ptr cur) if (false == literal->as_epoch_date()) { column->remove_matching_type(LiteralType::EpochDateT); } - if (false == literal->as_float_date()) { - column->remove_matching_type(LiteralType::EpochDateT); - } } if (false == column->matches_any(cAllTypes)) { diff --git a/components/core/src/clp_s/search/Output.cpp b/components/core/src/clp_s/search/Output.cpp index a87e1d0a1..6ca66a62b 100644 --- a/components/core/src/clp_s/search/Output.cpp +++ b/components/core/src/clp_s/search/Output.cpp @@ -78,7 +78,6 @@ void Output::filter() { m_wildcard_to_searched_clpstrings.clear(); m_wildcard_to_searched_varstrings.clear(); m_wildcard_to_searched_datestrings.clear(); - m_wildcard_to_searched_floatdatestrings.clear(); m_schema = schema_id; populate_searched_wildcard_columns(m_expr); @@ -158,10 +157,6 @@ void Output::init( { m_datestring_readers[column.first] = date_column_reader; m_other_columns.push_back(column.second); - } else if (auto float_date_column_reader = dynamic_cast(column.second)) - { - m_floatdatestring_readers[column.first] = float_date_column_reader; - m_other_columns.push_back(column.second); } else { m_searched_columns.push_back(column.second); } @@ -321,12 +316,6 @@ bool Output::evaluate_wildcard_filter( } } - for (int32_t column_id : m_wildcard_to_searched_floatdatestrings[column]) { - if (evaluate_float_date_filter(op, 
m_floatdatestring_readers[column_id], literal)) { - return true; - } - } - m_maybe_number = expr->get_column()->matches_type(LiteralType::FloatT); for (int32_t column_id : m_wildcard_to_searched_columns[column]) { bool ret = false; @@ -432,12 +421,6 @@ bool Output::evaluate_filter( m_datestring_readers[column_id], literal ); - case LiteralType::FloatDateT: - return evaluate_float_date_filter( - expr->get_operation(), - m_floatdatestring_readers[column_id], - literal - ); // case LiteralType::NullT: // null checks are always turned into existence operators -- // no need to evaluate here @@ -1007,8 +990,6 @@ void Output::populate_searched_wildcard_columns(std::shared_ptr cons m_wildcard_to_searched_varstrings[col].push_back(node); } else if (tree_node_type == NodeType::DATESTRING) { m_wildcard_to_searched_datestrings[col].push_back(node); - } else if (tree_node_type == NodeType::FLOATDATESTRING) { - m_wildcard_to_searched_floatdatestrings[col].push_back(node); } else { // Arrays and basic types m_wildcard_to_searched_columns[col].push_back(node); @@ -1037,12 +1018,6 @@ void Output::add_wildcard_columns_to_searched_columns() { } } - for (auto& e : m_wildcard_to_searched_floatdatestrings) { - for (int32_t node : e.second) { - m_match.add_searched_column_to_schema(m_schema, node); - } - } - for (auto& e : m_wildcard_to_searched_columns) { for (int32_t node : e.second) { m_match.add_searched_column_to_schema(m_schema, node); @@ -1132,8 +1107,7 @@ Output::constant_propagate(std::shared_ptr const& expr, int32_t sche bool has_clp_string = false; bool matches_clp_string = false; bool has_other = !m_wildcard_to_searched_columns[wildcard].empty() - || !m_wildcard_to_searched_datestrings[wildcard].empty() - || !m_wildcard_to_searched_floatdatestrings[wildcard].empty(); + || !m_wildcard_to_searched_datestrings[wildcard].empty(); std::string filter_string; bool valid = filter->get_operand()->as_var_string(filter_string, filter->get_operation()) @@ -1248,12 +1222,4 @@ bool 
Output::evaluate_epoch_date_filter( ) { return evaluate_int_filter(op, reader->get_encoded_time(m_cur_message), operand); } - -bool Output::evaluate_float_date_filter( - FilterOperation op, - FloatDateStringColumnReader* reader, - std::shared_ptr& operand -) { - return evaluate_float_filter(op, reader->get_encoded_time(m_cur_message), operand); -} } // namespace clp_s::search diff --git a/components/core/src/clp_s/search/Output.hpp b/components/core/src/clp_s/search/Output.hpp index 2dc278ab0..685c3d984 100644 --- a/components/core/src/clp_s/search/Output.hpp +++ b/components/core/src/clp_s/search/Output.hpp @@ -73,14 +73,12 @@ class Output : public FilterClass { std::unordered_map m_clp_string_readers; std::unordered_map m_var_string_readers; std::unordered_map m_datestring_readers; - std::unordered_map m_floatdatestring_readers; uint64_t m_cur_message; EvaluatedValue m_expression_value; std::map> m_wildcard_to_searched_clpstrings; std::map> m_wildcard_to_searched_varstrings; std::map> m_wildcard_to_searched_datestrings; - std::map> m_wildcard_to_searched_floatdatestrings; std::map> m_wildcard_to_searched_columns; simdjson::ondemand::parser m_array_parser; @@ -207,19 +205,6 @@ class Output : public FilterClass { std::shared_ptr& operand ); - /** - * Evaluates a float date string filter expression - * @param op - * @param reader - * @param operand - * @return true if the expression evaluates to true, false otherwise - */ - bool evaluate_float_date_filter( - FilterOperation op, - FloatDateStringColumnReader* reader, - std::shared_ptr& operand - ); - /** * Evaluates an array filter expression * @param op diff --git a/components/core/src/clp_s/search/SearchUtils.cpp b/components/core/src/clp_s/search/SearchUtils.cpp index bb6e648a2..7121ddd04 100644 --- a/components/core/src/clp_s/search/SearchUtils.cpp +++ b/components/core/src/clp_s/search/SearchUtils.cpp @@ -34,8 +34,6 @@ LiteralType node_to_literal_type(NodeType type) { return LiteralType::NullT; case 
NodeType::DATESTRING: return LiteralType::EpochDateT; - case NodeType::FLOATDATESTRING: - return LiteralType::FloatDateT; case NodeType::UNKNOWN: default: return LiteralType::UnknownT; diff --git a/components/core/src/glt/TimestampPattern.cpp b/components/core/src/glt/TimestampPattern.cpp index 4fcb5a07b..d725906e3 100644 --- a/components/core/src/glt/TimestampPattern.cpp +++ b/components/core/src/glt/TimestampPattern.cpp @@ -124,28 +124,44 @@ void TimestampPattern::init() { patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S.%3"); // E.g. 2015-01-31T15:50:45,392 patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S,%3"); - // E.g. [2015-01-31T15:50:45 - patterns.emplace_back(0, "[%Y-%m-%dT%H:%M:%S"); - // E.g. [20170106-16:56:41] - patterns.emplace_back(0, "[%Y%m%d-%H:%M:%S]"); - // E.g. 2015-01-31 15:50:45,392 - patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S,%3"); // E.g. 2015-01-31 15:50:45.392 patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S.%3"); + // E.g. 2015-01-31 15:50:45,392 + patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S,%3"); + // E.g. 2015/01/31T15:50:45.123 + patterns.emplace_back(0, "%Y/%m/%dT%H:%M:%S.%3"); + // E.g. 2015/01/31T15:50:45,123 + patterns.emplace_back(0, "%Y/%m/%dT%H:%M:%S,%3"); + // E.g. 2015/01/31 15:50:45.123 + patterns.emplace_back(0, "%Y/%m/%d %H:%M:%S.%3"); + // E.g. 2015/01/31 15:50:45,123 + patterns.emplace_back(0, "%Y/%m/%d %H:%M:%S,%3"); // E.g. [2015-01-31 15:50:45,085] patterns.emplace_back(0, "[%Y-%m-%d %H:%M:%S,%3]"); + // E.g. INFO [main] 2015-01-31 15:50:45,085 + patterns.emplace_back(2, "%Y-%m-%d %H:%M:%S,%3"); + // E.g. <<<2016-11-10 03:02:29:936 + patterns.emplace_back(0, "<<<%Y-%m-%d %H:%M:%S:%3"); + // E.g. 01 Jan 2016 15:50:17,085 + patterns.emplace_back(0, "%d %b %Y %H:%M:%S,%3"); + // E.g. 2015-01-31T15:50:45 + patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S"); // E.g. 2015-01-31 15:50:45 patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S"); - // E.g. Start-Date: 2015-01-31 15:50:45 - patterns.emplace_back(1, "%Y-%m-%d %H:%M:%S"); + // E.g. 
2015/01/31T15:50:45 + patterns.emplace_back(0, "%Y/%m/%dT%H:%M:%S"); // E.g. 2015/01/31 15:50:45 patterns.emplace_back(0, "%Y/%m/%d %H:%M:%S"); + // E.g. [2015-01-31T15:50:45 + patterns.emplace_back(0, "[%Y-%m-%dT%H:%M:%S"); + // E.g. [20170106-16:56:41] + patterns.emplace_back(0, "[%Y%m%d-%H:%M:%S]"); + // E.g. Start-Date: 2015-01-31 15:50:45 + patterns.emplace_back(1, "%Y-%m-%d %H:%M:%S"); // E.g. 15/01/31 15:50:45 patterns.emplace_back(0, "%y/%m/%d %H:%M:%S"); // E.g. 150131 9:50:45 patterns.emplace_back(0, "%y%m%d %k:%M:%S"); - // E.g. 01 Jan 2016 15:50:17,085 - patterns.emplace_back(0, "%d %b %Y %H:%M:%S,%3"); // E.g. Jan 01, 2016 3:50:17 PM patterns.emplace_back(0, "%b %d, %Y %l:%M:%S %p"); // E.g. January 31, 2015 15:50 @@ -157,16 +173,12 @@ void TimestampPattern::init() { patterns.emplace_back(3, "[%d/%b/%Y:%H:%M:%S"); // E.g. 192.168.4.5 - - [01/01/2016:15:50:17 patterns.emplace_back(3, "[%d/%m/%Y:%H:%M:%S"); - // E.g. INFO [main] 2015-01-31 15:50:45,085 - patterns.emplace_back(2, "%Y-%m-%d %H:%M:%S,%3"); // E.g. Started POST "/api/v3/internal/allowed" for 127.0.0.1 at 2017-06-18 00:20:44 patterns.emplace_back(6, "%Y-%m-%d %H:%M:%S"); // E.g. update-alternatives 2015-01-31 15:50:45 patterns.emplace_back(1, "%Y-%m-%d %H:%M:%S"); // E.g. ERROR: apport (pid 4557) Sun Jan 1 15:50:45 2015 patterns.emplace_back(4, "%a %b %e %H:%M:%S %Y"); - // E.g. <<<2016-11-10 03:02:29:936 - patterns.emplace_back(0, "<<<%Y-%m-%d %H:%M:%S:%3"); // E.g. 
Sun Jan 1 15:50:45 2015 patterns.emplace_back(0, "%a %b %e %H:%M:%S %Y"); diff --git a/components/core/tests/test-TimestampPattern.cpp b/components/core/tests/test-TimestampPattern.cpp index bd6891944..0b9dc54cf 100644 --- a/components/core/tests/test-TimestampPattern.cpp +++ b/components/core/tests/test-TimestampPattern.cpp @@ -476,4 +476,76 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); specific_pattern.insert_formatted_timestamp(timestamp, content); REQUIRE("626000000 content after" == content); + + line = "2015/01/31 15:50:45.123 content after"; + specific_pattern = TimestampPattern{0, "%Y/%m/%d %H:%M:%S.%3"}; + specific_pattern.parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos); + REQUIRE(specific_pattern.get_num_spaces_before_ts() == 0); + REQUIRE(specific_pattern.get_format() == "%Y/%m/%d %H:%M:%S.%3"); + REQUIRE(0 == timestamp_begin_pos); + REQUIRE(23 == timestamp_end_pos); + content.assign(line, 0, timestamp_begin_pos); + content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); + specific_pattern.insert_formatted_timestamp(timestamp, content); + REQUIRE(line == content); + + line = "2015/01/31 15:50:45,123 content after"; + specific_pattern = TimestampPattern{0, "%Y/%m/%d %H:%M:%S,%3"}; + specific_pattern.parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos); + REQUIRE(specific_pattern.get_num_spaces_before_ts() == 0); + REQUIRE(specific_pattern.get_format() == "%Y/%m/%d %H:%M:%S,%3"); + REQUIRE(0 == timestamp_begin_pos); + REQUIRE(23 == timestamp_end_pos); + content.assign(line, 0, timestamp_begin_pos); + content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); + specific_pattern.insert_formatted_timestamp(timestamp, content); + REQUIRE(line == content); + + line = "2015/01/31T15:50:45 content after"; + specific_pattern = TimestampPattern{0, "%Y/%m/%dT%H:%M:%S"}; + 
specific_pattern.parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos); + REQUIRE(specific_pattern.get_num_spaces_before_ts() == 0); + REQUIRE(specific_pattern.get_format() == "%Y/%m/%dT%H:%M:%S"); + REQUIRE(0 == timestamp_begin_pos); + REQUIRE(19 == timestamp_end_pos); + content.assign(line, 0, timestamp_begin_pos); + content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); + specific_pattern.insert_formatted_timestamp(timestamp, content); + REQUIRE(line == content); + + line = "2015/01/31T15:50:45.123 content after"; + specific_pattern = TimestampPattern{0, "%Y/%m/%dT%H:%M:%S.%3"}; + specific_pattern.parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos); + REQUIRE(specific_pattern.get_num_spaces_before_ts() == 0); + REQUIRE(specific_pattern.get_format() == "%Y/%m/%dT%H:%M:%S.%3"); + REQUIRE(0 == timestamp_begin_pos); + REQUIRE(23 == timestamp_end_pos); + content.assign(line, 0, timestamp_begin_pos); + content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); + specific_pattern.insert_formatted_timestamp(timestamp, content); + REQUIRE(line == content); + + line = "2015/01/31T15:50:45,123 content after"; + specific_pattern = TimestampPattern{0, "%Y/%m/%dT%H:%M:%S,%3"}; + specific_pattern.parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos); + REQUIRE(specific_pattern.get_num_spaces_before_ts() == 0); + REQUIRE(specific_pattern.get_format() == "%Y/%m/%dT%H:%M:%S,%3"); + REQUIRE(0 == timestamp_begin_pos); + REQUIRE(23 == timestamp_end_pos); + content.assign(line, 0, timestamp_begin_pos); + content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); + specific_pattern.insert_formatted_timestamp(timestamp, content); + REQUIRE(line == content); + + line = "2015-01-31T15:50:45 content after"; + specific_pattern = TimestampPattern{0, "%Y-%m-%dT%H:%M:%S"}; + specific_pattern.parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos); + 
REQUIRE(specific_pattern.get_num_spaces_before_ts() == 0); + REQUIRE(specific_pattern.get_format() == "%Y-%m-%dT%H:%M:%S"); + REQUIRE(0 == timestamp_begin_pos); + REQUIRE(19 == timestamp_end_pos); + content.assign(line, 0, timestamp_begin_pos); + content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); + specific_pattern.insert_formatted_timestamp(timestamp, content); + REQUIRE(line == content); }