diff --git a/src/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-C.md b/src/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-C.md index f55643986..9b152991b 100644 --- a/src/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-C.md +++ b/src/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-C.md @@ -291,22 +291,20 @@ ERRNO tsfile_reader_close(TsFileReader reader); -### Query table/get next +### Query table/get next/query by row ```C - /** - * @brief Query data from the specific table and columns within time range. + * @brief Queries data from the specified table and columns within a given time range. * - * @param reader [in] Valid TsFileReader handle from tsfile_reader_new(). - * @param table_name [in] Target table name. Must exist in the TsFile. - * @param columns [in] Array of column names to fetch. - * @param column_num [in] Number of columns in array. + * @param reader [in] A valid TsFileReader handle obtained by tsfile_reader_new(). + * @param table_name [in] Name of the target table, which must exist in the TsFile. + * @param columns [in] Array of column names to be queried. + * @param column_num [in] Number of columns in the column name array. * @param start_time [in] Start timestamp. - * @param end_time [in] End timestamp. Must ≥ start_time. - * @param err_code [out] RET_OK(0) on success, or error code in errno_define_c.h. - * @return ResultSet Query results handle. Must be freed with - * free_tsfile_result_set(). + * @param end_time [in] End timestamp, which must be greater than or equal to start_time. + * @param err_code [out] Returns RET_OK(0) on success, otherwise returns an error code defined in errno_define_c.h. + * @return ResultSet Handle of the query result set. Must be released by free_tsfile_result_set() after use. 
*/ ResultSet tsfile_query_table(TsFileReader reader, const char* table_name, char** columns, uint32_t column_num, @@ -314,20 +312,58 @@ ResultSet tsfile_query_table(TsFileReader reader, const char* table_name, ERRNO* err_code); /** - * @brief Check and fetch the next row in the ResultSet. + * @brief Checks and retrieves the next row of data in the result set. * - * @param result_set [in] Valid ResultSet handle. - * @param error_code RET_OK(0) on success, or error code in errno_define_c.h. - * @return bool - true: Row available, false: End of data or error. + * @param result_set [in] A valid ResultSet handle. + * @param error_code [out] Returns RET_OK(0) on success, otherwise returns an error code defined in errno_define_c.h. + * @return bool - true: Next row exists, false: Reached the end or an error occurred. */ bool tsfile_result_set_next(ResultSet result_set, ERRNO* error_code); /** - * @brief Free Result set + * @brief Releases the resources of the result set. * - * @param result_set [in] Valid ResultSet handle ptr. + * @param result_set [in] Pointer to a valid ResultSet handle. 
*/ void free_tsfile_result_set(ResultSet* result_set); + +/** + * @brief Queries time-series data by row (tree model), supporting offset and row count limitation + * + * @param reader [in] A valid TsFileReader handle obtained by tsfile_reader_new() + * @param device_ids [in] Array of device IDs + * @param device_ids_len [in] Number of device IDs + * @param measurement_names [in] Array of measurement (sensor) names + * @param measurement_names_len [in] Number of measurement names + * @param offset [in] Number of leading rows to skip (must be >= 0) + * @param limit [in] Maximum number of rows to return; a value < 0 means no limit + * @param err_code [out] Returns RET_OK(0) on success, otherwise returns an error code defined in errno_define_c.h + * @return Returns ResultSet handle on success, NULL on failure + */ +ResultSet tsfile_reader_query_tree_by_row(TsFileReader reader, + char** device_ids, int device_ids_len, + char** measurement_names, + int measurement_names_len, int offset, + int limit, ERRNO* err_code); + +/** + * @brief Queries table model data by row, supporting offset and row count limitation pushdown + * + * @param reader [in] A valid TsFileReader handle obtained by tsfile_reader_new() + * @param table_name [in] Name of the target table + * @param column_names [in] Array of column names to be queried + * @param column_names_len [in] Number of columns to be queried + * @param offset [in] Number of leading rows to skip (must be >= 0) + * @param limit [in] Maximum number of rows to return; a value < 0 means no limit + * @param tag_filter [in] Tag filter handle + * @param batch_size [in] Batch size for data query + * @param err_code [out] Returns RET_OK(0) on success, otherwise returns an error code defined in errno_define_c.h + * @return Returns ResultSet handle on success, NULL on failure + */ +ResultSet tsfile_reader_query_table_by_row( + TsFileReader reader, const char* table_name, char** column_names, + int column_names_len, int offset, int limit, TagFilterHandle tag_filter, + int batch_size, ERRNO* err_code); ``` diff --git
a/src/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-CPP.md b/src/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-CPP.md index b1642f88b..5f5f5002b 100644 --- a/src/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-CPP.md +++ b/src/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-CPP.md @@ -27,53 +27,92 @@ Used to write data to tsfile ```cpp +namespace storage { +class RestorableTsFileIOWriter; + /** - * @brief Facilitates writing structured table data into a TsFile with a specified schema. + * @brief Supports writing structured table data to TsFile according to the specified table schema * - * The TsFileTableWriter class is designed to write structured data, particularly suitable for time-series data, - * into a file optimized for efficient storage and retrieval (referred to as TsFile here). It allows users to define - * the schema of the tables they want to write, add rows of data according to that schema, and serialize this data - * into a TsFile. Additionally, it provides options to limit memory usage during the writing process. + * The TsFileTableWriter class is used to write structured data (especially suitable for time-series data) + * to TsFile optimized for efficient storage and querying. + * Users can define the structure of the table to be written, add data rows according to the structure, + * and serialize the data into TsFile. + * Meanwhile, this class provides the ability to limit memory usage during the writing process. */ class TsFileTableWriter { public: /** - * TsFileTableWriter is used to write table data into a target file with the given schema, - * optionally limiting the memory usage. + * TsFileTableWriter is used to write table data to the target file according to the specified table schema, + * and can optionally limit the memory usage. * - * @param writer_file Target file where the table data will be written. Must not be null. 
- * @param table_schema Used to construct table structures. Defines the schema of the table - * being written. - * @param memory_threshold Optional parameter. When the size of written - * data exceeds this value, the data will be automatically flushed to the - * disk. Default value is 128MB. + * @param writer_file Target file for writing table data, cannot be a null pointer + * @param table_schema Used to construct the table structure and define the schema of the table to be written + * @param memory_threshold Optional parameter. When the written data volume exceeds this threshold, + * data will be automatically flushed to disk. The default value is 128MB */ - TsFileTableWriter(WriteFile* writer_file, - TableSchema* table_schema, - uint64_t memory_threshold = 128 * 1024 * 1024); - ~TsFileTableWriter(); + template <typename T> + explicit TsFileTableWriter(storage::WriteFile* writer_file, T* table_schema, + uint64_t memory_threshold = 128 * 1024 * 1024) { + static_assert(!std::is_same<T, std::nullptr_t>::value, + "table_schema cannot be nullptr"); + tsfile_writer_ = std::make_shared<TsFileWriter>(); + tsfile_writer_->init(writer_file); + tsfile_writer_->set_generate_table_schema(false); + + // Perform a deep copy. The source TableSchema object may be allocated on the stack/heap + auto table_schema_ptr = std::make_shared<TableSchema>(*table_schema); + error_number = tsfile_writer_->register_table(table_schema_ptr); + exclusive_table_name_ = table_schema->get_table_name(); + common::g_config_value_.chunk_group_size_threshold_ = memory_threshold; + } + /** - * Writes the given tablet data into the target file according to the schema. + * Constructs TsFileTableWriter from a restorable TsFileIOWriter, + * supporting appending table data after failure recovery. + * The schema is read from the recovered file without additional TableSchema input. * - * @param tablet The tablet containing the data to be written. Must not be null. - * @return Returns 0 on success, or a non-zero error code on failure.
+ * @param restorable_writer Recovered I/O writer; cannot be a null pointer, + * and must have been opened with truncate_corrupted = true so that can_write() returns true + * @param memory_threshold Optional memory threshold for cached data */ - int write_table(const Tablet& tablet); + explicit TsFileTableWriter( + storage::RestorableTsFileIOWriter* restorable_writer, + uint64_t memory_threshold = 128 * 1024 * 1024); + /** - * Flushes any buffered data to the underlying storage medium, ensuring all data is written out. - * This method ensures that all pending writes are persisted. + * Registers a table schema with the writer * - * @return Returns 0 on success, or a non-zero error code on failure. + * @param table_schema The table schema to be registered, cannot be a null pointer + * @return Returns 0 on success, non-zero error code on failure + */ + int register_table(const std::shared_ptr<TableSchema>& table_schema); + + /** + * Writes the specified Tablet data to the target file according to the table schema + * + * @param tablet Tablet containing the data to be written (passed by reference) + * @return Returns 0 on success, non-zero error code on failure + */ + int write_table(Tablet& tablet) const; + + /** + * Flushes all cached data to the underlying storage medium to ensure all data is persisted. + * This method guarantees that all pending data is written to disk. + * + * @return Returns 0 on success, non-zero error code on failure */ int flush(); + /** - * Closes the writer and releases any resources held by it. - * After calling this method, no further operations should be performed on this instance. + * Closes the writer and releases all resources it occupies. + * No subsequent operations should be performed on the current instance after calling this method. * - * @return Returns 0 on success, or a non-zero error code on failure.
+ * @return Returns 0 on success, non-zero error code on failure */ int close(); }; + +} // namespace storage ``` ### TableSchema @@ -214,127 +253,257 @@ public: }; ``` +### RestorableTsFileIOWriter +> V2.3.0 + +```cpp +namespace storage { +/** + * RestorableTsFileIOWriter is used to open a TsFile and perform optional recovery operations on it. + * Inherits from TsFileIOWriter and supports continuous writing after file recovery. + * + * (1) If the TsFile was closed normally: has_crashed()=false, can_write()=false + * + * (2) If the TsFile is incomplete / the program crashed: has_crashed()=true, + * can_write()=true. The writer will truncate the corrupted data and allow further writing. + * + * Implemented based on standard C++11, uses RAII and smart pointers to avoid memory leaks. + */ +class RestorableTsFileIOWriter : public TsFileIOWriter { + public: + RestorableTsFileIOWriter(); + + /** + * Opens a TsFile for recovery / appending data. + * Uses O_RDWR|O_CREAT mode without O_TRUNC, so the original file content is preserved. + * + * @param file_path Path of the TsFile + * @param truncate_corrupted If true, truncate the corrupted data; + * If false, do not truncate (the incomplete file remains unchanged) + * @return E_OK on success, error code on failure + */ + int open(const std::string& file_path, bool truncate_corrupted = true); + + /** + * Closes the file + */ + void close(); +}; + +} // namespace storage +``` + + + ## Read Interface ### Tsfile Reader use to execute query in tsfile and return value by ResultSet. ```cpp +namespace storage { /** - * @brief TsfileReader provides the ability to query all files with the suffix - * .tsfile + * @brief TsFileReader provides the ability to query all files with the .tsfile suffix * - * TsfileReader is designed to query .tsfile files, it accepts tree model - * queries and table model queries, and supports querying metadata such as - * TableSchema and TimeseriesSchema. 
+ * TsFileReader is designed specifically for querying .tsfile files, supporting both tree-model queries and table-model queries. + * It also supports querying metadata such as table schemas (TableSchema) and time-series schemas (TimeseriesSchema). */ class TsFileReader { public: TsFileReader(); - ~TsFileReader(); /** - * @brief open the tsfile + * @brief Opens a TsFile * - * @param file_path the path of the tsfile which will be opened - * @return Returns 0 on success, or a non-zero error code on failure. + * @param file_path Path of the TsFile to be opened + * @return 0 on success, non-zero error code on failure */ - int open(const std::string &file_path); + int open(const std::string& file_path); /** - * @brief close the tsfile, this method should be called after the - * query is finished + * @brief Closes the TsFile. This method should be called after queries are completed. * - * @return Returns 0 on success, or a non-zero error code on failure. + * @return 0 on success, non-zero error code on failure */ int close(); /** - * @brief query the tsfile by the query expression,Users can construct - * their own query expressions to query tsfile + * @brief Queries the TsFile using a query expression. Users can construct custom query expressions for execution. * - * @param [in] qe the query expression - * @param [out] ret_qds the result set - * @return Returns 0 on success, or a non-zero error code on failure. + * @param [in] qe Query expression + * @param [out] ret_qds Result set + * @return 0 on success, non-zero error code on failure */ - int query(storage::QueryExpression *qe, ResultSet *&ret_qds); + int query(storage::QueryExpression* qe, ResultSet*& ret_qds); /** - * @brief query the tsfile by the path list, start time and end time - * this method is used to query the tsfile by the tree model. + * @brief Queries the TsFile by path list, start time, and end time. + * This method is used for tree-model queries on TsFile. 
* - * @param [in] path_list the path list - * @param [in] start_time the start time - * @param [in] end_time the end time - * @param [out] result_set the result set + * @param [in] path_list Path list + * @param [in] start_time Start timestamp + * @param [in] end_time End timestamp + * @param [out] result_set Result set + * @return 0 on success, non-zero error code on failure */ - int query(std::vector<std::string> &path_list, int64_t start_time, - int64_t end_time, ResultSet *&result_set); + int query(std::vector<std::string>& path_list, int64_t start_time, + int64_t end_time, ResultSet*& result_set); /** - * @brief query the tsfile by the table name, columns names, start time - * and end time. this method is used to query the tsfile by the table - * model. + * @brief Queries the TsFile by table name, column names, start time, and end time. + * This method is used for table-model queries on TsFile. * - * @param [in] table_name the table name - * @param [in] columns_names the columns names - * @param [in] start_time the start time - * @param [in] end_time the end time - * @param [out] result_set the result set + * @param [in] table_name Table name + * @param [in] columns_names List of column names + * @param [in] start_time Start timestamp + * @param [in] end_time End timestamp + * @param [out] result_set Result set + * @param [in] batch_size ≤ 0 for row-by-row mode; + * > 0 to return TsBlock chunks of the specified size + * @return 0 on success, non-zero error code on failure */ - int query(const std::string &table_name, - const std::vector<std::string> &columns_names, int64_t start_time, - int64_t end_time, ResultSet *&result_set); + int query(const std::string& table_name, + const std::vector<std::string>& columns_names, int64_t start_time, + int64_t end_time, ResultSet*& result_set, int batch_size = -1); /** - * @brief query the tsfile by the table name, columns names, start time - * and end time, tag filter. this method is used to query the tsfile by the - * table model.
+ * @brief Queries the TsFile by table name, column names, start time, end time, and tag filter conditions. + * This method is used for table-model queries on TsFile. * - * @param [in] table_name the table name - * @param [in] columns_names the columns names - * @param [in] start_time the start time - * @param [in] end_time the end time - * @param [in] tag_filter the tag filter - * @param [out] result_set the result set + * @param [in] table_name Table name + * @param [in] columns_names List of column names + * @param [in] start_time Start timestamp + * @param [in] end_time End timestamp + * @param [in] tag_filter Tag filter condition + * @param [out] result_set Result set + * @param [in] batch_size Batch reading size + * @return 0 on success, non-zero error code on failure */ int query(const std::string& table_name, const std::vector<std::string>& columns_names, int64_t start_time, - int64_t end_time, ResultSet*& result_set, Filter* tag_filter); + int64_t end_time, ResultSet*& result_set, Filter* tag_filter, + int batch_size = 0); /** - * @brief destroy the result set, this method should be called after the - * query is finished and result_set + * @brief Queries tree-model time-series data by row with offset and row limit. * - * @param qds the result set + * @param path_list Full paths to query (device.measurement) + * @param offset Number of starting rows to skip (>= 0) + * @param limit Maximum number of rows to return; no limit if < 0 + * @param[out] result_set Result set to store query results + * @return 0 on success, non-zero error code on failure */ - void destroy_query_data_set(ResultSet *qds); - ResultSet *read_timeseries( - const std::shared_ptr &device_id, - const std::vector<std::string> &measurement_name); + int queryByRow(std::vector<std::string>& path_list, int offset, int limit, + ResultSet*& result_set); + /** - * @brief get all devices in the tsfile + * @brief Queries table-model data by row with pushed-down offset and row limit.
+ * + * For dense devices (all columns have the same row count), + * offset/limit are pushed down to the data block/page level via SSI, + * skipping entire blocks/pages without decoding. + * For sparse devices, offset/limit take effect during row merging. + * Entire devices can be skipped directly if their total rows fall within the offset range. + * + * @param table_name Table name to query + * @param column_names Column names to query + * @param offset Number of starting rows to skip (>= 0) + * @param limit Maximum number of rows to return; no limit if < 0 + * @param[out] result_set Result set to store query results + * @param tag_filter Optional tag filter condition for filtering data by tag columns + * @param batch_size Batch reading size + * @return 0 on success, non-zero error code on failure + */ + int queryByRow(const std::string& table_name, + const std::vector<std::string>& column_names, int offset, + int limit, ResultSet*& result_set, + Filter* tag_filter = nullptr, int batch_size = 0); + + /** + * @brief Performs a table query on the tree model. * - * @param table_name the table name - * @return std::vector> the device id list + * @param measurement_names List of measurement names + * @param start_time Start timestamp + * @param end_time End timestamp + * @param result_set Result set + * @return 0 on success, non-zero error code on failure + */ + int query_table_on_tree(const std::vector<std::string>& measurement_names, + int64_t start_time, int64_t end_time, + ResultSet*& result_set); + /** + * @brief Destroys the result set. This method should be called after the query is completed and the result set is no longer used. + * + * @param qds Result set object + */ + void destroy_query_data_set(ResultSet* qds); + /** + * @brief Reads time-series data by device ID and measurement names.
+ * + * @param device_id Device ID + * @param measurement_name List of measurement names + * @return Result set object + */ + ResultSet* read_timeseries( + const std::shared_ptr& device_id, + const std::vector& measurement_name); + /** + * @brief Gets all devices in the TsFile for a specified table. + * + * @param table_name Table name + * @return List of device IDs */ std::vector> get_all_devices( std::string table_name); + /** - * @brief get the timeseries schema by the device id and measurement name + * @brief Gets all device IDs in the TsFile. * - * @param [in] device_id the device id - * @param [out] result std::vector the measurement schema - * list - * @return Returns 0 on success, or a non-zero error code on failure. + * @return List of device IDs + */ + std::vector> get_all_device_ids(); + + /** + * @brief Gets all device IDs in the file (functionally identical to get_all_device_ids). + * + * @return List of devices + */ + std::vector> get_all_devices(); + + /** + * @brief Gets time-series schemas by device ID and measurement names. + * + * @param [in] device_id Device ID + * @param [out] result List of measurement schemas + * @return 0 on success, non-zero error code on failure */ int get_timeseries_schema(std::shared_ptr device_id, - std::vector &result); + std::vector& result); + + /** + * @brief Gets time-series metadata for specified devices. + * + * Only devices existing in the file are included in the result. + * Returns an empty map if the device ID list is empty. + * + * @param device_ids List of devices to query + * @return Mapping: Device ID -> List of time-series metadata (existing entries only) + */ + DeviceTimeseriesMetadataMap get_timeseries_metadata( + const std::vector>& device_ids); + + /** + * @brief Gets time-series metadata for all devices in the file. 
+ * + * @return Mapping: Device ID -> List of time-series metadata + */ + DeviceTimeseriesMetadataMap get_timeseries_metadata(); + /** - * @brief get the table schema by the table name + * @brief Gets the table schema by table name. * - * @param table_name the table name - * @return std::shared_ptr the table schema + * @param table_name Table name + * @return Shared pointer to the table schema */ std::shared_ptr get_table_schema( - const std::string &table_name); + const std::string& table_name); /** - * @brief get all table schemas in the tsfile + * @brief Gets all table schemas in the TsFile. * - * @return std::vector> the table schema list + * @return List of table schemas */ std::vector> get_all_table_schemas(); }; diff --git a/src/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md b/src/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md index 2ed66d8f5..f74354b0d 100644 --- a/src/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md +++ b/src/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md @@ -147,52 +147,141 @@ class Tablet(object) ```python class TsFileReader: """ - Query table data from a TsFile. - """ - - """ - Initialize a TsFile reader for the specified file path. - :param pathname: The path to the TsFile. - :return no return value. - """ - def __init__(self, pathname) - - - """ - Executes a time range query on the specified table and columns. - - :param table_name: The name of the table to query. - :param column_names: A list of column names to retrieve. - :param start_time: The start time of the query range (default: minimum int64 value). - :param end_time: The end time of the query range (default: maximum int64 value). - :return: A query result set handler. 
- """ - def query_table(self, table_name : str, column_names : List[str], - start_time : int = np.iinfo(np.int64).min, - end_time: int = np.iinfo(np.int64).max) -> ResultSet - - """ - Retrieves the schema of the specified table. - - :param table_name: The name of the table. - :return: The schema of the specified table. - """ - def get_table_schema(self, table_name : str)-> TableSchema - - - """ - Retrieves the schemas of all tables in the TsFile. - - :return: A dictionary mapping table names to their schemas. - """ - def get_all_table_schemas(self) ->dict[str, TableSchema] - - - """ - Closes the TsFile reader. If the reader has active result sets, they will be invalidated. - """ - def close(self) - + Query table data and time-series data from TsFile, providing standardized file reading and query interfaces. + Supports full core capabilities including table model query, tree model query, metadata acquisition, and resource management. + """ + + def __init__(self, pathname: str): + """ + Initialize the TsFile reader for the specified path, complete file loading and underlying reader initialization, + and maintain all active query result sets to ensure all result sets are invalidated synchronously when the reader is closed. + + :param pathname: Full path of the TsFile to be read + :return: No return value + """ + + def query_table(self, table_name: str, column_names: List[str], + start_time: int = np.iinfo(np.int64).min, + end_time: int = np.iinfo(np.int64).max, + tag_filter: Optional[object] = None, + batch_size: int = 0) -> object: + """ + Perform time-range query on the specified table and columns, supporting tag filtering and batch reading mode. + Adapts to both row-by-row return and fixed-size data block return modes to meet reading requirements in different scenarios. 
+ + :param table_name: Name of the target table to query, case-insensitive + :param column_names: List of target column names to retrieve; all columns are queried by default if empty + :param start_time: Start timestamp of the query range, default is the minimum value of int64 type + :param end_time: End timestamp of the query range, default is the maximum value of int64 type + :param tag_filter: Optional parameter, filter conditions based on tag columns, supporting equality, range, and logical combination filters + :param batch_size: Batch reading size; row-by-row mode is enabled when ≤ 0, data blocks are returned by the specified size when > 0 + :return: Encapsulated query result set handler for traversing data, reading data, and obtaining metadata + """ + + def query_table_on_tree(self, column_names: List[str], + start_time: int = np.iinfo(np.int64).min, + end_time: int = np.iinfo(np.int64).max) -> object: + """ + Perform table query on the tree model structure, adapted for query scenarios of native tree-structured time-series data. + Query directly based on measurement names without specifying a table name; path names are case-sensitive. + + :param column_names: List of measurement names to query, corresponding to node paths in the tree structure + :param start_time: Start timestamp of the query range, default is the minimum value of int64 type + :param end_time: End timestamp of the query range, default is the maximum value of int64 type + :return: Result set handler corresponding to the tree model query + """ + + def query_tree_by_row(self, device_ids: List[str], measurement_names: List[str], + offset: int = 0, limit: int = -1) -> object: + """ + Query tree model time-series data by row with pagination, supporting offset skipping and maximum return row limit. + Adapted for large data volume pagination reading to avoid memory overflow caused by loading excessive data at once. 
+ + :param device_ids: List of device IDs to query, cannot be empty + :param measurement_names: List of measurement names to query, cannot be empty + :param offset: Number of starting rows to skip, starting from 0 by default + :param limit: Maximum number of rows to return; no limit if less than 0 + :return: Result set handler for tree model pagination query + """ + + def query_table_by_row(self, table_name: str, column_names: List[str], + offset: int = 0, limit: int = -1, + tag_filter: Optional[object] = None, + batch_size: int = 0) -> object: + """ + Query table model data by row with pagination, supporting offset and row limit pushdown, and can be used with tag filtering. + Invalid data can be skipped at the data block level for dense devices, greatly improving pagination query efficiency. + + :param table_name: Name of the target table to query + :param column_names: List of column names to query + :param offset: Number of starting rows to skip, starting from 0 by default + :param limit: Maximum number of rows to return; no limit if less than 0 + :param tag_filter: Optional parameter, tag filter condition to filter device data that meets the criteria + :param batch_size: Batch reading size, adapted to the underlying data block reading logic + :return: Result set handler for table model pagination query + """ + + def query_timeseries(self, device_name: str, sensor_list: List[str], + start_time: int = 0, end_time: int = 0) -> object: + """ + Perform time-range time-series data query for a single specified device. + Adapted for precise query scenarios of a single device with multiple sensors, simplifying query invocation logic. 
+ + :param device_name: Name/path of the target device + :param sensor_list: List of sensor (measurement) names to query + :param start_time: Query start timestamp; starts from the earliest time of the file by default if 0 + :param end_time: Query end timestamp; ends at the latest time of the file by default if 0 + :return: Result set handler for single-device time-series query + """ + + def get_table_schema(self, table_name: str) -> object: + """ + Get the complete schema information of the specified table, including full metadata such as column names, data types, tag columns, and time-series constraints. + Used to verify the validity of query fields in advance and to parse data structures. + + :param table_name: Name of the target table + :return: Schema information object of the corresponding table, containing full configuration of the table structure + """ + + def get_all_table_schemas(self) -> Dict[str, object]: + """ + Get schema information of all tables in the current TsFile. + Retrieves every table structure in the file in a single call, without querying table by table. + + :return: Dictionary structure, key is table name, value is schema information object of the corresponding table + """ + + def get_all_timeseries_schemas(self) -> List[object]: + """ + Get schema information of all time-series in the TsFile. + Covers field, type, and constraint information of full time-series data in both tree model and table model. + + :return: List of all time-series schema information + """ + + def get_all_devices(self) -> List[str]: + """ + Get identification information of all devices in the TsFile. + Useful for enumerating every device in the file, e.g. for whole-file statistics or as a preparatory step before batch queries.
+ + :return: List composed of all device IDs/device paths + """ + + def get_timeseries_metadata(self, device_ids: Optional[List[str]] = None) -> Dict[str, object]: + """ + Get time-series metadata of specified devices, including data storage segments, field constraints, data ranges, etc. + Returns metadata of all devices by default if no device ID is passed, returns an empty dictionary if an empty list is passed. + + :param device_ids: Optional parameter, list of device IDs to query metadata for + :return: Dictionary structure, key is device path, value is time-series metadata group of the corresponding device + """ + + def close(self) -> None: + """ + Close the TsFile reader, release underlying file handles and memory resources. + Mark all current active query result sets as invalid and prohibit subsequent data reading operations. + No query or metadata acquisition operations can be performed after closing; the reader needs to be reinitialized. + """ ``` ### ResultSet @@ -283,7 +372,6 @@ class ResultSet: def close(self) ``` - ### to_dataframe ```python @@ -315,7 +403,7 @@ def to_dataframe(file_path: str, found in the schema will be used. column_names : Optional[list[str]], default None - List of column names to query. + List of column/measurement names to query. - If None, all columns will be returned. - Column existence will be validated in table-model TsFiles. @@ -353,4 +441,4 @@ def to_dataframe(file_path: str, ColumnNotExistError If any specified column does not exist in the table schema. 
""" -``` +``` \ No newline at end of file diff --git a/src/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-C.md b/src/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-C.md index f55643986..9b152991b 100644 --- a/src/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-C.md +++ b/src/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-C.md @@ -291,22 +291,20 @@ ERRNO tsfile_reader_close(TsFileReader reader); -### Query table/get next +### Query table/get next/query by row ```C - /** - * @brief Query data from the specific table and columns within time range. + * @brief Queries data from the specified table and columns within a given time range. * - * @param reader [in] Valid TsFileReader handle from tsfile_reader_new(). - * @param table_name [in] Target table name. Must exist in the TsFile. - * @param columns [in] Array of column names to fetch. - * @param column_num [in] Number of columns in array. + * @param reader [in] A valid TsFileReader handle obtained by tsfile_reader_new(). + * @param table_name [in] Name of the target table, which must exist in the TsFile. + * @param columns [in] Array of column names to be queried. + * @param column_num [in] Number of columns in the column name array. * @param start_time [in] Start timestamp. - * @param end_time [in] End timestamp. Must ≥ start_time. - * @param err_code [out] RET_OK(0) on success, or error code in errno_define_c.h. - * @return ResultSet Query results handle. Must be freed with - * free_tsfile_result_set(). + * @param end_time [in] End timestamp, which must be greater than or equal to start_time. + * @param err_code [out] Returns RET_OK(0) on success, otherwise returns an error code defined in errno_define_c.h. + * @return ResultSet Handle of the query result set. Must be released by free_tsfile_result_set() after use. 
*/ ResultSet tsfile_query_table(TsFileReader reader, const char* table_name, char** columns, uint32_t column_num, @@ -314,20 +312,58 @@ ResultSet tsfile_query_table(TsFileReader reader, const char* table_name, ERRNO* err_code); /** - * @brief Check and fetch the next row in the ResultSet. + * @brief Checks and retrieves the next row of data in the result set. * - * @param result_set [in] Valid ResultSet handle. - * @param error_code RET_OK(0) on success, or error code in errno_define_c.h. - * @return bool - true: Row available, false: End of data or error. + * @param result_set [in] A valid ResultSet handle. + * @param error_code [out] Returns RET_OK(0) on success, otherwise returns an error code defined in errno_define_c.h. + * @return bool - true: Next row exists, false: Reached the end or an error occurred. */ bool tsfile_result_set_next(ResultSet result_set, ERRNO* error_code); /** - * @brief Free Result set + * @brief Releases the resources of the result set. * - * @param result_set [in] Valid ResultSet handle ptr. + * @param result_set [in] Pointer to a valid ResultSet handle. 
*/ void free_tsfile_result_set(ResultSet* result_set); + +/** + * @brief Queries time-series data by row (tree model), supporting offset and row count limitation + * + * @param reader [in] A valid TsFileReader handle obtained by tsfile_reader_new() + * @param device_ids [in] Array of device IDs + * @param device_ids_len [in] Number of device IDs + * @param measurement_names [in] Array of measurement (sensor) names + * @param measurement_names_len [in] Number of measurement names + * @param offset [in] Number of starting rows to skip (must be >= 0) + * @param limit [in] Maximum number of rows to return, < 0 means no limitation + * @param err_code [out] Returns RET_OK(0) on success, otherwise returns an error code defined in errno_define_c.h + * @return Returns ResultSet handle on success, NULL on failure + */ +ResultSet tsfile_reader_query_tree_by_row(TsFileReader reader, + char** device_ids, int device_ids_len, + char** measurement_names, + int measurement_names_len, int offset, + int limit, ERRNO* err_code); + +/** + * @brief Queries table model data by row, supporting offset and row count limitation pushdown + * + * @param reader [in] A valid TsFileReader handle obtained by tsfile_reader_new() + * @param table_name [in] Name of the target table + * @param column_names [in] Array of column names to be queried + * @param column_names_len [in] Number of columns to be queried + * @param offset [in] Number of starting rows to skip (must be >= 0) + * @param limit [in] Maximum number of rows to return, < 0 means no limitation + * @param tag_filter [in] Tag filter handle + * @param batch_size [in] Batch size for data query + * @param err_code [out] Returns RET_OK(0) on success, otherwise returns an error code defined in errno_define_c.h + * @return Returns ResultSet handle on success, NULL on failure + */ +ResultSet tsfile_reader_query_table_by_row( + TsFileReader reader, const char* table_name, char** column_names, + int column_names_len, int offset, int limit, TagFilterHandle tag_filter, + int batch_size, ERRNO* err_code); ``` diff --git 
a/src/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-CPP.md b/src/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-CPP.md index b1642f88b..5f5f5002b 100644 --- a/src/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-CPP.md +++ b/src/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-CPP.md @@ -27,53 +27,92 @@ Used to write data to tsfile ```cpp +namespace storage { +class RestorableTsFileIOWriter; + /** - * @brief Facilitates writing structured table data into a TsFile with a specified schema. + * @brief Supports writing structured table data to TsFile according to the specified table schema * - * The TsFileTableWriter class is designed to write structured data, particularly suitable for time-series data, - * into a file optimized for efficient storage and retrieval (referred to as TsFile here). It allows users to define - * the schema of the tables they want to write, add rows of data according to that schema, and serialize this data - * into a TsFile. Additionally, it provides options to limit memory usage during the writing process. + * The TsFileTableWriter class is used to write structured data (especially suitable for time-series data) + * to TsFile optimized for efficient storage and querying. + * Users can define the structure of the table to be written, add data rows according to the structure, + * and serialize the data into TsFile. + * Meanwhile, this class provides the ability to limit memory usage during the writing process. */ class TsFileTableWriter { public: /** - * TsFileTableWriter is used to write table data into a target file with the given schema, - * optionally limiting the memory usage. + * TsFileTableWriter is used to write table data to the target file according to the specified table schema, + * and can optionally limit the memory usage. * - * @param writer_file Target file where the table data will be written. Must not be null. 
- * @param table_schema Used to construct table structures. Defines the schema of the table - * being written. - * @param memory_threshold Optional parameter. When the size of written - * data exceeds this value, the data will be automatically flushed to the - * disk. Default value is 128MB. + * @param writer_file Target file for writing table data, cannot be a null pointer + * @param table_schema Used to construct the table structure and define the schema of the table to be written + * @param memory_threshold Optional parameter. When the written data volume exceeds this threshold, + * data will be automatically flushed to disk. The default value is 128MB */ - TsFileTableWriter(WriteFile* writer_file, - TableSchema* table_schema, - uint64_t memory_threshold = 128 * 1024 * 1024); - ~TsFileTableWriter(); + template <typename T> + explicit TsFileTableWriter(storage::WriteFile* writer_file, T* table_schema, + uint64_t memory_threshold = 128 * 1024 * 1024) { + static_assert(!std::is_same<T, std::nullptr_t>::value, + "table_schema cannot be nullptr"); + tsfile_writer_ = std::make_shared<TsFileWriter>(); + tsfile_writer_->init(writer_file); + tsfile_writer_->set_generate_table_schema(false); + + // Perform a deep copy. The source TableSchema object may be allocated on the stack/heap + auto table_schema_ptr = std::make_shared<TableSchema>(*table_schema); + error_number = tsfile_writer_->register_table(table_schema_ptr); + exclusive_table_name_ = table_schema->get_table_name(); + common::g_config_value_.chunk_group_size_threshold_ = memory_threshold; + } + /** - * Writes the given tablet data into the target file according to the schema. + * Constructs TsFileTableWriter from a restorable TsFileIOWriter, + * supporting appending table data after failure recovery. + * The schema is read from the recovered file without additional TableSchema input. * - * @param tablet The tablet containing the data to be written. Must not be null. - * @return Returns 0 on success, or a non-zero error code on failure. 
+ * @param restorable_writer Recovered I/O writer; cannot be a null pointer, + * and must be opened in truncate mode to ensure can_write() returns true + * @param memory_threshold Optional memory threshold for cached data */ - int write_table(const Tablet& tablet); + explicit TsFileTableWriter( + storage::RestorableTsFileIOWriter* restorable_writer, + uint64_t memory_threshold = 128 * 1024 * 1024); + /** - * Flushes any buffered data to the underlying storage medium, ensuring all data is written out. - * This method ensures that all pending writes are persisted. + * Registers a table schema with the writer * - * @return Returns 0 on success, or a non-zero error code on failure. + * @param table_schema The table schema to be registered, cannot be a null pointer + * @return Returns 0 on success, non-zero error code on failure + */ + int register_table(const std::shared_ptr& table_schema); + + /** + * Writes the specified Tablet data to the target file according to the table schema + * + * @param tablet Tablet containing the data to be written, cannot be a null pointer + * @return Returns 0 on success, non-zero error code on failure + */ + int write_table(Tablet& tablet) const; + + /** + * Flushes all cached data to the underlying storage medium to ensure all data is persisted. + * This method guarantees that all pending data is written to disk. + * + * @return Returns 0 on success, non-zero error code on failure */ int flush(); + /** - * Closes the writer and releases any resources held by it. - * After calling this method, no further operations should be performed on this instance. + * Closes the writer and releases all resources it occupies. + * No subsequent operations should be performed on the current instance after calling this method. * - * @return Returns 0 on success, or a non-zero error code on failure. 
+ * @return Returns 0 on success, non-zero error code on failure */ int close(); }; + +} // namespace storage ``` ### TableSchema @@ -214,127 +253,257 @@ public: }; ``` +### RestorableTsFileIOWriter +> V2.3.0 + +```cpp +namespace storage { +/** + * RestorableTsFileIOWriter is used to open a TsFile and perform optional recovery operations on it. + * Inherits from TsFileIOWriter and supports continuous writing after file recovery. + * + * (1) If the TsFile was closed normally: has_crashed()=false, can_write()=false + * + * (2) If the TsFile is incomplete / the program crashed: has_crashed()=true, + * can_write()=true. The writer will truncate the corrupted data and allow further writing. + * + * Implemented based on standard C++11, uses RAII and smart pointers to avoid memory leaks. + */ +class RestorableTsFileIOWriter : public TsFileIOWriter { + public: + RestorableTsFileIOWriter(); + + /** + * Opens a TsFile for recovery / appending data. + * Uses O_RDWR|O_CREAT mode without O_TRUNC, so the original file content is preserved. + * + * @param file_path Path of the TsFile + * @param truncate_corrupted If true, truncate the corrupted data; + * If false, do not truncate (the incomplete file remains unchanged) + * @return E_OK on success, error code on failure + */ + int open(const std::string& file_path, bool truncate_corrupted = true); + + /** + * Closes the file + */ + void close(); +}; + +} // namespace storage +``` + + + ## Read Interface ### Tsfile Reader use to execute query in tsfile and return value by ResultSet. ```cpp +namespace storage { /** - * @brief TsfileReader provides the ability to query all files with the suffix - * .tsfile + * @brief TsFileReader provides the ability to query all files with the .tsfile suffix * - * TsfileReader is designed to query .tsfile files, it accepts tree model - * queries and table model queries, and supports querying metadata such as - * TableSchema and TimeseriesSchema. 
+ * TsFileReader is designed specifically for querying .tsfile files, supporting both tree-model queries and table-model queries. + * It also supports querying metadata such as table schemas (TableSchema) and time-series schemas (TimeseriesSchema). */ class TsFileReader { public: TsFileReader(); - ~TsFileReader(); /** - * @brief open the tsfile + * @brief Opens a TsFile * - * @param file_path the path of the tsfile which will be opened - * @return Returns 0 on success, or a non-zero error code on failure. + * @param file_path Path of the TsFile to be opened + * @return 0 on success, non-zero error code on failure */ - int open(const std::string &file_path); + int open(const std::string& file_path); /** - * @brief close the tsfile, this method should be called after the - * query is finished + * @brief Closes the TsFile. This method should be called after queries are completed. * - * @return Returns 0 on success, or a non-zero error code on failure. + * @return 0 on success, non-zero error code on failure */ int close(); /** - * @brief query the tsfile by the query expression,Users can construct - * their own query expressions to query tsfile + * @brief Queries the TsFile using a query expression. Users can construct custom query expressions for execution. * - * @param [in] qe the query expression - * @param [out] ret_qds the result set - * @return Returns 0 on success, or a non-zero error code on failure. + * @param [in] qe Query expression + * @param [out] ret_qds Result set + * @return 0 on success, non-zero error code on failure */ - int query(storage::QueryExpression *qe, ResultSet *&ret_qds); + int query(storage::QueryExpression* qe, ResultSet*& ret_qds); /** - * @brief query the tsfile by the path list, start time and end time - * this method is used to query the tsfile by the tree model. + * @brief Queries the TsFile by path list, start time, and end time. + * This method is used for tree-model queries on TsFile. 
* - * @param [in] path_list the path list - * @param [in] start_time the start time - * @param [in] end_time the end time - * @param [out] result_set the result set + * @param [in] path_list Path list + * @param [in] start_time Start timestamp + * @param [in] end_time End timestamp + * @param [out] result_set Result set + * @return 0 on success, non-zero error code on failure */ - int query(std::vector &path_list, int64_t start_time, - int64_t end_time, ResultSet *&result_set); + int query(std::vector& path_list, int64_t start_time, + int64_t end_time, ResultSet*& result_set); /** - * @brief query the tsfile by the table name, columns names, start time - * and end time. this method is used to query the tsfile by the table - * model. + * @brief Queries the TsFile by table name, column names, start time, and end time. + * This method is used for table-model queries on TsFile. * - * @param [in] table_name the table name - * @param [in] columns_names the columns names - * @param [in] start_time the start time - * @param [in] end_time the end time - * @param [out] result_set the result set + * @param [in] table_name Table name + * @param [in] columns_names List of column names + * @param [in] start_time Start timestamp + * @param [in] end_time End timestamp + * @param [out] result_set Result set + * @param [in] batch_size ≤ 0 for row-by-row mode; + * > 0 to return TsBlock chunks of the specified size + * @return 0 on success, non-zero error code on failure */ - int query(const std::string &table_name, - const std::vector &columns_names, int64_t start_time, - int64_t end_time, ResultSet *&result_set); + int query(const std::string& table_name, + const std::vector& columns_names, int64_t start_time, + int64_t end_time, ResultSet*& result_set, int batch_size = -1); /** - * @brief query the tsfile by the table name, columns names, start time - * and end time, tag filter. this method is used to query the tsfile by the - * table model. 
+ * @brief Queries the TsFile by table name, column names, start time, end time, and tag filter conditions. + * This method is used for table-model queries on TsFile. * - * @param [in] table_name the table name - * @param [in] columns_names the columns names - * @param [in] start_time the start time - * @param [in] end_time the end time - * @param [in] tag_filter the tag filter - * @param [out] result_set the result set + * @param [in] table_name Table name + * @param [in] columns_names List of column names + * @param [in] start_time Start timestamp + * @param [in] end_time End timestamp + * @param [in] tag_filter Tag filter condition + * @param [out] result_set Result set + * @param [in] batch_size Batch reading size + * @return 0 on success, non-zero error code on failure */ int query(const std::string& table_name, const std::vector& columns_names, int64_t start_time, - int64_t end_time, ResultSet*& result_set, Filter* tag_filter); + int64_t end_time, ResultSet*& result_set, Filter* tag_filter, + int batch_size = 0); /** - * @brief destroy the result set, this method should be called after the - * query is finished and result_set + * @brief Queries tree-model time-series data by row with offset and row limit. * - * @param qds the result set + * @param path_list Full paths to query (device.measurement) + * @param offset Number of starting rows to skip (>= 0) + * @param limit Maximum number of rows to return; no limit if < 0 + * @param[out] result_set Result set to store query results + * @return 0 on success, non-zero error code on failure */ - void destroy_query_data_set(ResultSet *qds); - ResultSet *read_timeseries( - const std::shared_ptr &device_id, - const std::vector &measurement_name); + int queryByRow(std::vector& path_list, int offset, int limit, + ResultSet*& result_set); + /** - * @brief get all devices in the tsfile + * @brief Queries table-model data by row with pushed-down offset and row limit. 
+ * + * For dense devices (all columns have the same row count), + * offset/limit are pushed down to the data block/page level via SSI, + * skipping entire blocks/pages without decoding. + * For sparse devices, offset/limit take effect during row merging. + * Entire devices can be skipped directly if their total rows fall within the offset range. + * + * @param table_name Table name to query + * @param column_names Column names to query + * @param offset Number of starting rows to skip (>= 0) + * @param limit Maximum number of rows to return; no limit if < 0 + * @param[out] result_set Result set to store query results + * @param tag_filter Optional tag filter condition for filtering data by tag columns + * @param batch_size Batch reading size + * @return 0 on success, non-zero error code on failure + */ + int queryByRow(const std::string& table_name, + const std::vector& column_names, int offset, + int limit, ResultSet*& result_set, + Filter* tag_filter = nullptr, int batch_size = 0); + + /** + * @brief Performs a table query on the tree model. * - * @param table_name the table name - * @return std::vector> the device id list + * @param measurement_names List of measurement names + * @param start_time Start timestamp + * @param end_time End timestamp + * @param result_set Result set + * @return 0 on success, non-zero error code on failure + */ + int query_table_on_tree(const std::vector& measurement_names, + int64_t start_time, int64_t end_time, + ResultSet*& result_set); + /** + * @brief Destroys the result set. This method should be called after the query is completed and the result set is no longer used. + * + * @param qds Result set object + */ + void destroy_query_data_set(ResultSet* qds); + /** + * @brief Reads time-series data by device ID and measurement names. 
+ * + * @param device_id Device ID + * @param measurement_name List of measurement names + * @return Result set object + */ + ResultSet* read_timeseries( + const std::shared_ptr& device_id, + const std::vector& measurement_name); + /** + * @brief Gets all devices in the TsFile for a specified table. + * + * @param table_name Table name + * @return List of device IDs */ std::vector> get_all_devices( std::string table_name); + /** - * @brief get the timeseries schema by the device id and measurement name + * @brief Gets all device IDs in the TsFile. * - * @param [in] device_id the device id - * @param [out] result std::vector the measurement schema - * list - * @return Returns 0 on success, or a non-zero error code on failure. + * @return List of device IDs + */ + std::vector> get_all_device_ids(); + + /** + * @brief Gets all device IDs in the file (functionally identical to get_all_device_ids). + * + * @return List of devices + */ + std::vector> get_all_devices(); + + /** + * @brief Gets time-series schemas by device ID and measurement names. + * + * @param [in] device_id Device ID + * @param [out] result List of measurement schemas + * @return 0 on success, non-zero error code on failure */ int get_timeseries_schema(std::shared_ptr device_id, - std::vector &result); + std::vector& result); + + /** + * @brief Gets time-series metadata for specified devices. + * + * Only devices existing in the file are included in the result. + * Returns an empty map if the device ID list is empty. + * + * @param device_ids List of devices to query + * @return Mapping: Device ID -> List of time-series metadata (existing entries only) + */ + DeviceTimeseriesMetadataMap get_timeseries_metadata( + const std::vector>& device_ids); + + /** + * @brief Gets time-series metadata for all devices in the file. 
+ * + * @return Mapping: Device ID -> List of time-series metadata + */ + DeviceTimeseriesMetadataMap get_timeseries_metadata(); + /** - * @brief get the table schema by the table name + * @brief Gets the table schema by table name. * - * @param table_name the table name - * @return std::shared_ptr the table schema + * @param table_name Table name + * @return Shared pointer to the table schema */ std::shared_ptr get_table_schema( - const std::string &table_name); + const std::string& table_name); /** - * @brief get all table schemas in the tsfile + * @brief Gets all table schemas in the TsFile. * - * @return std::vector> the table schema list + * @return List of table schemas */ std::vector> get_all_table_schemas(); }; diff --git a/src/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md b/src/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md index 849f1c000..f74354b0d 100644 --- a/src/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md +++ b/src/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md @@ -147,52 +147,141 @@ class Tablet(object) ```python class TsFileReader: """ - Query table data from a TsFile. - """ - - """ - Initialize a TsFile reader for the specified file path. - :param pathname: The path to the TsFile. - :return no return value. - """ - def __init__(self, pathname) - - - """ - Executes a time range query on the specified table and columns. - - :param table_name: The name of the table to query. - :param column_names: A list of column names to retrieve. - :param start_time: The start time of the query range (default: minimum int64 value). - :param end_time: The end time of the query range (default: maximum int64 value). - :return: A query result set handler. 
- """ - def query_table(self, table_name : str, column_names : List[str], - start_time : int = np.iinfo(np.int64).min, - end_time: int = np.iinfo(np.int64).max) -> ResultSet - - """ - Retrieves the schema of the specified table. - - :param table_name: The name of the table. - :return: The schema of the specified table. - """ - def get_table_schema(self, table_name : str)-> TableSchema - - - """ - Retrieves the schemas of all tables in the TsFile. - - :return: A dictionary mapping table names to their schemas. - """ - def get_all_table_schemas(self) ->dict[str, TableSchema] - - - """ - Closes the TsFile reader. If the reader has active result sets, they will be invalidated. - """ - def close(self) - + Query table data and time-series data from TsFile, providing standardized file reading and query interfaces. + Supports full core capabilities including table model query, tree model query, metadata acquisition, and resource management. + """ + + def __init__(self, pathname: str): + """ + Initialize the TsFile reader for the specified path, complete file loading and underlying reader initialization, + and maintain all active query result sets to ensure all result sets are invalidated synchronously when the reader is closed. + + :param pathname: Full path of the TsFile to be read + :return: No return value + """ + + def query_table(self, table_name: str, column_names: List[str], + start_time: int = np.iinfo(np.int64).min, + end_time: int = np.iinfo(np.int64).max, + tag_filter: Optional[object] = None, + batch_size: int = 0) -> object: + """ + Perform time-range query on the specified table and columns, supporting tag filtering and batch reading mode. + Adapts to both row-by-row return and fixed-size data block return modes to meet reading requirements in different scenarios. 
+ + :param table_name: Name of the target table to query, case-insensitive + :param column_names: List of target column names to retrieve; all columns are queried by default if empty + :param start_time: Start timestamp of the query range, default is the minimum value of int64 type + :param end_time: End timestamp of the query range, default is the maximum value of int64 type + :param tag_filter: Optional parameter, filter conditions based on tag columns, supporting equality, range, and logical combination filters + :param batch_size: Batch reading size; row-by-row mode is enabled when ≤ 0, data blocks are returned by the specified size when > 0 + :return: Encapsulated query result set handler for traversing data, reading data, and obtaining metadata + """ + + def query_table_on_tree(self, column_names: List[str], + start_time: int = np.iinfo(np.int64).min, + end_time: int = np.iinfo(np.int64).max) -> object: + """ + Perform table query on the tree model structure, adapted for query scenarios of native tree-structured time-series data. + Query directly based on measurement names without specifying a table name; path names are case-sensitive. + + :param column_names: List of measurement names to query, corresponding to node paths in the tree structure + :param start_time: Start timestamp of the query range, default is the minimum value of int64 type + :param end_time: End timestamp of the query range, default is the maximum value of int64 type + :return: Result set handler corresponding to the tree model query + """ + + def query_tree_by_row(self, device_ids: List[str], measurement_names: List[str], + offset: int = 0, limit: int = -1) -> object: + """ + Query tree model time-series data by row with pagination, supporting offset skipping and maximum return row limit. + Adapted for large data volume pagination reading to avoid memory overflow caused by loading excessive data at once. 
+ + :param device_ids: List of device IDs to query, cannot be empty + :param measurement_names: List of measurement names to query, cannot be empty + :param offset: Number of starting rows to skip, starting from 0 by default + :param limit: Maximum number of rows to return; no limit if less than 0 + :return: Result set handler for tree model pagination query + """ + + def query_table_by_row(self, table_name: str, column_names: List[str], + offset: int = 0, limit: int = -1, + tag_filter: Optional[object] = None, + batch_size: int = 0) -> object: + """ + Query table model data by row with pagination, supporting offset and row limit pushdown, and can be used with tag filtering. + Invalid data can be skipped at the data block level for dense devices, greatly improving pagination query efficiency. + + :param table_name: Name of the target table to query + :param column_names: List of column names to query + :param offset: Number of starting rows to skip, starting from 0 by default + :param limit: Maximum number of rows to return; no limit if less than 0 + :param tag_filter: Optional parameter, tag filter condition to filter device data that meets the criteria + :param batch_size: Batch reading size, adapted to the underlying data block reading logic + :return: Result set handler for table model pagination query + """ + + def query_timeseries(self, device_name: str, sensor_list: List[str], + start_time: int = 0, end_time: int = 0) -> object: + """ + Perform time-range time-series data query for a single specified device. + Adapted for precise query scenarios of a single device with multiple sensors, simplifying query invocation logic. 
+ + :param device_name: Name/path of the target device + :param sensor_list: List of sensor (measurement) names to query + :param start_time: Query start timestamp; starts from the earliest time of the file by default if 0 + :param end_time: Query end timestamp; ends at the latest time of the file by default if 0 + :return: Result set handler for single-device time-series query + """ + + def get_table_schema(self, table_name: str) -> object: + """ + Get the complete schema information of the specified table, including full metadata such as column names, data types, tag columns, and time-series constraints. + Use it to validate query fields in advance and to parse data structures. + + :param table_name: Name of the target table + :return: Schema information object of the corresponding table, containing full configuration of the table structure + """ + + def get_all_table_schemas(self) -> Dict[str, object]: + """ + Get schema information of all tables in the current TsFile. + Returns the structure of every table in the file in a single call, without querying table by table. + + :return: Dictionary structure, key is table name, value is schema information object of the corresponding table + """ + + def get_all_timeseries_schemas(self) -> List[object]: + """ + Get schema information of all time-series in the TsFile. + Covers the field, type, and constraint information of all time-series data in both the tree model and the table model. + + :return: List of all time-series schema information + """ + + def get_all_devices(self) -> List[str]: + """ + Get identification information of all devices in the TsFile. + Useful for enumerating every device in the file, e.g. for whole-file device statistics or as a preparatory step before batch queries. 
+ + :return: List composed of all device IDs/device paths + """ + + def get_timeseries_metadata(self, device_ids: Optional[List[str]] = None) -> Dict[str, object]: + """ + Get time-series metadata of specified devices, including data storage segments, field constraints, data ranges, etc. + Returns metadata of all devices by default if no device ID is passed, returns an empty dictionary if an empty list is passed. + + :param device_ids: Optional parameter, list of device IDs to query metadata for + :return: Dictionary structure, key is device path, value is time-series metadata group of the corresponding device + """ + + def close(self) -> None: + """ + Close the TsFile reader, release underlying file handles and memory resources. + Mark all current active query result sets as invalid and prohibit subsequent data reading operations. + No query or metadata acquisition operations can be performed after closing; the reader needs to be reinitialized. + """ ``` ### ResultSet diff --git a/src/zh/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-C.md b/src/zh/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-C.md index 45db6f3ff..c08b06d1b 100644 --- a/src/zh/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-C.md +++ b/src/zh/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-C.md @@ -277,7 +277,7 @@ ERRNO tsfile_reader_close(TsFileReader reader); -### 查询表 / 获取下一行 +### 查询表 / 获取下一行 / 按行查询 ```C @@ -313,6 +313,44 @@ bool tsfile_result_set_next(ResultSet result_set, ERRNO* error_code); * @param result_set [输入] 有效的 ResultSet 句柄指针。 */ void free_tsfile_result_set(ResultSet* result_set); + +/** + * @brief 按行查询时间序列数据(树模型),支持偏移量与行数限制 + * + * @param reader [in] 有效的 TsFileReader 句柄,通过 tsfile_reader_new() 获取 + * @param device_ids [in] 设备 ID 数组 + * @param device_ids_len [in] 设备 ID 的数量 + * @param measurement_names [in] 测量项(传感器)名称数组 + * @param measurement_names_len [in] 测量项名称的数量 + * @param offset [in] 需要跳过的起始行数(必须 
>= 0) + * @param limit [in] 最多返回的行数,< 0 表示不限制 + * @param err_code [out] 错误码,成功返回 E_OK(0) + * @return 成功返回结果集 ResultSet 句柄,失败返回 NULL + */ +ResultSet tsfile_reader_query_tree_by_row(TsFileReader reader, + char** device_ids, int device_ids_len, + char** measurement_names, + int measurement_names_len, int offset, + int limit, ERRNO* err_code); + +/** + * @brief 按行查询表模型数据,支持偏移量与行数限制下推 + * + * @param reader [in] 有效的 TsFileReader 句柄,通过 tsfile_reader_new() 获取 + * @param table_name [in] 目标表名 + * @param column_names [in] 要查询的列名数组 + * @param column_names_len [in] 要查询的列数量 + * @param offset [in] 需要跳过的起始行数(必须 >= 0) + * @param limit [in] 最多返回的行数,< 0 表示不限制 + * @param tag_filter [in] 标签过滤条件句柄,用于按标签列过滤数据 + * @param batch_size [in] 批量读取大小 + * @param err_code [out] 错误码,成功返回 E_OK(0) + * @return 成功返回结果集 ResultSet 句柄,失败返回 NULL + */ +ResultSet tsfile_reader_query_table_by_row( + TsFileReader reader, const char* table_name, char** column_names, + int column_names_len, int offset, int limit, TagFilterHandle tag_filter, + int batch_size, ERRNO* err_code); ``` diff --git a/src/zh/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-CPP.md b/src/zh/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-CPP.md index 494cc34ab..e98fd6111 100644 --- a/src/zh/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-CPP.md +++ b/src/zh/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-CPP.md @@ -18,7 +18,7 @@ under the License. --> -# 接口定义 - Cpp +# 接口定义 - C++ ## 写入接口 @@ -27,55 +27,89 @@ 用于写入 TsFile. 
```cpp +namespace storage { +class RestorableTsFileIOWriter; + /** - * @brief 用于将结构化表格数据写入具有指定模式的 TsFile。 + * @brief 支持按照指定表结构,将结构化表数据写入 TsFile 文件 * - * TsFileTableWriter 类被设计用于写入结构化数据,特别适合时序数据, - * 数据将被写入一种为高效存储与检索优化的文件格式(即 TsFile)。该类允许用户定义 - * 所需写入表的模式,按照该模式添加数据行,并将这些数据序列化写入 TsFile。 - * 此外,还提供了在写入过程中限制内存使用的选项。 + * TsFileTableWriter 类用于将结构化数据(特别适用于时序数据) + * 写入专为高效存储与查询优化的 TsFile 文件。 + * 使用者可定义待写入表的结构,按照该结构添加数据行, + * 并将数据序列化为 TsFile。 + * 同时,该类提供写入过程中的内存使用限制能力。 */ class TsFileTableWriter { public: /** - * TsFileTableWriter 用于将表格数据写入具有指定模式的目标文件, - * 可选地限制内存使用。 + * TsFileTableWriter 用于根据指定的表结构,将表数据写入目标文件, + * 并可选择性地限制内存使用量。 * - * @param writer_file 要写入表数据的目标文件。不能为空。 - * @param table_schema 用于构造表结构,定义正在写入表的模式。 - * @param memory_threshold 可选参数。当写入数据的大小超过该值时, - * 数据将自动刷新到磁盘。默认值为 128MB。 + * @param writer_file 表数据的目标写入文件,不能为空指针 + * @param table_schema 用于构建表结构,定义待写入表的 schema + * @param memory_threshold 可选参数。当已写入数据量超过该阈值时, + * 数据将自动刷新到磁盘。默认值为 128MB */ + template <typename T> + explicit TsFileTableWriter(storage::WriteFile* writer_file, T* table_schema, + uint64_t memory_threshold = 128 * 1024 * 1024) { + static_assert(!std::is_same<T, std::nullptr_t>::value, + "table_schema cannot be nullptr"); + tsfile_writer_ = std::make_shared<TsFileWriter>(); + tsfile_writer_->init(writer_file); + tsfile_writer_->set_generate_table_schema(false); + + // 执行深拷贝。源 TableSchema 对象可能分配在栈/堆上 + auto table_schema_ptr = std::make_shared<TableSchema>(*table_schema); + error_number = tsfile_writer_->register_table(table_schema_ptr); + exclusive_table_name_ = table_schema->get_table_name(); + common::g_config_value_.chunk_group_size_threshold_ = memory_threshold; + } - TsFileTableWriter(WriteFile* writer_file, - TableSchema* table_schema, - uint64_t memory_threshold = 128 * 1024 * 1024); - ~TsFileTableWriter(); /** - * 将给定的 Tablet 数据按照表的模式写入目标文件。 + * 通过可恢复的 TsFileIOWriter 构建 TsFileTableWriter, + * 支持在故障恢复后追加表数据。Schema 从已恢复的文件中读取, + * 无需额外传入 TableSchema。 * - * @param tablet 包含待写入数据的 Tablet。不能为空。 - * @return 成功时返回 0,失败时返回 errno_define.h 中的非零错误码。 + * @param 
restorable_writer 已恢复的 I/O 写入器;不能为空指针, + * 且必须以截断模式打开,保证 can_write() 返回 true + * @param memory_threshold 可选的缓存数据内存阈值 */ + explicit TsFileTableWriter( + storage::RestorableTsFileIOWriter* restorable_writer, + uint64_t memory_threshold = 128 * 1024 * 1024); - int write_table(const Tablet& tablet); /** - * 将所有缓冲数据刷新到底层存储介质,确保所有数据都已写出。 - * 此方法确保所有未完成的写入操作被持久化。 + * 向写入器注册表结构 * - * @return 成功时返回 0,失败时返回 errno_define.h 中的非零错误码。 + * @param table_schema 待注册的表结构,不能为空指针 + * @return 成功返回 0,失败返回非零错误码 + */ + int register_table(const std::shared_ptr& table_schema); + /** + * 根据表结构,将指定的 Tablet 数据写入目标文件 + * + * @param tablet 包含待写入数据的 Tablet,不能为空指针 + * @return 成功返回 0,失败返回非零错误码 + */ + int write_table(Tablet& tablet) const; + /** + * 将所有缓存数据刷新到底层存储介质,确保所有数据都被持久化。 + * 该方法保证所有待写入数据都被落盘。 + * + * @return 成功返回 0,失败返回非零错误码 */ - int flush(); /** * 关闭写入器并释放其占用的所有资源。 - * 调用此方法后,不应再对该实例执行任何操作。 + * 调用该方法后,不应对当前实例执行任何后续操作。 * - * @return 成功时返回 0,失败时返回 errno_define.h 中的非零错误码。 + * @return 成功返回 0,失败返回非零错误码 */ - int close(); }; + +} // namespace storage ``` ### TableSchema @@ -153,7 +187,6 @@ enum TSDataType : uint8_t { ### Tablet - ```cpp /** * @brief 表示用于插入到表中的数据行集合及其相关元数据。 @@ -218,120 +251,248 @@ public: }; ``` +### RestorableTsFileIOWriter +> V2.3.0 + +```cpp +namespace storage { +/** + * RestorableTsFileIOWriter 用于打开 TsFile 并对其进行可选的恢复操作 + * 继承自 TsFileIOWriter,支持在文件恢复后继续写入 + * + * (1) 若 TsFile 正常关闭:has_crashed()=false,can_write()=false + * + * (2) 若 TsFile 不完整/程序崩溃:has_crashed()=true, + * can_write()=true,写入器会截断损坏数据并允许继续写入 + * + * 基于标准 C++11 实现,通过 RAII 和智能指针避免内存泄漏 + */ +class RestorableTsFileIOWriter : public TsFileIOWriter { + public: + RestorableTsFileIOWriter(); + + /** + * 打开 TsFile 用于恢复/追加写入 + * 使用 O_RDWR|O_CREAT 模式,不使用 O_TRUNC,因此会保留文件原有内容 + * + * @param file_path TsFile 文件路径 + * @param truncate_corrupted 若为 true,则截断损坏的数据; + * 若为 false,则不截断(不完整文件保持原样) + * @return 成功返回 E_OK,失败返回错误码 + */ + int open(const std::string& file_path, bool truncate_corrupted = true); + + /** + * 关闭文件 + */ + void 
close(); +}; + +} // namespace storage +``` + + ## 读取接口 ### Tsfile Reader ```cpp /** - * @brief TsFileReader 提供了查询所有以 .tsfile 为后缀的文件的能力。 + * @brief TsFileReader 提供查询所有后缀为 .tsfile 的文件的能力 * - * TsFileReader 旨在用于查询 .tsfile 文件,它支持树模型查询和表模型查询, - * 并支持查询元数据信息,如 TableSchema 和 TimeseriesSchema。 + * TsFileReader 专为查询 .tsfile 文件设计,支持树模型查询和表模型查询, + * 同时支持查询表结构(TableSchema)、时间序列结构(TimeseriesSchema)等元数据。 */ - class TsFileReader { public: TsFileReader(); - ~TsFileReader(); /** - * @brief 打开 tsfile 文件。 + * @brief 打开 tsfile 文件 * - * @param file_path 要打开的 tsfile 文件路径。 - * @return 成功时返回 0,失败时返回 errno_define.h 中的非零错误码。 + * @param file_path 待打开的 tsfile 文件路径 + * @return 成功返回0,失败返回非零错误码 */ - - int open(const std::string &file_path); + int open(const std::string& file_path); /** - * @brief 关闭 tsfile,查询完成后应调用此方法。 + * @brief 关闭 tsfile 文件,该方法应在查询完成后调用 * - * @return 成功时返回 0,失败时返回 errno_define.h 中的非零错误码。 + * @return 成功返回0,失败返回非零错误码 */ int close(); /** - * @brief 通过查询表达式对 tsfile 进行查询,用户可以自行构造查询表达式来查询 tsfile。 + * @brief 通过查询表达式查询 tsfile 文件,用户可自行构造查询表达式进行查询 * - * @param [in] qe 查询表达式。 - * @param [out] ret_qds 查询结果集。 - * @return 成功时返回 0,失败时返回 errno_define.h 中的非零错误码。 + * @param [in] qe 查询表达式 + * @param [out] ret_qds 结果集 + * @return 成功返回0,失败返回非零错误码 */ - int query(storage::QueryExpression *qe, ResultSet *&ret_qds); + int query(storage::QueryExpression* qe, ResultSet*& ret_qds); /** - * @brief 通过路径列表、起始时间和结束时间查询 tsfile, - * 本方法使用树模型进行查询。 + * @brief 通过路径列表、起始时间和结束时间查询 tsfile 文件 + * 该方法用于树模型下的 tsfile 文件查询 * - * @param [in] path_list 路径列表。 - * @param [in] start_time 起始时间。 - * @param [in] end_time 结束时间。 - * @param [out] result_set 查询结果集。 + * @param [in] path_list 路径列表 + * @param [in] start_time 起始时间 + * @param [in] end_time 结束时间 + * @param [out] result_set 结果集 */ - int query(std::vector &path_list, int64_t start_time, - int64_t end_time, ResultSet *&result_set); + int query(std::vector& path_list, int64_t start_time, + int64_t end_time, ResultSet*& result_set); /** - * @brief 通过表名、列名、起始时间和结束时间查询 tsfile, 
- * 本方法使用表模型进行查询。 + * @brief 通过表名、列名、起始时间和结束时间查询 tsfile 文件 + * 该方法用于表模型下的 tsfile 文件查询 * - * @param [in] table_name 表名。 - * @param [in] columns_names 列名列表。 - * @param [in] start_time 起始时间。 - * @param [in] end_time 结束时间。 - * @param [out] result_set 查询结果集。 + * @param [in] table_name 表名 + * @param [in] columns_names 列名列表 + * @param [in] start_time 起始时间 + * @param [in] end_time 结束时间 + * @param [out] result_set 结果集 + * @param [in] batch_size 小于等于0表示逐行返回模式, + * 大于0表示按指定大小返回TsBlock数据块 */ - int query(const std::string &table_name, - const std::vector &columns_names, int64_t start_time, - int64_t end_time, ResultSet *&result_set); - + int query(const std::string& table_name, + const std::vector& columns_names, int64_t start_time, + int64_t end_time, ResultSet*& result_set, int batch_size = -1); + /** - * @brief 通过表名、列名、开始时间、结束时间和标签过滤器查询 tsfile。 - * 此方法用于通过表模型查询 tsfile。 + * @brief 通过表名、列名、起始时间、结束时间和标签过滤条件查询 tsfile 文件 + * 该方法用于表模型下的 tsfile 文件查询 * * @param [in] table_name 表名 - * @param [in] columns_names 列名 - * @param [in] start_time 开始时间 + * @param [in] columns_names 列名列表 + * @param [in] start_time 起始时间 * @param [in] end_time 结束时间 - * @param [in] tag_filter 标签过滤器 + * @param [in] tag_filter 标签过滤条件 * @param [out] result_set 结果集 */ int query(const std::string& table_name, const std::vector& columns_names, int64_t start_time, - int64_t end_time, ResultSet*& result_set, Filter* tag_filter); - + int64_t end_time, ResultSet*& result_set, Filter* tag_filter, + int batch_size = 0); + + /** + * @brief 基于偏移量和限制条数,按行查询树模型时间序列数据 + * + * @param path_list 待查询的完整路径(设备.测量项) + * @param offset 需要跳过的起始行数(>=0) + * @param limit 最大返回行数,小于0表示无限制 + * @param[out] result_set 存储查询结果的结果集 + * @return 成功返回0,失败返回非零错误码 + */ + int queryByRow(std::vector& path_list, int offset, int limit, + ResultSet*& result_set); + + /** + * @brief 基于偏移量和限制条数下推,按行查询表模型数据 + * + * 对于密集型设备(所有列行数相同), + * 偏移量/限制条数会通过SSI下推至数据块/数据页级别, + * 无需解码即可跳过整个数据块/数据页。 + * 对于稀疏型设备,偏移量/限制条数在行合并阶段生效。 + * 当设备总行数处于偏移量范围内时,可直接跳过整个设备。 + * + * @param 
table_name 待查询的表名 + * @param column_names 待查询的列名 + * @param offset 需要跳过的起始行数(>=0) + * @param limit 最大返回行数,小于0表示无限制 + * @param[out] result_set 存储查询结果的结果集 + * @param tag_filter 可选的标签过滤条件,用于按标签列过滤数据 + * @return 成功返回0,失败返回非零错误码 + */ + int queryByRow(const std::string& table_name, + const std::vector& column_names, int offset, + int limit, ResultSet*& result_set, + Filter* tag_filter = nullptr, int batch_size = 0); + /** - * @brief 销毁结果集,该方法应在查询完成并使用完 result_set 后调用。 + * @brief 在树模型上执行表查询 * - * @param qds 查询结果集。 + * @param measurement_names 测量项名称列表 + * @param star_time 起始时间 + * @param end_time 结束时间 + * @param result_set 结果集 */ - void destroy_query_data_set(ResultSet *qds); - ResultSet *read_timeseries( - const std::shared_ptr &device_id, - const std::vector &measurement_name); + int query_table_on_tree(const std::vector& measurement_names, + int64_t star_time, int64_t end_time, + ResultSet*& result_set); /** - * @brief 获取 tsfile 中的所有设备。 + * @brief 销毁结果集,该方法应在查询完成、使用完结果集后调用 * - * @param table_name 表名。 - * @return std::vector> 设备 ID 列表。 + * @param qds 结果集对象 + */ + void destroy_query_data_set(ResultSet* qds); + /** + * @brief 根据设备ID和测量项名称读取时间序列数据 + * + * @param device_id 设备ID + * @param measurement_name 测量项名称列表 + * @return 结果集对象 + */ + ResultSet* read_timeseries( + const std::shared_ptr& device_id, + const std::vector& measurement_name); + /** + * @brief 获取 tsfile 文件中的所有设备 + * + * @param table_name 表名 + * @return 设备ID列表 */ std::vector> get_all_devices( std::string table_name); + /** - * @brief 根据设备 ID 和测量名称获取时间序列模式信息。 + * @brief 获取 tsfile 文件中的所有设备 * - * @param [in] device_id 设备 ID。 - * @param [out] result std::vector 测量模式列表。 - * @return 成功时返回 0,失败时返回 errno_define.h 中的非零错误码。 + * @return 设备ID列表 + */ + std::vector> get_all_device_ids(); + + /** + * @brief 获取文件中的所有设备ID(与get_all_device_ids功能一致) + * + * @return 设备列表 + */ + std::vector> get_all_devices(); + + /** + * @brief 根据设备ID和测量项名称获取时间序列结构 + * + * @param [in] device_id 设备ID + * @param [out] result 测量项结构列表 + * @return 
成功返回0,失败返回非零错误码 */ int get_timeseries_schema(std::shared_ptr device_id, - std::vector &result); + std::vector& result); + + /** + * @brief 获取指定设备的时间序列元数据 + * + * 仅文件中存在的设备会被包含在结果中 + * 若设备ID列表为空,返回空映射表 + * + * @param device_ids 待查询的设备列表 + * @return 映射关系:设备ID -> 时间序列元数据列表(仅包含存在的数据) + */ + DeviceTimeseriesMetadataMap get_timeseries_metadata( + const std::vector>& device_ids); + + /** + * @brief 获取文件中所有设备的时间序列元数据 + * + * @return 映射关系:设备ID -> 时间序列元数据列表 + */ + DeviceTimeseriesMetadataMap get_timeseries_metadata(); + /** - * @brief 根据表名获取表的模式信息。 + * @brief 根据表名获取表结构 * - * @param table_name 表名。 - * @return std::shared_ptr 表的模式信息。 + * @param table_name 表名 + * @return 表结构智能指针 */ std::shared_ptr get_table_schema( - const std::string &table_name); + const std::string& table_name); /** - * @brief 获取 tsfile 中所有表的模式信息。 + * @brief 获取 tsfile 文件中的所有表结构 * - * @return std::vector> 表模式信息列表。 + * @return 表结构列表 */ std::vector> get_all_table_schemas(); }; diff --git a/src/zh/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md b/src/zh/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md index 60515bff4..906ca0112 100644 --- a/src/zh/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md +++ b/src/zh/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md @@ -141,50 +141,140 @@ class Tablet(object) ```python class TsFileReader: """ - 从 TsFile 中查询表格数据。 + 从 TsFile 中查询表格数据、时序数据,提供标准化的文件读取与查询接口, + 支持表模型查询、树模型查询、元数据获取、资源管控等全量核心能力。 """ - """ - 初始化指定路径的 TsFile 读取器。 - :param pathname: TsFile 文件的路径。 - :return: 无返回值。 - """ - def __init__(self, pathname) + def __init__(self, pathname: str): + """ + 初始化指定路径的 TsFile 读取器,完成文件加载与底层读取器初始化, + 同时维护当前所有活跃的查询结果集,确保读取器关闭时同步失效所有结果集。 + :param pathname: 待读取的 TsFile 文件的完整路径 + :return: 无返回值 + """ - """ - 对指定的表和列执行时间范围查询。 + def query_table(self, table_name: str, column_names: List[str], + start_time: int = np.iinfo(np.int64).min, + end_time: int = 
np.iinfo(np.int64).max, + tag_filter: Optional[object] = None, + batch_size: int = 0) -> object: + """ + 对指定的表和列执行时间范围查询,支持标签过滤与批量读取模式。 + 可适配逐行返回与固定大小数据块返回两种模式,满足不同场景的读取需求。 + + :param table_name: 要查询的目标表名,不区分大小写 + :param column_names: 要检索的目标列名列表,为空时默认查询全列 + :param start_time: 查询范围的起始时间戳,默认值为 int64 类型最小值 + :param end_time: 查询范围的结束时间戳,默认值为 int64 类型最大值 + :param tag_filter: 可选参数,基于标签列的过滤条件,支持等值、范围、逻辑组合过滤 + :param batch_size: 批量读取大小,小于等于0时启用逐行返回模式,大于0时按指定大小返回数据块 + :return: 封装完成的查询结果集处理器,可用于遍历、读取数据、获取元数据 + """ - :param table_name: 要查询的表名。 - :param column_names: 要检索的列名列表。 - :param start_time: 查询范围的起始时间(默认:int64 最小值)。 - :param end_time: 查询范围的结束时间(默认:int64 最大值)。 - :return: 查询结果集处理器。 - """ - def query_table(self, table_name : str, column_names : List[str], - start_time : int = np.iinfo(np.int64).min, - end_time: int = np.iinfo(np.int64).max) -> ResultSet + def query_table_on_tree(self, column_names: List[str], + start_time: int = np.iinfo(np.int64).min, + end_time: int = np.iinfo(np.int64).max) -> object: + """ + 在树模型结构上执行表查询,适配原生树结构时序数据的查询场景, + 直接基于测量项名称查询,无需指定表名,路径名称区分大小写。 - """ - 获取指定表的模式信息。 + :param column_names: 待查询的测量项名称列表,对应树结构中的节点路径 + :param start_time: 查询范围的起始时间戳,默认值为 int64 类型最小值 + :param end_time: 查询范围的结束时间戳,默认值为 int64 类型最大值 + :return: 树模型查询对应的结果集处理器 + """ - :param table_name: 表名。 - :return: 指定表的模式信息。 - """ - def get_table_schema(self, table_name : str) -> TableSchema + def query_tree_by_row(self, device_ids: List[str], measurement_names: List[str], + offset: int = 0, limit: int = -1) -> object: + """ + 按行分页查询树模型时序数据,支持偏移量跳过、最大返回行数限制, + 适配大数据量分页读取场景,避免单次加载过多数据导致内存溢出。 + + :param device_ids: 待查询的设备ID列表,不能为空 + :param measurement_names: 待查询的测量项名称列表,不能为空 + :param offset: 需要跳过的起始行数,默认从0开始 + :param limit: 最大返回行数,小于0表示不限制返回行数 + :return: 树模型分页查询的结果集处理器 + """ - """ - 获取 TsFile 中所有表的模式信息。 + def query_table_by_row(self, table_name: str, column_names: List[str], + offset: int = 0, limit: int = -1, + tag_filter: Optional[object] = None, + batch_size: int = 0) -> object: + """ + 
按行分页查询表模型数据,支持偏移量与行数限制下推,可结合标签过滤使用, + 密集型设备可在数据块级别跳过无效数据,大幅提升分页查询效率。 + + :param table_name: 待查询的目标表名 + :param column_names: 待查询的列名列表 + :param offset: 需要跳过的起始行数,默认从0开始 + :param limit: 最大返回行数,小于0表示不限制返回行数 + :param tag_filter: 可选参数,标签过滤条件,过滤符合条件的设备数据 + :param batch_size: 批量读取大小,适配底层数据块读取逻辑 + :return: 表模型分页查询的结果集处理器 + """ - :return: 一个将表名映射到其模式的字典。 - """ - def get_all_table_schemas(self) -> dict[str, TableSchema] + def query_timeseries(self, device_name: str, sensor_list: List[str], + start_time: int = 0, end_time: int = 0) -> object: + """ + 针对单个指定设备,执行时间范围时序数据查询, + 适配单设备多传感器的精准查询场景,简化查询调用逻辑。 + + :param device_name: 目标设备的名称/路径 + :param sensor_list: 待查询的传感器(测量项)名称列表 + :param start_time: 查询起始时间戳,为0时默认从文件最早时间开始 + :param end_time: 查询结束时间戳,为0时默认到文件最晚时间结束 + :return: 单设备时序查询的结果集处理器 + """ - """ - 关闭 TsFile 读取器。如果读取器中有活动的结果集,它们将失效。 - """ - def close(self) + def get_table_schema(self, table_name: str) -> object: + """ + 获取指定表的完整模式信息,包含列名、数据类型、标签列、时序约束等全量元数据, + 用于提前校验查询字段合法性、解析数据结构。 + + :param table_name: 目标表名 + :return: 对应表的模式信息对象,包含表结构全量配置 + """ + + def get_all_table_schemas(self) -> Dict[str, object]: + """ + 获取当前 TsFile 文件中所有表的模式信息, + 一键遍历文件内全部数据表结构,无需逐个表查询。 + :return: 字典结构,key为表名,value为对应表的模式信息对象 + """ + + def get_all_timeseries_schemas(self) -> List[object]: + """ + 获取 TsFile 内所有时序序列的模式信息, + 覆盖树模型、表模型全量时序数据的字段、类型、约束信息。 + :return: 所有时序模式信息组成的列表 + """ + + def get_all_devices(self) -> List[str]: + """ + 获取 TsFile 文件内所有设备的标识信息, + 可遍历文件内全部设备,适配全设备统计、批量查询前置操作。 + + :return: 所有设备ID/设备路径组成的列表 + """ + + def get_timeseries_metadata(self, device_ids: Optional[List[str]] = None) -> Dict[str, object]: + """ + 获取指定设备的时序元数据,包含数据存储分段、字段约束、数据范围等信息, + 不传设备ID时默认返回全设备元数据,传入空列表返回空字典。 + + :param device_ids: 可选参数,待查询元数据的设备ID列表 + :return: 字典结构,key为设备路径,value为对应设备的时序元数据组 + """ + + def close(self) -> None: + """ + 关闭 TsFile 读取器,释放底层文件句柄、内存资源, + 同时将当前所有活跃的查询结果集标记为失效,禁止后续数据读取操作。 + 关闭后不可再次执行查询、元数据获取操作,需重新初始化读取器。 + """ ``` ### ResultSet @@ -265,6 +355,7 @@ class ResultSet: ``` + ### to_dataframe 
```Python @@ -333,4 +424,3 @@ def to_dataframe(file_path: str, """ ``` - diff --git a/src/zh/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-C.md b/src/zh/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-C.md index 45db6f3ff..fc22b5782 100644 --- a/src/zh/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-C.md +++ b/src/zh/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-C.md @@ -277,10 +277,9 @@ ERRNO tsfile_reader_close(TsFileReader reader); -### 查询表 / 获取下一行 +### 查询表 / 获取下一行 / 按行查询 ```C - /** * @brief 在指定时间范围内,从指定表和列中查询数据。 * @@ -313,6 +312,44 @@ bool tsfile_result_set_next(ResultSet result_set, ERRNO* error_code); * @param result_set [输入] 有效的 ResultSet 句柄指针。 */ void free_tsfile_result_set(ResultSet* result_set); + +/** + * @brief 按行查询时间序列数据(树模型),支持偏移量与行数限制 + * + * @param reader [in] 有效的 TsFileReader 句柄,通过 tsfile_reader_new() 获取 + * @param device_ids [in] 设备 ID 数组 + * @param device_ids_len [in] 设备 ID 的数量 + * @param measurement_names [in] 测量项(传感器)名称数组 + * @param measurement_names_len [in] 测量项名称的数量 + * @param offset [in] 需要跳过的起始行数(必须 >= 0) + * @param limit [in] 最多返回的行数,< 0 表示不限制 + * @param err_code [out] 错误码,成功返回 E_OK(0) + * @return 成功返回结果集 ResultSet 句柄,失败返回 NULL + */ +ResultSet tsfile_reader_query_tree_by_row(TsFileReader reader, + char** device_ids, int device_ids_len, + char** measurement_names, + int measurement_names_len, int offset, + int limit, ERRNO* err_code); + +/** + * @brief 按行查询表模型数据,支持偏移量与行数限制下推 + * + * @param reader [in] 有效的 TsFileReader 句柄,通过 tsfile_reader_new() 获取 + * @param table_name [in] 目标表名 + * @param column_names [in] 要查询的列名数组 + * @param column_names_len [in] 要查询的列数量 + * @param offset [in] 需要跳过的起始行数(必须 >= 0) + * @param limit [in] 最多返回的行数,< 0 表示不限制 + * @param tag_filter [in] 标签过滤条件句柄,用于按标签列过滤数据 + * @param batch_size [in] 批量读取大小 + * @param err_code [out] 错误码,成功返回 E_OK(0) + * @return 成功返回结果集 ResultSet 句柄,失败返回 NULL + */ +ResultSet tsfile_reader_query_table_by_row( + TsFileReader reader, const char* table_name, char** column_names, + int 
column_names_len, int offset, int limit, TagFilterHandle tag_filter, + int batch_size, ERRNO* err_code); ``` diff --git a/src/zh/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-CPP.md b/src/zh/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-CPP.md index a652c81d4..b0aeb55d8 100644 --- a/src/zh/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-CPP.md +++ b/src/zh/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-CPP.md @@ -27,55 +27,89 @@ 用于写入 TsFile. ```cpp +namespace storage { +class RestorableTsFileIOWriter; + /** - * @brief 用于将结构化表格数据写入具有指定模式的 TsFile。 + * @brief 支持按照指定表结构,将结构化表数据写入 TsFile 文件 * - * TsFileTableWriter 类被设计用于写入结构化数据,特别适合时序数据, - * 数据将被写入一种为高效存储与检索优化的文件格式(即 TsFile)。该类允许用户定义 - * 所需写入表的模式,按照该模式添加数据行,并将这些数据序列化写入 TsFile。 - * 此外,还提供了在写入过程中限制内存使用的选项。 + * TsFileTableWriter 类用于将结构化数据(特别适用于时序数据) + * 写入专为高效存储与查询优化的 TsFile 文件。 + * 使用者可定义待写入表的结构,按照该结构添加数据行, + * 并将数据序列化为 TsFile。 + * 同时,该类提供写入过程中的内存使用限制能力。 */ class TsFileTableWriter { public: /** - * TsFileTableWriter 用于将表格数据写入具有指定模式的目标文件, - * 可选地限制内存使用。 + * TsFileTableWriter 用于根据指定的表结构,将表数据写入目标文件, + * 并可选择性地限制内存使用量。 * - * @param writer_file 要写入表数据的目标文件。不能为空。 - * @param table_schema 用于构造表结构,定义正在写入表的模式。 - * @param memory_threshold 可选参数。当写入数据的大小超过该值时, - * 数据将自动刷新到磁盘。默认值为 128MB。 + * @param writer_file 表数据的目标写入文件,不能为空指针 + * @param table_schema 用于构建表结构,定义待写入表的 schema + * @param memory_threshold 可选参数。当已写入数据量超过该阈值时, + * 数据将自动刷新到磁盘。默认值为 128MB */ + template <typename T> + explicit TsFileTableWriter(storage::WriteFile* writer_file, T* table_schema, + uint64_t memory_threshold = 128 * 1024 * 1024) { + static_assert(!std::is_same<T, std::nullptr_t>::value, + "table_schema cannot be nullptr"); + tsfile_writer_ = std::make_shared<TsFileWriter>(); + tsfile_writer_->init(writer_file); + tsfile_writer_->set_generate_table_schema(false); + + // 执行深拷贝。源 TableSchema 对象可能分配在栈/堆上 + auto table_schema_ptr = std::make_shared<TableSchema>(*table_schema); + error_number = tsfile_writer_->register_table(table_schema_ptr); + 
exclusive_table_name_ = table_schema->get_table_name(); + common::g_config_value_.chunk_group_size_threshold_ = memory_threshold; + } - TsFileTableWriter(WriteFile* writer_file, - TableSchema* table_schema, - uint64_t memory_threshold = 128 * 1024 * 1024); - ~TsFileTableWriter(); /** - * 将给定的 Tablet 数据按照表的模式写入目标文件。 + * 通过可恢复的 TsFileIOWriter 构建 TsFileTableWriter, + * 支持在故障恢复后追加表数据。Schema 从已恢复的文件中读取, + * 无需额外传入 TableSchema。 * - * @param tablet 包含待写入数据的 Tablet。不能为空。 - * @return 成功时返回 0,失败时返回 errno_define.h 中的非零错误码。 + * @param restorable_writer 已恢复的 I/O 写入器;不能为空指针, + * 且必须以截断模式打开,保证 can_write() 返回 true + * @param memory_threshold 可选的缓存数据内存阈值 */ + explicit TsFileTableWriter( + storage::RestorableTsFileIOWriter* restorable_writer, + uint64_t memory_threshold = 128 * 1024 * 1024); - int write_table(const Tablet& tablet); /** - * 将所有缓冲数据刷新到底层存储介质,确保所有数据都已写出。 - * 此方法确保所有未完成的写入操作被持久化。 + * 向写入器注册表结构 * - * @return 成功时返回 0,失败时返回 errno_define.h 中的非零错误码。 + * @param table_schema 待注册的表结构,不能为空指针 + * @return 成功返回 0,失败返回非零错误码 + */ + int register_table(const std::shared_ptr& table_schema); + /** + * 根据表结构,将指定的 Tablet 数据写入目标文件 + * + * @param tablet 包含待写入数据的 Tablet,不能为空指针 + * @return 成功返回 0,失败返回非零错误码 + */ + int write_table(Tablet& tablet) const; + /** + * 将所有缓存数据刷新到底层存储介质,确保所有数据都被持久化。 + * 该方法保证所有待写入数据都被落盘。 + * + * @return 成功返回 0,失败返回非零错误码 */ - int flush(); /** * 关闭写入器并释放其占用的所有资源。 - * 调用此方法后,不应再对该实例执行任何操作。 + * 调用该方法后,不应对当前实例执行任何后续操作。 * - * @return 成功时返回 0,失败时返回 errno_define.h 中的非零错误码。 + * @return 成功返回 0,失败返回非零错误码 */ - int close(); }; + +} // namespace storage ``` ### TableSchema @@ -153,7 +187,6 @@ enum TSDataType : uint8_t { ### Tablet - ```cpp /** * @brief 表示用于插入到表中的数据行集合及其相关元数据。 @@ -218,124 +251,253 @@ public: }; ``` +### RestorableTsFileIOWriter +> V2.3.0 + +```cpp +namespace storage { +/** + * RestorableTsFileIOWriter 用于打开 TsFile 并对其进行可选的恢复操作 + * 继承自 TsFileIOWriter,支持在文件恢复后继续写入 + * + * (1) 若 TsFile 正常关闭:has_crashed()=false,can_write()=false + * + * (2) 若 TsFile 
不完整/程序崩溃:has_crashed()=true, + * can_write()=true,写入器会截断损坏数据并允许继续写入 + * + * 基于标准 C++11 实现,通过 RAII 和智能指针避免内存泄漏 + */ +class RestorableTsFileIOWriter : public TsFileIOWriter { + public: + RestorableTsFileIOWriter(); + + /** + * 打开 TsFile 用于恢复/追加写入 + * 使用 O_RDWR|O_CREAT 模式,不使用 O_TRUNC,因此会保留文件原有内容 + * + * @param file_path TsFile 文件路径 + * @param truncate_corrupted 若为 true,则截断损坏的数据; + * 若为 false,则不截断(不完整文件保持原样) + * @return 成功返回 E_OK,失败返回错误码 + */ + int open(const std::string& file_path, bool truncate_corrupted = true); + + /** + * 关闭文件 + */ + void close(); +}; + +} // namespace storage +``` + + ## 读取接口 ### Tsfile Reader ```cpp /** - * @brief TsFileReader 提供了查询所有以 .tsfile 为后缀的文件的能力。 + * @brief TsFileReader 提供查询所有后缀为 .tsfile 的文件的能力 * - * TsFileReader 旨在用于查询 .tsfile 文件,它支持树模型查询和表模型查询, - * 并支持查询元数据信息,如 TableSchema 和 TimeseriesSchema。 + * TsFileReader 专为查询 .tsfile 文件设计,支持树模型查询和表模型查询, + * 同时支持查询表结构(TableSchema)、时间序列结构(TimeseriesSchema)等元数据。 */ - class TsFileReader { public: TsFileReader(); - ~TsFileReader(); /** - * @brief 打开 tsfile 文件。 + * @brief 打开 tsfile 文件 * - * @param file_path 要打开的 tsfile 文件路径。 - * @return 成功时返回 0,失败时返回 errno_define.h 中的非零错误码。 + * @param file_path 待打开的 tsfile 文件路径 + * @return 成功返回0,失败返回非零错误码 */ - - int open(const std::string &file_path); + int open(const std::string& file_path); /** - * @brief 关闭 tsfile,查询完成后应调用此方法。 + * @brief 关闭 tsfile 文件,该方法应在查询完成后调用 * - * @return 成功时返回 0,失败时返回 errno_define.h 中的非零错误码。 + * @return 成功返回0,失败返回非零错误码 */ int close(); /** - * @brief 通过查询表达式对 tsfile 进行查询,用户可以自行构造查询表达式来查询 tsfile。 + * @brief 通过查询表达式查询 tsfile 文件,用户可自行构造查询表达式进行查询 * - * @param [in] qe 查询表达式。 - * @param [out] ret_qds 查询结果集。 - * @return 成功时返回 0,失败时返回 errno_define.h 中的非零错误码。 + * @param [in] qe 查询表达式 + * @param [out] ret_qds 结果集 + * @return 成功返回0,失败返回非零错误码 */ - int query(storage::QueryExpression *qe, ResultSet *&ret_qds); + int query(storage::QueryExpression* qe, ResultSet*& ret_qds); /** - * @brief 通过路径列表、起始时间和结束时间查询 tsfile, - * 本方法使用树模型进行查询。 + * @brief 
通过路径列表、起始时间和结束时间查询 tsfile 文件 + * 该方法用于树模型下的 tsfile 文件查询 * - * @param [in] path_list 路径列表。 - * @param [in] start_time 起始时间。 - * @param [in] end_time 结束时间。 - * @param [out] result_set 查询结果集。 + * @param [in] path_list 路径列表 + * @param [in] start_time 起始时间 + * @param [in] end_time 结束时间 + * @param [out] result_set 结果集 */ - int query(std::vector &path_list, int64_t start_time, - int64_t end_time, ResultSet *&result_set); + int query(std::vector& path_list, int64_t start_time, + int64_t end_time, ResultSet*& result_set); /** - * @brief 通过表名、列名、起始时间和结束时间查询 tsfile, - * 本方法使用表模型进行查询。 + * @brief 通过表名、列名、起始时间和结束时间查询 tsfile 文件 + * 该方法用于表模型下的 tsfile 文件查询 * - * @param [in] table_name 表名。 - * @param [in] columns_names 列名列表。 - * @param [in] start_time 起始时间。 - * @param [in] end_time 结束时间。 - * @param [out] result_set 查询结果集。 + * @param [in] table_name 表名 + * @param [in] columns_names 列名列表 + * @param [in] start_time 起始时间 + * @param [in] end_time 结束时间 + * @param [out] result_set 结果集 + * @param [in] batch_size 小于等于0表示逐行返回模式, + * 大于0表示按指定大小返回TsBlock数据块 */ - int query(const std::string &table_name, - const std::vector &columns_names, int64_t start_time, - int64_t end_time, ResultSet *&result_set); - + int query(const std::string& table_name, + const std::vector& columns_names, int64_t start_time, + int64_t end_time, ResultSet*& result_set, int batch_size = -1); + /** - * @brief 通过表名、列名、开始时间、结束时间和标签过滤器查询 tsfile。 - * 此方法用于通过表模型查询 tsfile。 + * @brief 通过表名、列名、起始时间、结束时间和标签过滤条件查询 tsfile 文件 + * 该方法用于表模型下的 tsfile 文件查询 * * @param [in] table_name 表名 - * @param [in] columns_names 列名 - * @param [in] start_time 开始时间 + * @param [in] columns_names 列名列表 + * @param [in] start_time 起始时间 * @param [in] end_time 结束时间 - * @param [in] tag_filter 标签过滤器 + * @param [in] tag_filter 标签过滤条件 * @param [out] result_set 结果集 */ int query(const std::string& table_name, const std::vector& columns_names, int64_t start_time, - int64_t end_time, ResultSet*& result_set, Filter* tag_filter); - + int64_t end_time, ResultSet*& 
result_set, Filter* tag_filter, + int batch_size = 0); + + /** + * @brief 基于偏移量和限制条数,按行查询树模型时间序列数据 + * + * @param path_list 待查询的完整路径(设备.测量项) + * @param offset 需要跳过的起始行数(>=0) + * @param limit 最大返回行数,小于0表示无限制 + * @param[out] result_set 存储查询结果的结果集 + * @return 成功返回0,失败返回非零错误码 + */ + int queryByRow(std::vector& path_list, int offset, int limit, + ResultSet*& result_set); + + /** + * @brief 基于偏移量和限制条数下推,按行查询表模型数据 + * + * 对于密集型设备(所有列行数相同), + * 偏移量/限制条数会通过SSI下推至数据块/数据页级别, + * 无需解码即可跳过整个数据块/数据页。 + * 对于稀疏型设备,偏移量/限制条数在行合并阶段生效。 + * 当设备总行数处于偏移量范围内时,可直接跳过整个设备。 + * + * @param table_name 待查询的表名 + * @param column_names 待查询的列名 + * @param offset 需要跳过的起始行数(>=0) + * @param limit 最大返回行数,小于0表示无限制 + * @param[out] result_set 存储查询结果的结果集 + * @param tag_filter 可选的标签过滤条件,用于按标签列过滤数据 + * @return 成功返回0,失败返回非零错误码 + */ + int queryByRow(const std::string& table_name, + const std::vector& column_names, int offset, + int limit, ResultSet*& result_set, + Filter* tag_filter = nullptr, int batch_size = 0); + + /** + * @brief 在树模型上执行表查询 + * + * @param measurement_names 测量项名称列表 + * @param star_time 起始时间 + * @param end_time 结束时间 + * @param result_set 结果集 + */ + int query_table_on_tree(const std::vector& measurement_names, + int64_t star_time, int64_t end_time, + ResultSet*& result_set); + /** + * @brief 销毁结果集,该方法应在查询完成、使用完结果集后调用 + * + * @param qds 结果集对象 + */ + void destroy_query_data_set(ResultSet* qds); /** - * @brief 销毁结果集,该方法应在查询完成并使用完 result_set 后调用。 + * @brief 根据设备ID和测量项名称读取时间序列数据 * - * @param qds 查询结果集。 + * @param device_id 设备ID + * @param measurement_name 测量项名称列表 + * @return 结果集对象 */ - void destroy_query_data_set(ResultSet *qds); - ResultSet *read_timeseries( - const std::shared_ptr &device_id, - const std::vector &measurement_name); + ResultSet* read_timeseries( + const std::shared_ptr& device_id, + const std::vector& measurement_name); /** - * @brief 获取 tsfile 中的所有设备。 + * @brief 获取 tsfile 文件中的所有设备 * - * @param table_name 表名。 - * @return std::vector> 设备 ID 列表。 + * @param table_name 表名 + * @return 
设备ID列表 */ std::vector> get_all_devices( std::string table_name); + /** - * @brief 根据设备 ID 和测量名称获取时间序列模式信息。 + * @brief 获取 tsfile 文件中的所有设备 * - * @param [in] device_id 设备 ID。 - * @param [out] result std::vector 测量模式列表。 - * @return 成功时返回 0,失败时返回 errno_define.h 中的非零错误码。 + * @return 设备ID列表 + */ + std::vector> get_all_device_ids(); + + /** + * @brief 获取文件中的所有设备ID(与get_all_device_ids功能一致) + * + * @return 设备列表 + */ + std::vector> get_all_devices(); + + /** + * @brief 根据设备ID和测量项名称获取时间序列结构 + * + * @param [in] device_id 设备ID + * @param [out] result 测量项结构列表 + * @return 成功返回0,失败返回非零错误码 */ int get_timeseries_schema(std::shared_ptr device_id, - std::vector &result); + std::vector& result); + /** - * @brief 根据表名获取表的模式信息。 + * @brief 获取指定设备的时间序列元数据 * - * @param table_name 表名。 - * @return std::shared_ptr 表的模式信息。 + * 仅文件中存在的设备会被包含在结果中 + * 若设备ID列表为空,返回空映射表 + * + * @param device_ids 待查询的设备列表 + * @return 映射关系:设备ID -> 时间序列元数据列表(仅包含存在的数据) + */ + DeviceTimeseriesMetadataMap get_timeseries_metadata( + const std::vector>& device_ids); + + /** + * @brief 获取文件中所有设备的时间序列元数据 + * + * @return 映射关系:设备ID -> 时间序列元数据列表 + */ + DeviceTimeseriesMetadataMap get_timeseries_metadata(); + + /** + * @brief 根据表名获取表结构 + * + * @param table_name 表名 + * @return 表结构智能指针 */ std::shared_ptr get_table_schema( - const std::string &table_name); + const std::string& table_name); /** - * @brief 获取 tsfile 中所有表的模式信息。 + * @brief 获取 tsfile 文件中的所有表结构 * - * @return std::vector> 表模式信息列表。 + * @return 表结构列表 */ std::vector> get_all_table_schemas(); }; ``` + ### ResultSet ```cpp /** diff --git a/src/zh/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md b/src/zh/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md index 08a4b2f6c..906ca0112 100644 --- a/src/zh/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md +++ b/src/zh/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md @@ -141,50 +141,140 @@ class Tablet(object) ```python class 
TsFileReader: """ - 从 TsFile 中查询表格数据。 + 从 TsFile 中查询表格数据、时序数据,提供标准化的文件读取与查询接口, + 支持表模型查询、树模型查询、元数据获取、资源管控等全量核心能力。 """ - """ - 初始化指定路径的 TsFile 读取器。 - :param pathname: TsFile 文件的路径。 - :return: 无返回值。 - """ - def __init__(self, pathname) + def __init__(self, pathname: str): + """ + 初始化指定路径的 TsFile 读取器,完成文件加载与底层读取器初始化, + 同时维护当前所有活跃的查询结果集,确保读取器关闭时同步失效所有结果集。 + :param pathname: 待读取的 TsFile 文件的完整路径 + :return: 无返回值 + """ - """ - 对指定的表和列执行时间范围查询。 + def query_table(self, table_name: str, column_names: List[str], + start_time: int = np.iinfo(np.int64).min, + end_time: int = np.iinfo(np.int64).max, + tag_filter: Optional[object] = None, + batch_size: int = 0) -> object: + """ + 对指定的表和列执行时间范围查询,支持标签过滤与批量读取模式。 + 可适配逐行返回与固定大小数据块返回两种模式,满足不同场景的读取需求。 + + :param table_name: 要查询的目标表名,不区分大小写 + :param column_names: 要检索的目标列名列表,为空时默认查询全列 + :param start_time: 查询范围的起始时间戳,默认值为 int64 类型最小值 + :param end_time: 查询范围的结束时间戳,默认值为 int64 类型最大值 + :param tag_filter: 可选参数,基于标签列的过滤条件,支持等值、范围、逻辑组合过滤 + :param batch_size: 批量读取大小,小于等于0时启用逐行返回模式,大于0时按指定大小返回数据块 + :return: 封装完成的查询结果集处理器,可用于遍历、读取数据、获取元数据 + """ - :param table_name: 要查询的表名。 - :param column_names: 要检索的列名列表。 - :param start_time: 查询范围的起始时间(默认:int64 最小值)。 - :param end_time: 查询范围的结束时间(默认:int64 最大值)。 - :return: 查询结果集处理器。 - """ - def query_table(self, table_name : str, column_names : List[str], - start_time : int = np.iinfo(np.int64).min, - end_time: int = np.iinfo(np.int64).max) -> ResultSet + def query_table_on_tree(self, column_names: List[str], + start_time: int = np.iinfo(np.int64).min, + end_time: int = np.iinfo(np.int64).max) -> object: + """ + 在树模型结构上执行表查询,适配原生树结构时序数据的查询场景, + 直接基于测量项名称查询,无需指定表名,路径名称区分大小写。 - """ - 获取指定表的模式信息。 + :param column_names: 待查询的测量项名称列表,对应树结构中的节点路径 + :param start_time: 查询范围的起始时间戳,默认值为 int64 类型最小值 + :param end_time: 查询范围的结束时间戳,默认值为 int64 类型最大值 + :return: 树模型查询对应的结果集处理器 + """ - :param table_name: 表名。 - :return: 指定表的模式信息。 - """ - def get_table_schema(self, table_name : str) -> TableSchema + def query_tree_by_row(self, 
device_ids: List[str], measurement_names: List[str], + offset: int = 0, limit: int = -1) -> object: + """ + 按行分页查询树模型时序数据,支持偏移量跳过、最大返回行数限制, + 适配大数据量分页读取场景,避免单次加载过多数据导致内存溢出。 + + :param device_ids: 待查询的设备ID列表,不能为空 + :param measurement_names: 待查询的测量项名称列表,不能为空 + :param offset: 需要跳过的起始行数,默认从0开始 + :param limit: 最大返回行数,小于0表示不限制返回行数 + :return: 树模型分页查询的结果集处理器 + """ - """ - 获取 TsFile 中所有表的模式信息。 + def query_table_by_row(self, table_name: str, column_names: List[str], + offset: int = 0, limit: int = -1, + tag_filter: Optional[object] = None, + batch_size: int = 0) -> object: + """ + 按行分页查询表模型数据,支持偏移量与行数限制下推,可结合标签过滤使用, + 密集型设备可在数据块级别跳过无效数据,大幅提升分页查询效率。 + + :param table_name: 待查询的目标表名 + :param column_names: 待查询的列名列表 + :param offset: 需要跳过的起始行数,默认从0开始 + :param limit: 最大返回行数,小于0表示不限制返回行数 + :param tag_filter: 可选参数,标签过滤条件,过滤符合条件的设备数据 + :param batch_size: 批量读取大小,适配底层数据块读取逻辑 + :return: 表模型分页查询的结果集处理器 + """ - :return: 一个将表名映射到其模式的字典。 - """ - def get_all_table_schemas(self) -> dict[str, TableSchema] + def query_timeseries(self, device_name: str, sensor_list: List[str], + start_time: int = 0, end_time: int = 0) -> object: + """ + 针对单个指定设备,执行时间范围时序数据查询, + 适配单设备多传感器的精准查询场景,简化查询调用逻辑。 + + :param device_name: 目标设备的名称/路径 + :param sensor_list: 待查询的传感器(测量项)名称列表 + :param start_time: 查询起始时间戳,为0时默认从文件最早时间开始 + :param end_time: 查询结束时间戳,为0时默认到文件最晚时间结束 + :return: 单设备时序查询的结果集处理器 + """ - """ - 关闭 TsFile 读取器。如果读取器中有活动的结果集,它们将失效。 - """ - def close(self) + def get_table_schema(self, table_name: str) -> object: + """ + 获取指定表的完整模式信息,包含列名、数据类型、标签列、时序约束等全量元数据, + 用于提前校验查询字段合法性、解析数据结构。 + + :param table_name: 目标表名 + :return: 对应表的模式信息对象,包含表结构全量配置 + """ + + def get_all_table_schemas(self) -> Dict[str, object]: + """ + 获取当前 TsFile 文件中所有表的模式信息, + 一键遍历文件内全部数据表结构,无需逐个表查询。 + + :return: 字典结构,key为表名,value为对应表的模式信息对象 + """ + + def get_all_timeseries_schemas(self) -> List[object]: + """ + 获取 TsFile 内所有时序序列的模式信息, + 覆盖树模型、表模型全量时序数据的字段、类型、约束信息。 + :return: 所有时序模式信息组成的列表 + """ + + def get_all_devices(self) -> List[str]: + """ + 获取 
TsFile 文件内所有设备的标识信息, + 可遍历文件内全部设备,适配全设备统计、批量查询前置操作。 + :return: 所有设备ID/设备路径组成的列表 + """ + + def get_timeseries_metadata(self, device_ids: Optional[List[str]] = None) -> Dict[str, object]: + """ + 获取指定设备的时序元数据,包含数据存储分段、字段约束、数据范围等信息, + 不传设备ID时默认返回全设备元数据,传入空列表返回空字典。 + + :param device_ids: 可选参数,待查询元数据的设备ID列表 + :return: 字典结构,key为设备路径,value为对应设备的时序元数据组 + """ + + def close(self) -> None: + """ + 关闭 TsFile 读取器,释放底层文件句柄、内存资源, + 同时将当前所有活跃的查询结果集标记为失效,禁止后续数据读取操作。 + 关闭后不可再次执行查询、元数据获取操作,需重新初始化读取器。 + """ ``` ### ResultSet