// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include "rocksdb/data_structure.h" #include "rocksdb/db.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { namespace experimental { // Supported only for Leveled compaction Status SuggestCompactRange(DB* db, ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end); Status SuggestCompactRange(DB* db, const Slice* begin, const Slice* end); // DEPRECATED: this API may be removed in a future release. // This operation can be done through CompactRange() by setting // CompactRangeOptions::bottommost_level_compaction set to // BottommostLevelCompaction::kSkip and setting target level. // // Move all L0 files to target_level skipping compaction. // This operation succeeds only if the files in L0 have disjoint ranges; this // is guaranteed to happen, for instance, if keys are inserted in sorted // order. Furthermore, all levels between 1 and target_level must be empty. // If any of the above condition is violated, InvalidArgument will be // returned. Status PromoteL0(DB* db, ColumnFamilyHandle* column_family, int target_level = 1); // This function is intended to be called with the DB closed and does not write // to the DB path. It will usually work on an open DB but may fail spuriously // in that case, and results may be out of date on return. Status GetFileChecksumsFromCurrentManifest(FileSystem* fs, const std::string& dbname, FileChecksumList* checksum_list); struct UpdateManifestForFilesStateOptions { // When true, read current file temperatures from FileSystem and update in // DB manifest when a temperature other than Unknown is reported and // inconsistent with manifest. bool update_temperatures = true; // TODO: new_checksums: to update files to latest file checksum algorithm }; // Utility for updating manifest of DB directory (not open) for current state // of files on filesystem. See UpdateManifestForFilesStateOptions. // // To minimize interference with ongoing DB operations, only the following // guarantee is provided, assuming no IO error encountered: // * Only files live in DB at start AND end of call to // UpdateManifestForFilesState() are guaranteed to be updated (as needed) in // manifest. // * For example, new files after start of call to // UpdateManifestForFilesState() might not be updated, but that is not // typically required to achieve goal of manifest consistency/completeness // (because current DB configuration would ensure new files get the desired // consistent metadata). Status UpdateManifestForFilesState( const DBOptions& db_opts, const std::string& db_name, const std::vector& column_families, const UpdateManifestForFilesStateOptions& opts = {}); // **************************************************************************** // EXPERIMENTAL new filtering features // **************************************************************************** // KeySegmentsExtractor - A class for splitting a key into meaningful pieces, or // "segments" for filtering purposes. We say the first key segment has segment // ordinal 0, the second has segment ordinal 1, etc. To simplify satisfying some // filtering requirements, the segments must encompass a complete key prefix (or // the whole key). There cannot be gaps between segments (though segments are // allowed to be essentially unused), and segments cannot overlap. // // Keys can also be put in "categories" to simplify some configuration and // handling. A "legal" key or bound is one that does not return an error (as a // special, unused category) from the extractor. It is also allowed for all // keys in a category to return an empty sequence of segments. // // To eliminate a confusing distinction between a segment that is empty vs. // "not present" for a particular key, each key is logically assiciated with // an infinite sequence of segments, including some infinite tail of 0-length // segments. In practice, we only represent a finite sequence that (at least) // covers the non-trivial segments. // // Once in production, the behavior associated with a particular GetId() // cannot change. Introduce a new GetId() when introducing new behaviors. // See also SstQueryFilterConfigsManager below. // // This feature hasn't yet been validated with user timestamp. // // = A SIMPLIFIED MODEL = // Let us start with the easiest set of contraints to satisfy with a key // segments extractor that generally allows for correct point and range // filtering, and add complexity from there. Here we first assume // * The column family is using the byte-wise comparator, or reverse byte-wise // * A single category is assigned to all keys (by the extractor) // * Using simplified criteria for legal segment extraction, the "segment // maximal prefix property" // // SEGMENT MAXIMAL PREFIX PROPERTY: The segment that a byte is assigned to can // only depend on the bytes that come before it, not on the byte itself nor // anything later including the full length of the key or bound. // // Equivalently, two keys or bounds must agree on the segment assignment of // position i if the two keys share a common byte-wise prefix up to at least // position i - 1 (and i is within bounds of both keys). // // This specifically excludes "all or nothing" segments where it is only // included if it reaches a particular width or delimiter. A segment resembling // the FixedPrefixTransform would be illegal (without other assumptions); it // must be like CappedPrefixTransform. // // This basically matches the notion of parsing prefix codes (see // https://en.wikipedia.org/wiki/Prefix_code) except we have to include any // partial segment (code word) at the end whenever an extension to that key // might produce a full segment. An example would be parsing UTF-8 into // segments corresponding to encoded code points, where any incomplete code // at the end must be part of a trailing segment. Note a three-way // correspondence between // (a) byte-wise ordering of encoded code points, e.g. // { D0 98 E2 82 AC } // { E2 82 AC D0 98 } // (b) lexicographic-then-byte-wise ordering of segments that are each an // encoded code point, e.g. // {{ D0 98 } { E2 82 AC }} // {{ E2 82 AC } { D0 98 }} // and (c) lexicographic ordering of the decoded code points, e.g. // { U+0418 U+20AC } // { U+20AC U+0418 } // The correspondence between (a) and (b) is a result of the segment maximal // prefix property and is critical for correct application of filters to // range queries. The correspondence with (c) is a handy attribute of UTF-8 // (with no over-long encodings) and might be useful to the application. // // Example types of key segments that can be freely mixed in any order: // * Capped number of bytes or codewords. The number cap for the segment // could be the same for all keys or encoded earlier in the key. // * Up to *and including* a delimiter byte or codeword. // * Any/all remaining bytes to the end of the key, though this implies all // subsequent segments will be empty. // As part of the segment maximal prefix property, if the segments do not // extend to the end of the key, that must be implied by the bytes that are // in segments, NOT because the potential contents of a segment were considered // incomplete. // // For example, keys might consist of // * Segment 0: Any sequence of bytes up to and including the first ':' // character, or the whole key if no ':' is present. // * Segment 1: The next four bytes, or less if we reach end of key. // * Segment 2: An unsigned byte indicating the number of additional bytes in // the segment, and then that many bytes (or less up to the end of the key). // * Segment 3: Any/all remaining bytes in the key // // For an example of what can go wrong, consider using '4' as a delimiter // but not including it with the segment leading up to it. Suppose we have // these keys and corresponding first segments: // "123456" -> "123" (in file 1) // "124536" -> "12" (in file 2) // "125436" -> "125" (in file 1) // Notice how byte-wise comparator ordering of the segments does not follow // the ordering of the keys. This means we cannot safely use a filter with // a range of segment values for filtering key range queries. For example, // we might get a range query for ["123", "125Z") and miss that key "124536" // in file 2 is in range because its first segment "12" is out of the range // of the first segments on the bounds, "123" and "125". We cannot even safely // use this for prefix-like range querying with a Bloom filter on the segments. // For a query ["12", "124Z"), segment "12" would likely not match the Bloom // filter in file 1 and miss "123456". // // CATEGORIES: The KeySegmentsExtractor is allowed to place keys in categories // so that different parts of the key space can use different filtering // strategies. The following property is generally recommended for safe filter // applicability // * CATEGORY CONTIGUOUSNESS PROPERTY: each category is contiguous in // comparator order. In other words, any key between two keys of category c // must also be in category c. // An alternative to categories when distinct kinds of keys are interspersed // is to leave some segments empty when they do not apply to that key. // Filters are generally set up to handle an empty segment specially so that // it doesn't interfere with tracking accurate ranges on non-empty occurrences // of the segment. // // = BEYOND THE SIMPLIFIED MODEL = // // DETAILED GENERAL REQUIREMENTS (incl OTHER COMPARATORS): The exact // requirements on a key segments extractor depend on whether and how we use // filters to answer queries that they cannot answer directly. To understand // this, we describe // (A) the types of filters in terms of data they represent and can directly // answer queries about, // (B) the types of read queries that we want to use filters for, and // (C) the assumptions that need to be satisfied to connect those two. // // TYPES OF FILTERS: Although not exhaustive, here are some useful categories // of filter data: // * Equivalence class filtering - Represents or over-approximates a set of // equivalence classes on keys. The size of the representation is roughly // proportional to the number of equivalence classes added. Bloom and ribbon // filters are examples. // * Order-based filtering - Represents one or more subranges of a key space or // key segment space. A filter query only requires application of the CF // comparator. The size of the representation is roughly proportional to the // number of subranges and to the key or segment size. For example, we call a // simple filter representing a minimum and a maximum value for a segment a // min-max filter. // // TYPES OF READ QUERIES and their DIRECT FILTERS: // * Point query - Whether there {definitely isn't, might be} an entry for a // particular key in an SST file (or partition, etc.). // The DIRECT FILTER for a point query is an equivalence class filter on the // whole key. // * Range query - Whether there {definitely isn't, might be} any entries // within a lower and upper key bound, in an SST file (or partition, etc.). // NOTE: For this disucssion, we ignore the detail of inclusive vs. // exclusive bounds by assuming a generalized notion of "bound" (vs. key) // that conveniently represents spaces between keys. For details, see // https://github.com/facebook/rocksdb/pull/11434 // The DIRECT FILTER for a range query is an order-based filter on the whole // key (non-empty intersection of bounds/keys). Simple minimum and maximum // keys for each SST file are automatically provided by metadata and used in // the read path for filtering (as well as binary search indexing). // PARTITIONING NOTE: SST metadata partitions do not have recorded minimum // and maximum keys, so require some special handling for range query // filtering. See https://github.com/facebook/rocksdb/pull/12872 etc. // * Where clauses - Additional constraints that can be put on range queries. // Specifically, a where clause is a tuple representing that the // concatenated sequence of segments from i to j (inclusive) compares between // b1 and b2 according to comparator c. // EXAMPLE: To represent that segment of ordinal i is equal to s, that would // be . // NOTE: To represent something like segment has a particular prefix, you // would need to split the key into more segments appropriately. There is // little loss of generality because we can combine adjacent segments for // specifying where clauses and implementing filters. // The DIRECT FILTER for a where clause is an order-based filter on the same // sequence of segments and comparator (non-empty intersection of bounds/keys), // or in the special case of an equality clause (see example), an equivalence // class filter on the sequence of segments. // // GENERALIZING FILTERS (INDIRECT): // * Point queries can utilize essentially any kind of filter by extracting // applicable segments of the query key (if not using whole key) and querying // the corresponding equivalence class or trivial range. // NOTE: There is NO requirement e.g. that the comparator used by the filter // match the CF key comparator or similar. The extractor simply needs to be // a pure function that does not return "out of bounds" segments. // FOR EXAMPLE, a min-max filter on the 4th segment of keys can also be // used for filtering point queries (Get/MultiGet) and could be as // effective and much more space efficient than a Bloom filter, depending // on the workload. // // Beyond point queries, we generally expect the key comparator to be a // lexicographic / big endian ordering at a high level (or the reverse of that // ordering), while each segment can use an arbitrary comparator. // FOR EXAMPLE, with a custom key comparator and segments extractor, // segment 0 could be a 4-byte unsigned little-endian integer, // segment 1 could be an 8-byte signed big-endian integer. This framework // requires segment 0 to come before segment 1 in the key and to take // precedence in key ordering (i.e. segment 1 order is only consulted when // keys are equal in segment 0). // // * Equivalence class filters can apply to range queries under conditions // resembling legacy prefix filtering (prefix_extractor). An equivalence class // filter on segments i through j and category set s is applicable to a range // query from lb to ub if // * All segments through j extracted from lb and ub are equal. // NOTE: being in the same filtering equivalence class is insufficient, as // that could be unrelated inputs with a hash collision. Here we are // omitting details that would formally accommodate comparators in which // different bytes can be considered equal. // * The categories of lb and ub are in the category set s. // * COMMON SEGMENT PREFIX PROPERTY (for all x, y, z; params j, s): if // * Keys x and z have equal segments up through ordinal j, and // * Keys x and z are in categories in category set s, and // * Key y is ordered x < y < z according to the CF comparator, // then both // * Key y has equal segments up through ordinal j (compared to x and z) // * Key y is in a category in category set s // (This is implied by the SEGMENT MAXIMAL PREFIX PROPERTY in the simplified // model.) // // * Order-based filters on segments (rather than whole key) can apply to range // queries (with "whole key" bounds). Specifically, an order-based filter on // segments i through j and category set s is applicable to a range query from // lb to ub if // * All segments through i-1 extracted from lb and ub are equal // * The categories of lb and ub are in the category set s. // * SEGMENT ORDERING PROPERTY for ordinal i through j, segments // comparator c, category set s, for all x, y, and z: if // * Keys x and z have equal segments up through ordinal i-1, and // * Keys x and z are in categories in category set s, and // * Key y is ordered x < y < z according to the CF comparator, // then both // * The common segment prefix property is satisifed through ordinal i-1 // and with category set s // * x_i..j <= y_i..j <= z_i..j according to segment comparator c, where // x_i..j is the concatenation of segments i through j of key x (etc.). // (This is implied by the SEGMENT MAXIMAL PREFIX PROPERTY in the simplified // model.) // // INTERESTING EXAMPLES: // Consider a segment encoding called BadVarInt1 in which a byte with // highest-order bit 1 means "start a new segment". Also consider BadVarInt0 // which starts a new segment on highest-order bit 0. // // Configuration: bytewise comp, BadVarInt1 format for segments 0-3 with // segment 3 also continuing to the end of the key // x = 0x 20 21|82 23||| // y = 0x 20 21|82 23 24|85|| // z = 0x 20 21|82 23|84 25|| // // For i=j=1, this set of keys violate the common segment prefix property and // segment ordering property, so can lead to incorrect equivalence class // filtering or order-based filtering. // // Suppose we modify the configuration so that "short" keys (empty in segment // 2) are placed in an unfiltered category. In that case, x above doesn't meet // the precondition for being limited by segment properties. Consider these // keys instead: // x = 0x 20 21|82 23 24|85|| // y = 0x 20 21|82 23 24|85 26|87| // z = 0x 20 21|82 23 24|85|86| // m = 0x 20 21|82 23 25|85|86| // n = 0x 20 21|82 23|84 25|| // // Although segment 1 values might be out of order with key order, // re-categorizing the short keys has allowed satisfying the common segment // prefix property with j=1 (and with j=0), so we can use equivalence class // filters on segment 1, or 0, or 0 to 1. However, violation of the segment // ordering property on i=j=1 (see z, m, n) means we can't use order-based. // // p = 0x 20 21|82 23|84 25 26|| // q = 0x 20 21|82 23|84 25|86| // // But keys can still be short from segment 2 to 3, and thus we are violating // the common segment prefix property for segment 2 (see n, p, q). // // Configuration: bytewise comp, BadVarInt0 format for segments 0-3 with // segment 3 also continuing to the end of the key. No short key category. // x = 0x 80 81|22 83||| // y = 0x 80 81|22 83|24 85|| // z = 0x 80 81|22 83 84|25|| // m = 0x 80 82|22 83||| // n = 0x 80 83|22 84|24 85|| // // Even though this violates the segment maximal prefix property of the // simplified model, the common segment prefix property and segment ordering // property are satisfied for the various segment ordinals. In broader terms, // the usual rule of the delimiter going with the segment before it can be // violated if every byte value below some threshold starts a segment. (This // has not been formally verified and is not recommended.) // // Suppose that we are paranoid, however, and decide to place short keys // (empty in segment 2) into an unfiltered category. This is potentially a // dangerous decision because loss of continuity at least affects the // ability to filter on segment 0 (common segment prefix property violated // with i=j=0; see z, m, n; m not in category set). Thus, excluding short keys // with categories is not a recommended solution either. class KeySegmentsExtractor { public: // The extractor assigns keys to categories so that it is easier to // combine distinct (though disjoint) key representations within a single // column family while applying different or overlapping filtering // configurations to the categories. // To enable fast set representation, the user is allowed up to 64 // categories for assigning to keys with the extractor. The user will // likely cast to their own enum type or scalars. enum KeyCategory : uint_fast8_t { kDefaultCategory = 0, kMinCategory = kDefaultCategory, // ... (user categories) // Can be used for keys ordered before typical keys. Not necessarily an // error. kReservedLowCategory = 62, // Can be used for keys ordered after typical keys. Not necessarily an // error. kReservedHighCategory = 63, kMaxUsableCategory = kReservedHighCategory, // Signals to the caller that an unexpected input or condition has // been reached and filter construction should be aborted. kErrorCategoryFilterScope = UINT8_MAX - 2, kMinErrorCategory = kErrorCategoryFilterScope, // Signals to the caller that an unexpected input or condition has // been reached and SST construction (and compaction or flush) // should be aborted. kErrorCategoryFileScope = UINT8_MAX - 1, // Signals to the caller that an unexpected input or condition has // been reached and the DB should be considered to have reached an // invalid state, at least in memory. kErrorCategoryDbScope = UINT8_MAX, }; using KeyCategorySet = SmallEnumSet; // The extractor can process three kinds of key-like inputs enum KeyKind { // User key, not including user timestamp kFullUserKey, // An iterator lower bound (inclusive). This should generally be handled // the same as a full user key but the distinction might be useful for // diagnostics or assertions. kInclusiveLowerBound, // An iterator upper bound (exclusive). Upper bounds are frequently // constructed by incrementing the last byte of a key prefix, and this can // affect what should be considered as a segment delimiter. kExclusiveUpperBound, }; // The extractor result struct Result { // Positions in the key (or bound) that represent boundaries // between segments, or the exclusive end of each segment. For example, if // the key is "abc|123|xyz" then following the guidance of including // delimiters with the preceding segment, segment_ends would be {4, 8, 11}, // representing segments "abc|" "123|" and "xyz". Empty segments are // naturally represented with repeated values, as in {4, 8, 8} for // "abc|123|", though {4, 8} would be logically equivalent because an // infinite sequence of 0-length segments is assumed after what is // explicitly represented here. However, segments might not reach the end // the key (no automatic last segment to the end of the key) and that is // OK for the WEAK ordering property. // // The first segment automatically starts at key position 0. The only way // to put gaps between segments of interest is to assign those gaps to // numbered segments, which can be left unused. std::vector segment_ends; // A category to assign to the key or bound. This default may be kept, // such as to put all keys into a single category. // IMPORTANT CURRENT LIMITATION from above: each category must be // contiguous in key comparator order, so any key between two keys in // category c must also be in category c. (Typically the category will be // determined by segment 0 in some way, often the first byte.) The enum // scalar values do not need to be related to key order. KeyCategory category = kDefaultCategory; void Reset() { segment_ends.clear(); category = kDefaultCategory; } }; virtual ~KeySegmentsExtractor() {} // A class name for this extractor. See also expectations in GetId(). virtual const char* Name() const = 0; // An identifying string that is permanently associated with the behavior // of this extractor. If a behavior change is made or set in the constructor, // the id must change to avoid incorrect filtering behavior on DBs using a // previous version of the extractor. virtual std::string GetId() const { // The default implementation assumes no configuration variance in the // constructor and just returns the class name. return Name(); } // Populates the extraction result and returns OK. Error can be signaled // with `kError` pseudo-categories. This function is expected to generate // non-error results (though possibly empty) on all keys or bounds expected // to be encountered by the DB. RocksDB will always call the function with // a (pointer to a) default-initialized result object. virtual void Extract(const Slice& key_or_bound, KeyKind kind, Result* result) const = 0; }; // Constructs a KeySegmentsExtractor for fixed-width key segments that safely // handles short keys by truncating segments at the end of the input key. // See comments on KeySegmentsExtractor for why this is much safer for // filtering than "all or nothing" fixed-size segments. This is essentially // a generalization of (New)CappedPrefixTransform. std::shared_ptr MakeSharedCappedKeySegmentsExtractor(const std::vector& byte_widths); // Alternatives for filtering inputs // An individual key segment. struct SelectKeySegment { // Segments are numbered starting from 0. explicit SelectKeySegment(uint16_t _segment_index) : segment_index(_segment_index) {} uint16_t segment_index; }; // A range of key segments concatenated together. No concatenation operations // are needed, however, because with no gaps between segments, a range of // segments is a simple substring of the key. struct SelectKeySegmentRange { // Segments are numbered starting from 0. Range is inclusive. explicit SelectKeySegmentRange(uint8_t _from_segment_index, uint8_t _to_segment_index) : from_segment_index(_from_segment_index), to_segment_index(_to_segment_index) {} // Inclusive uint8_t from_segment_index; // Inclusive uint8_t to_segment_index; }; // User key without timestamp struct SelectWholeKey {}; // TODO: The remaining Select* are not yet supported // As generated by prefix_extractor struct SelectLegacyKeyPrefix {}; struct SelectUserTimestamp {}; struct SelectColumnName {}; // NOTE: more variants might be added in the future. // NOTE2: filtering on values is not supported because it could easily break // overwrite semantics. (Filter out SST with newer, non-matching value but // see obsolete value that does match.) using FilterInput = std::variant; // Base class for individual filtering schemes in terms of chosen // FilterInputs, but not tied to a particular KeySegmentsExtractor. // // Not user extensible, name sometimes shortened to SQFC class SstQueryFilterConfig { public: virtual ~SstQueryFilterConfig() {} }; // A filtering scheme that stores minimum and maximum values (according // to bytewise ordering) of the specified filter input. Because the // empty string is often a special case, the filter excludes that from the // min/max computation and keeps a separate boolean for whether empty is // present. // // The filter is also limited to the specified categories, ignoring entries // outside the given set of categories. If not All, ranges can only be // filtered if upper and lower bounds are in the same category (and that // category is in the set relevant to the filter). std::shared_ptr MakeSharedBytewiseMinMaxSQFC( FilterInput select, KeySegmentsExtractor::KeyCategorySet categories = KeySegmentsExtractor::KeyCategorySet::All()); std::shared_ptr MakeSharedReverseBytewiseMinMaxSQFC( FilterInput select, KeySegmentsExtractor::KeyCategorySet categories = KeySegmentsExtractor::KeyCategorySet::All()); // TODO: more kinds of filters, eventually including Bloom/ribbon filters // and replacing the old filter configuration APIs // Represents a complete strategy for representing filters in SST files // and applying them to optimize range and point queries by excluding // irrelevant SST files (as best we can). This is a set of filtering // schemes and a KeySegmentsExtractor. For performance, a single extractor // should be implemented to meet all the filtering needs of any given // column family. KeySegmentsExtractor and FilterInput should be flexible // enough that there is no loss of generality, e.g. with leaving segments // blank and using segment ranges. struct SstQueryFilterConfigs { std::vector> filters; std::shared_ptr extractor; // Whether this object represent an empty set of configs because no // applicable configurations were found. (This case is represented by // an internal singleton instance.) bool IsEmptyNotFound() const; }; // SstQueryFilterConfigsManager provides facilities for safe and effective // filtering version management, with simple dynamic upgrade/downgrade // support. It is designed to encourage a development pattern that // minimizes the risk of filter and extractor versioning bugs. // // SstQueryFilterConfigsManager is essentially an immutable mapping // from {config_name_string, version_number} -> SstQueryFilterConfigs // for some contiguous range of version numbers. It is also a starting // point for specifying which configuration should be used, with awareness // of other configurations that might already be persisted in SST files // or switched to dynamically. // // Background: a single codebase might have many use cases and for // each use case, a sequence of past, current, and future filtering // configurations. It is common for future configurations to be // implemented before automatically deployed in order to ensure that // a DB can be effectively opened and operated on by a recent older code // version. And it is common to maintain a reasonable history of past // configurations to ensure smooth upgrade path and proper handling of // older SST files that haven't been compacted recently. // // It would be possible to make SstQueryFilterConfigs dynamically // configurable through strings, but that would encourage deployment // of ad-hoc, under-tested configurations. // // Solution: the {config_name_string, version_number} -> SstQueryFilterConfigs // mapping in SstQueryFilterConfigsManager formalizes the history (past and // future) of approved/tested configurations for a given use case. Filter // configurations are kept paired with extractors that they make sense with. // // The version numbers are "global" to the SstQueryFilterConfigsManager so // that it is simple to determine whether a particular code version supports // a particular filtering version, regardless of which use case. Numbering // always starts with 1, as 0 is reserved for selecting a "no filters" // configuration // // Consider an example initialized with this Data: // // SstQueryFilterConfigsManager::Data data = { // {1, {{"foo", foo_configs}}}, // {2, {{"bar", bar_configs}, // {"baz", baz_configs}}}, // {3, {{"bar", bar_configs_v2}}}, // }; // // For example, MakeSharedFactory(..., "baz", 1) will use a default empty // config, while both MakeSharedFactory(..., "baz", 2) and // MakeSharedFactory(..., "baz", 3) select `baz_configs`. Selecting version // >= 4 is rejected because those configurations are not known to this // version of the code. // // For correct operation, existing versions should be treated as immutable // (once committed to where they could enter production). For example, an // update to "baz" should be done in a new version (4), not by amending to // version 3. Note also (from before) that the behavior of named extractors // must not change, so changes to key segment extraction should introduce // new named extractors while keeping the old in the older configs. // // It is possible to eventually remove the oldest versions, as long as // * You won't be rolling back to that version. // * You don't have any SST files using an extractor that is only available // in that version (and prior). // * For each use case and version you might roll back to, you aren't removing // the configuration in effect for that version. In our example above, we // cannot simply remove version 1 because that would change the configuration // of "foo" at version 2. Moving {"foo", foo_configs} to version 2 would be // an acceptable work-around for retiring version 1. // * There are no gaps in the version numbers specified. Even if you completely // retire a use case and want to remove relevant code, you still need to keep // an explicit mapping, even if it's empty, as in `{3, {}}` if retiring "bar". // // Internally, the SstQueryFilterConfigsManager greatly simplifies lifetime // management for relevant objects such as KeySegmentExtractors, and provides // a lighter weight (and less troublesome) mechanism for relevant named object // look-up vs. ObjectRegistry. If following the guidelines above, any extractor // referenced in a read SST file should also be referenced by the // SstQueryFilterConfigsManager. // // Not user extensible class SstQueryFilterConfigsManager : public std::enable_shared_from_this { public: using FilteringVersion = uint32_t; using NamedConfigs = std::pair; using Data = std::vector>>; static Status MakeShared(const Data& data, std::shared_ptr* out); virtual ~SstQueryFilterConfigsManager() {} // EXPERIMENTAL/TEMPORARY: hook into table properties for persisting // filters and table_filter for applying to range queries. class Factory : public TablePropertiesCollectorFactory { public: // Modify the target filtering version for new filters. Returns // non-OK if the version is not supported. Thread-safe. virtual Status SetFilteringVersion(FilteringVersion ver) = 0; virtual FilteringVersion GetFilteringVersion() const = 0; // The configs_name used to create this Factory. Immutable. virtual const std::string& GetConfigsName() const = 0; // The relevant configs from the SstQueryFilterConfigsManager for // the ConfigsName and FilteringVersion. virtual const SstQueryFilterConfigs& GetConfigs() const = 0; // The buffers pointed to by the Slices must live as long as any read // operations using this table filter function. // Can read and process any filters created under this // SstQueryFilterConfigsManager but is most efficient when using the // same KeySegmentExtractor as this Factory's configs. // (That performance optimization is the only reason this function is here // rather than in SstQueryFilterConfigsManager.) virtual std::function GetTableFilterForRangeQuery(Slice lower_bound_incl, Slice upper_bound_excl) const = 0; }; // Returns OK and creates a Factory as long as `ver` is in the // supported range or 0 (always empty/not found). If the particular // config_name is not found under that version, then // factory->GetConfigs().IsEmptyNotFound() will be true. Such a factory can // read filters but will not write any filters. virtual Status MakeSharedFactory(const std::string& configs_name, FilteringVersion ver, std::shared_ptr* out) const = 0; }; } // namespace experimental } // namespace ROCKSDB_NAMESPACE