mirror of
https://github.com/danog/strum.git
synced 2024-11-26 12:04:38 +01:00
Add support for PHF in EnumString (#220)
* Support phf crate for faster match on large enums * comment * add changelog entry * add tests, embed phf, and improve lowercase support * fix doc change * Refactor & support case insensitive with case sensitive phf match * more tests, some fixes of new implementation and prep for feature
This commit is contained in:
parent
ac757fa970
commit
832dd862c7
@ -1,5 +1,10 @@
|
||||
# Changelog
|
||||
|
||||
## Unreleased
|
||||
|
||||
* [#220](https://github.com/Peternator7/strum/pull/220). Add support for PHF in `EnumString` (opt-in runtime
|
||||
performance improvements for large enums via `#[strum(use_phf)]`; requires the `phf` feature and increases MSRV to `1.46`)
|
||||
|
||||
## 0.24.0
|
||||
|
||||
* [#212](https://github.com/Peternator7/strum/pull/212). Fix some clippy lints
|
||||
|
@ -16,6 +16,7 @@ readme = "../README.md"
|
||||
|
||||
[dependencies]
|
||||
strum_macros = { path = "../strum_macros", optional = true, version = "0.24" }
|
||||
phf = { version = "0.10", features = ["macros"], optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
strum_macros = { path = "../strum_macros", version = "0.24" }
|
||||
|
@ -30,6 +30,10 @@
|
||||
// only for documentation purposes
|
||||
pub mod additional_attributes;
|
||||
|
||||
#[cfg(feature = "phf")]
|
||||
#[doc(hidden)]
|
||||
pub use phf as _private_phf_reexport_for_macro_if_phf_feature;
|
||||
|
||||
/// The `ParseError` enum is a collection of all the possible reasons
|
||||
/// an enum can fail to parse from a string.
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
|
||||
|
@ -5,7 +5,8 @@ use syn::{
|
||||
parse2, parse_str,
|
||||
punctuated::Punctuated,
|
||||
spanned::Spanned,
|
||||
Attribute, DeriveInput, Ident, Lit, LitBool, LitStr, Meta, MetaNameValue, Path, Token, Variant, Visibility,
|
||||
Attribute, DeriveInput, Ident, Lit, LitBool, LitStr, Meta, MetaNameValue, Path, Token, Variant,
|
||||
Visibility,
|
||||
};
|
||||
|
||||
use super::case_style::CaseStyle;
|
||||
@ -16,6 +17,7 @@ pub mod kw {
|
||||
|
||||
// enum metadata
|
||||
custom_keyword!(serialize_all);
|
||||
custom_keyword!(use_phf);
|
||||
|
||||
// enum discriminant metadata
|
||||
custom_keyword!(derive);
|
||||
@ -43,6 +45,7 @@ pub enum EnumMeta {
|
||||
kw: kw::Crate,
|
||||
crate_module_path: Path,
|
||||
},
|
||||
UsePhf(kw::use_phf),
|
||||
}
|
||||
|
||||
impl Parse for EnumMeta {
|
||||
@ -64,8 +67,9 @@ impl Parse for EnumMeta {
|
||||
crate_module_path,
|
||||
})
|
||||
} else if lookahead.peek(kw::ascii_case_insensitive) {
|
||||
let kw = input.parse()?;
|
||||
Ok(EnumMeta::AsciiCaseInsensitive(kw))
|
||||
Ok(EnumMeta::AsciiCaseInsensitive(input.parse()?))
|
||||
} else if lookahead.peek(kw::use_phf) {
|
||||
Ok(EnumMeta::UsePhf(input.parse()?))
|
||||
} else {
|
||||
Err(lookahead.error())
|
||||
}
|
||||
@ -78,6 +82,7 @@ impl Spanned for EnumMeta {
|
||||
EnumMeta::SerializeAll { kw, .. } => kw.span(),
|
||||
EnumMeta::AsciiCaseInsensitive(kw) => kw.span(),
|
||||
EnumMeta::Crate { kw, .. } => kw.span(),
|
||||
EnumMeta::UsePhf(use_phf) => use_phf.span(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -275,14 +280,19 @@ pub trait VariantExt {
|
||||
impl VariantExt for Variant {
|
||||
fn get_metadata(&self) -> syn::Result<Vec<VariantMeta>> {
|
||||
let result = get_metadata_inner("strum", &self.attrs)?;
|
||||
self.attrs.iter()
|
||||
self.attrs
|
||||
.iter()
|
||||
.filter(|attr| attr.path.is_ident("doc"))
|
||||
.try_fold(result, |mut vec, attr| {
|
||||
if let Meta::NameValue(MetaNameValue { lit: Lit::Str(value), .. }) = attr.parse_meta()? {
|
||||
vec.push(VariantMeta::Documentation { value })
|
||||
}
|
||||
Ok(vec)
|
||||
})
|
||||
if let Meta::NameValue(MetaNameValue {
|
||||
lit: Lit::Str(value),
|
||||
..
|
||||
}) = attr.parse_meta()?
|
||||
{
|
||||
vec.push(VariantMeta::Documentation { value })
|
||||
}
|
||||
Ok(vec)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -20,6 +20,7 @@ pub struct StrumTypeProperties {
|
||||
pub discriminant_name: Option<Ident>,
|
||||
pub discriminant_others: Vec<TokenStream>,
|
||||
pub discriminant_vis: Option<Visibility>,
|
||||
pub use_phf: bool,
|
||||
}
|
||||
|
||||
impl HasTypeProperties for DeriveInput {
|
||||
@ -31,6 +32,7 @@ impl HasTypeProperties for DeriveInput {
|
||||
|
||||
let mut serialize_all_kw = None;
|
||||
let mut ascii_case_insensitive_kw = None;
|
||||
let mut use_phf_kw = None;
|
||||
let mut crate_module_path_kw = None;
|
||||
for meta in strum_meta {
|
||||
match meta {
|
||||
@ -50,6 +52,14 @@ impl HasTypeProperties for DeriveInput {
|
||||
ascii_case_insensitive_kw = Some(kw);
|
||||
output.ascii_case_insensitive = true;
|
||||
}
|
||||
EnumMeta::UsePhf(kw) => {
|
||||
if let Some(fst_kw) = use_phf_kw {
|
||||
return Err(occurrence_error(fst_kw, kw, "use_phf"));
|
||||
}
|
||||
|
||||
use_phf_kw = Some(kw);
|
||||
output.use_phf = true;
|
||||
}
|
||||
EnumMeta::Crate {
|
||||
crate_module_path,
|
||||
kw,
|
||||
|
@ -47,6 +47,9 @@ fn debug_print_generated(ast: &DeriveInput, toks: &TokenStream) {
|
||||
/// See the [Additional Attributes](https://docs.rs/strum/0.22/strum/additional_attributes/index.html)
|
||||
/// Section for more information on using this feature.
|
||||
///
|
||||
/// If you have a large enum, you may want to consider using the `use_phf` attribute here. It leverages
|
||||
/// perfect hash functions to parse much quicker than a standard `match`. (MSRV 1.46)
|
||||
///
|
||||
/// # Example of how to use `EnumString`
|
||||
/// ```
|
||||
/// use std::str::FromStr;
|
||||
@ -472,11 +475,11 @@ pub fn from_repr(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
|
||||
/// Encode strings into the enum itself. The `strum_macros::EnumMessage` macro implements the `strum::EnumMessage` trait.
|
||||
/// `EnumMessage` looks for `#[strum(message="...")]` attributes on your variants.
|
||||
/// You can also provide a `detailed_message="..."` attribute to create a separate, more detailed message than the first.
|
||||
///
|
||||
///
|
||||
/// `EnumMessage` also exposes the variants doc comments through `get_documentation()`. This is useful in some scenarios,
|
||||
/// but `get_message` should generally be preferred. Rust doc comments are intended for developer facing documentation,
|
||||
/// not end user messaging.
|
||||
///
|
||||
///
|
||||
/// ```
|
||||
/// // You need to bring the trait into scope to use it
|
||||
/// use strum::EnumMessage;
|
||||
|
@ -18,8 +18,15 @@ pub fn from_string_inner(ast: &DeriveInput) -> syn::Result<TokenStream> {
|
||||
let strum_module_path = type_properties.crate_module_path();
|
||||
|
||||
let mut default_kw = None;
|
||||
let mut default = quote! { _ => ::core::result::Result::Err(#strum_module_path::ParseError::VariantNotFound) };
|
||||
let mut arms = Vec::new();
|
||||
let mut default =
|
||||
quote! { ::core::result::Result::Err(#strum_module_path::ParseError::VariantNotFound) };
|
||||
let mut phf_exact_match_arms = Vec::new();
|
||||
// We'll use the first one if there are many variants
|
||||
let mut phf_lowercase_arms = Vec::new();
|
||||
// However if there are few variants we'll want to integrate these in the standard match to avoid alloc
|
||||
let mut case_insensitive_arms_alternative = Vec::new();
|
||||
// Later we can add custom arms in there
|
||||
let mut standard_match_arms = Vec::new();
|
||||
for variant in variants {
|
||||
let ident = &variant.ident;
|
||||
let variant_properties = variant.get_variant_properties()?;
|
||||
@ -45,26 +52,11 @@ pub fn from_string_inner(ast: &DeriveInput) -> syn::Result<TokenStream> {
|
||||
|
||||
default_kw = Some(kw);
|
||||
default = quote! {
|
||||
default => ::core::result::Result::Ok(#name::#ident(default.into()))
|
||||
::core::result::Result::Ok(#name::#ident(s.into()))
|
||||
};
|
||||
continue;
|
||||
}
|
||||
|
||||
let is_ascii_case_insensitive = variant_properties
|
||||
.ascii_case_insensitive
|
||||
.unwrap_or(type_properties.ascii_case_insensitive);
|
||||
// If we don't have any custom variants, add the default serialized name.
|
||||
let attrs = variant_properties
|
||||
.get_serializations(type_properties.case_style)
|
||||
.into_iter()
|
||||
.map(|serialization| {
|
||||
if is_ascii_case_insensitive {
|
||||
quote! { s if s.eq_ignore_ascii_case(#serialization) }
|
||||
} else {
|
||||
quote! { #serialization }
|
||||
}
|
||||
});
|
||||
|
||||
let params = match &variant.fields {
|
||||
Fields::Unit => quote! {},
|
||||
Fields::Unnamed(fields) => {
|
||||
@ -81,19 +73,95 @@ pub fn from_string_inner(ast: &DeriveInput) -> syn::Result<TokenStream> {
|
||||
}
|
||||
};
|
||||
|
||||
arms.push(quote! { #(#attrs => ::core::result::Result::Ok(#name::#ident #params)),* });
|
||||
let is_ascii_case_insensitive = variant_properties
|
||||
.ascii_case_insensitive
|
||||
.unwrap_or(type_properties.ascii_case_insensitive);
|
||||
|
||||
// If we don't have any custom variants, add the default serialized name.
|
||||
for serialization in variant_properties.get_serializations(type_properties.case_style) {
|
||||
if type_properties.use_phf {
|
||||
if !is_ascii_case_insensitive {
|
||||
phf_exact_match_arms.push(quote! { #serialization => #name::#ident #params, });
|
||||
} else {
|
||||
// In that case we'll store the lowercase values in phf, and lowercase at runtime
|
||||
// before searching
|
||||
// Unless there are few such variants, in that case we'll use the standard match with
|
||||
// eq_ignore_ascii_case to avoid allocating
|
||||
case_insensitive_arms_alternative.push(quote! { s if s.eq_ignore_ascii_case(#serialization) => #name::#ident #params, });
|
||||
|
||||
let mut ser_string = serialization.value();
|
||||
ser_string.make_ascii_lowercase();
|
||||
let serialization = syn::LitStr::new(&ser_string, serialization.span());
|
||||
phf_lowercase_arms.push(quote! { #serialization => #name::#ident #params, });
|
||||
}
|
||||
} else {
|
||||
standard_match_arms.push(if !is_ascii_case_insensitive {
|
||||
quote! { #serialization => #name::#ident #params, }
|
||||
} else {
|
||||
quote! { s if s.eq_ignore_ascii_case(#serialization) => #name::#ident #params, }
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
arms.push(default);
|
||||
// Below this threshold, allocating a lowercased string is probably more expensive than a few comparisons
|
||||
// Proper threshold is not benchmarked - feel free to do so :)
|
||||
if phf_lowercase_arms.len() <= 3 {
|
||||
standard_match_arms.extend(case_insensitive_arms_alternative);
|
||||
phf_lowercase_arms.clear();
|
||||
}
|
||||
|
||||
let use_phf = if phf_exact_match_arms.is_empty() && phf_lowercase_arms.is_empty() {
|
||||
quote!()
|
||||
} else {
|
||||
quote! {
|
||||
use #strum_module_path::_private_phf_reexport_for_macro_if_phf_feature as phf;
|
||||
}
|
||||
};
|
||||
let phf_body = if phf_exact_match_arms.is_empty() {
|
||||
quote!()
|
||||
} else {
|
||||
quote! {
|
||||
static PHF: phf::Map<&'static str, #name> = phf::phf_map! {
|
||||
#(#phf_exact_match_arms)*
|
||||
};
|
||||
if let Some(value) = PHF.get(s).cloned() {
|
||||
return ::core::result::Result::Ok(value);
|
||||
}
|
||||
}
|
||||
};
|
||||
let phf_lowercase_body = if phf_lowercase_arms.is_empty() {
|
||||
quote!()
|
||||
} else {
|
||||
quote! {
|
||||
static PHF_LOWERCASE: phf::Map<&'static str, #name> = phf::phf_map! {
|
||||
#(#phf_lowercase_arms)*
|
||||
};
|
||||
if let Some(value) = PHF_LOWERCASE.get(&s.to_ascii_lowercase()).cloned() {
|
||||
return ::core::result::Result::Ok(value);
|
||||
}
|
||||
}
|
||||
};
|
||||
let standard_match_body = if standard_match_arms.is_empty() {
|
||||
default
|
||||
} else {
|
||||
quote! {
|
||||
::core::result::Result::Ok(match s {
|
||||
#(#standard_match_arms)*
|
||||
_ => return #default,
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
let from_str = quote! {
|
||||
#[allow(clippy::use_self)]
|
||||
impl #impl_generics ::core::str::FromStr for #name #ty_generics #where_clause {
|
||||
type Err = #strum_module_path::ParseError;
|
||||
fn from_str(s: &str) -> ::core::result::Result< #name #ty_generics , <Self as ::core::str::FromStr>::Err> {
|
||||
match s {
|
||||
#(#arms),*
|
||||
}
|
||||
#use_phf
|
||||
#phf_body
|
||||
#phf_lowercase_body
|
||||
#standard_match_body
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -4,6 +4,10 @@ version = "0.24.0"
|
||||
edition = "2018"
|
||||
authors = ["Peter Glotfelty <peglotfe@microsoft.com>"]
|
||||
|
||||
[features]
|
||||
default = ["test_phf"]
|
||||
test_phf = ["strum/phf"]
|
||||
|
||||
[dependencies]
|
||||
strum = { path = "../strum", features = ["derive"] }
|
||||
strum_macros = { path = "../strum_macros", features = [] }
|
||||
|
33
strum_tests/tests/phf.rs
Normal file
33
strum_tests/tests/phf.rs
Normal file
@ -0,0 +1,33 @@
|
||||
// Verifies `EnumString` parsing on an enum annotated with `#[strum(use_phf)]`.
// Only compiled when the `test_phf` feature is enabled.
// `Red` is case sensitive and `Blue` is marked `ascii_case_insensitive`, so the
// test mixes both kinds of variant in a single enum. With a single
// case-insensitive variant, the derive's `<= 3` cutoff applies to `Blue`.
#[cfg(feature = "test_phf")]
|
||||
#[test]
|
||||
fn from_str_with_phf() {
|
||||
#[derive(Debug, PartialEq, Eq, Clone, strum::EnumString)]
|
||||
#[strum(use_phf)]
|
||||
enum Color {
|
||||
#[strum(ascii_case_insensitive)]
|
||||
Blue,
|
||||
Red,
|
||||
}
|
||||
// Exact-case lookup of a case-sensitive variant.
assert_eq!("Red".parse::<Color>().unwrap(), Color::Red);
|
||||
// Mixed-case input must still resolve to the case-insensitive variant.
assert_eq!("bLuE".parse::<Color>().unwrap(), Color::Blue);
|
||||
}
|
||||
|
||||
// Verifies case-insensitive `EnumString` parsing with `#[strum(use_phf)]` when
// the enum has many variants. Only compiled when the `test_phf` feature is enabled.
#[cfg(feature = "test_phf")]
|
||||
#[test]
|
||||
fn from_str_with_phf_big() {
|
||||
// This tests PHF when there are many case-insensitive variants.
// `ascii_case_insensitive` is set at the enum level, so all nine variants are
// case insensitive, which is more than the derive's `<= 3` cutoff for falling
// back to plain `eq_ignore_ascii_case` match arms.
#[derive(Debug, PartialEq, Eq, Clone, strum::EnumString)]
|
||||
#[strum(use_phf, ascii_case_insensitive)]
|
||||
enum Enum {
|
||||
Var1,
|
||||
Var2,
|
||||
Var3,
|
||||
Var4,
|
||||
Var5,
|
||||
Var6,
|
||||
Var7,
|
||||
Var8,
|
||||
Var9,
|
||||
}
|
||||
// Mixed-case input must resolve to the matching variant.
assert_eq!("vAr2".parse::<Enum>().unwrap(), Enum::Var2);
|
||||
}
|
Loading…
Reference in New Issue
Block a user