diff --git a/.gitignore b/.gitignore index 4f29e17..89e3a70 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ Cargo.lock /.vscode /.idea -expand.rs +/tmp +expand.rs \ No newline at end of file diff --git a/allowed_bindings.rs b/allowed_bindings.rs index 827c750..6afa7ca 100644 --- a/allowed_bindings.rs +++ b/allowed_bindings.rs @@ -43,6 +43,8 @@ bind! { // ext_php_rs_zend_object_release, // ext_php_rs_zend_string_init, // ext_php_rs_zend_string_release, + // ext_php_rs_is_kown_valid_utf8, + // ext_php_rs_set_kown_valid_utf8, object_properties_init, php_info_print_table_end, php_info_print_table_header, diff --git a/src/builders/class.rs b/src/builders/class.rs index bfc7453..b249506 100644 --- a/src/builders/class.rs +++ b/src/builders/class.rs @@ -226,7 +226,7 @@ impl ClassBuilder { /// /// Returns an [`Error`] variant if the class could not be registered. pub fn build(mut self) -> Result<&'static mut ClassEntry> { - self.ce.name = ZendStr::new_interned(&self.name, true)?.into_raw(); + self.ce.name = ZendStr::new_interned(&self.name, true).into_raw(); self.methods.push(FunctionEntry::end()); let func = Box::into_raw(self.methods.into_boxed_slice()) as *const FunctionEntry; diff --git a/src/error.rs b/src/error.rs index eb24096..eaa0773 100644 --- a/src/error.rs +++ b/src/error.rs @@ -48,6 +48,8 @@ pub enum Error { /// The string could not be converted into a C-string due to the presence of /// a NUL character. InvalidCString, + /// The string could not be converted into a valid Utf8 string + InvalidUtf8, /// Could not call the given function. Callable, /// An invalid exception type was thrown. @@ -82,6 +84,7 @@ impl Display for Error { f, "String given contains NUL-bytes which cannot be present in a C string." ), + Error::InvalidUtf8 => write!(f, "Invalid Utf8 byte sequence."), Error::Callable => write!(f, "Could not call given function."), Error::InvalidException(flags) => { write!(f, "Invalid exception type was thrown: {:?}", flags) diff --git a/src/ffi.rs b/src/ffi.rs index e750f47..92614c4 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -19,6 +19,9 @@ extern "C" { persistent: bool, ) -> *mut zend_string; pub fn ext_php_rs_zend_string_release(zs: *mut zend_string); + pub fn ext_php_rs_is_known_valid_utf8(zs: *const zend_string) -> bool; + pub fn ext_php_rs_set_known_valid_utf8(zs: *mut zend_string); + pub fn ext_php_rs_php_build_id() -> *const c_char; pub fn ext_php_rs_zend_object_alloc(obj_size: usize, ce: *mut zend_class_entry) -> *mut c_void; pub fn ext_php_rs_zend_object_release(obj: *mut zend_object); diff --git a/src/types/object.rs b/src/types/object.rs index 24c70d8..606c483 100644 --- a/src/types/object.rs +++ b/src/types/object.rs @@ -137,7 +137,7 @@ impl ZendObject { return Err(Error::InvalidProperty); } - let mut name = ZendStr::new(name, false)?; + let mut name = ZendStr::new(name, false); let mut rv = Zval::new(); let zv = unsafe { @@ -162,7 +162,7 @@ impl ZendObject { /// * `name` - The name of the property. /// * `value` - The value to set the property to. pub fn set_property(&mut self, name: &str, value: impl IntoZval) -> Result<()> { - let mut name = ZendStr::new(name, false)?; + let mut name = ZendStr::new(name, false); let mut value = value.into_zval(false)?; unsafe { @@ -187,7 +187,7 @@ impl ZendObject { /// * `name` - The name of the property. /// * `query` - The 'query' to classify if a property exists. pub fn has_property(&self, name: &str, query: PropertyQuery) -> Result { - let mut name = ZendStr::new(name, false)?; + let mut name = ZendStr::new(name, false); Ok(unsafe { self.handlers()?.has_property.ok_or(Error::InvalidScope)?( diff --git a/src/types/string.rs b/src/types/string.rs index dd9624a..6256ee2 100644 --- a/src/types/string.rs +++ b/src/types/string.rs @@ -16,6 +16,7 @@ use crate::{ convert::{FromZval, IntoZval}, error::{Error, Result}, ffi::{ + ext_php_rs_is_known_valid_utf8, ext_php_rs_set_known_valid_utf8, ext_php_rs_zend_string_init, ext_php_rs_zend_string_release, zend_string, zend_string_init_interned, }, @@ -30,7 +31,7 @@ use crate::{ /// cannot represent unsized types, an array of size 1 is used at the end of the /// type to represent the contents of the string, therefore this type is /// actually unsized. All constructors return [`ZBox`], the owned -/// varaint. +/// variant. /// /// Once the `ptr_metadata` feature lands in stable rust, this type can /// potentially be changed to a DST using slices and metadata. See the tracking issue here: @@ -46,7 +47,7 @@ static INTERNED_LOCK: Mutex<()> = const_mutex(()); // on the alias `ZendStr` :( #[allow(clippy::len_without_is_empty)] impl ZendStr { - /// Creates a new Zend string from a [`str`]. + /// Creates a new Zend string from a slice of bytes. /// /// # Parameters /// @@ -54,12 +55,6 @@ impl ZendStr { /// * `persistent` - Whether the string should persist through the request /// boundary. /// - /// # Returns - /// - /// Returns a result containing the Zend string if successful. Returns an - /// error if the given string contains NUL bytes, which cannot be - /// contained inside a C string. - /// /// # Panics /// /// Panics if the function was unable to allocate memory for the Zend @@ -78,10 +73,19 @@ impl ZendStr { /// ```no_run /// use ext_php_rs::types::ZendStr; /// - /// let s = ZendStr::new("Hello, world!", false).unwrap(); + /// let s = ZendStr::new("Hello, world!", false); + /// let php = ZendStr::new([80, 72, 80], false); /// ``` - pub fn new(str: &str, persistent: bool) -> Result> { - Ok(Self::from_c_str(&CString::new(str)?, persistent)) + pub fn new(str: impl AsRef<[u8]>, persistent: bool) -> ZBox { + let s = str.as_ref(); + // TODO: we should handle the special cases when length is either 0 or 1 + // see `zend_string_init_fast()` in `zend_string.h` + unsafe { + let ptr = ext_php_rs_zend_string_init(s.as_ptr().cast(), s.len(), persistent) + .as_mut() + .expect("Failed to allocate memory for new Zend string"); + ZBox::from_raw(ptr) + } } /// Creates a new Zend string from a [`CStr`]. @@ -126,7 +130,7 @@ impl ZendStr { } } - /// Creates a new interned Zend string from a [`str`]. + /// Creates a new interned Zend string from a slice of bytes. /// /// An interned string is only ever stored once and is immutable. PHP stores /// the string in an internal hashtable which stores the interned @@ -145,16 +149,12 @@ impl ZendStr { /// * `persistent` - Whether the string should persist through the request /// boundary. /// - /// # Returns - /// - /// Returns a result containing the Zend string if successful. Returns an - /// error if the given string contains NUL bytes, which cannot be - /// contained inside a C string. - /// /// # Panics /// - /// Panics if the function was unable to allocate memory for the Zend - /// string. + /// Panics under the following circumstances: + /// + /// * The function used to create interned strings has not been set. + /// * The function could not allocate enough memory for the Zend string. /// /// # Safety /// @@ -171,8 +171,16 @@ impl ZendStr { /// /// let s = ZendStr::new_interned("PHP", true); /// ``` - pub fn new_interned(str: &str, persistent: bool) -> Result> { - Ok(Self::interned_from_c_str(&CString::new(str)?, persistent)) + pub fn new_interned(str: impl AsRef<[u8]>, persistent: bool) -> ZBox { + let _lock = INTERNED_LOCK.lock(); + let s = str.as_ref(); + unsafe { + let init = zend_string_init_interned.expect("`zend_string_init_interned` not ready"); + let ptr = init(s.as_ptr().cast(), s.len() as _, persistent) + .as_mut() + .expect("Failed to allocate memory for new Zend string"); + ZBox::from_raw(ptr) + } } /// Creates a new interned Zend string from a [`CStr`]. @@ -222,11 +230,8 @@ impl ZendStr { let _lock = INTERNED_LOCK.lock(); unsafe { - let ptr = zend_string_init_interned.expect("`zend_string_init_interned` not ready")( - str.as_ptr(), - str.to_bytes().len() as _, - persistent, - ); + let init = zend_string_init_interned.expect("`zend_string_init_interned` not ready"); + let ptr = init(str.as_ptr(), str.to_bytes().len() as _, persistent); ZBox::from_raw( ptr.as_mut() @@ -242,7 +247,7 @@ impl ZendStr { /// ```no_run /// use ext_php_rs::types::ZendStr; /// - /// let s = ZendStr::new("hello, world!", false).unwrap(); + /// let s = ZendStr::new("hello, world!", false); /// assert_eq!(s.len(), 13); /// ``` pub fn len(&self) -> usize { @@ -256,39 +261,61 @@ impl ZendStr { /// ```no_run /// use ext_php_rs::types::ZendStr; /// - /// let s = ZendStr::new("hello, world!", false).unwrap(); + /// let s = ZendStr::new("hello, world!", false); /// assert_eq!(s.is_empty(), false); /// ``` pub fn is_empty(&self) -> bool { self.len() == 0 } - /// Returns a reference to the underlying [`CStr`] inside the Zend string. - pub fn as_c_str(&self) -> &CStr { - // SAFETY: Zend strings store their readable length in a fat pointer. - unsafe { - let slice = slice::from_raw_parts(self.val.as_ptr() as *const u8, self.len() + 1); - CStr::from_bytes_with_nul_unchecked(slice) - } + /// Attempts to return a reference to the underlying bytes inside the Zend + /// string as a [`CStr`]. + /// + /// Returns an [Error::InvalidCString] variant if the string contains null + /// bytes. + pub fn as_c_str(&self) -> Result<&CStr> { + let bytes_with_null = + unsafe { slice::from_raw_parts(self.val.as_ptr().cast(), self.len() + 1) }; + CStr::from_bytes_with_nul(bytes_with_null).map_err(|_| Error::InvalidCString) } - /// Attempts to return a reference to the underlying [`str`] inside the Zend + /// Attempts to return a reference to the underlying bytes inside the Zend /// string. /// - /// Returns the [`None`] variant if the [`CStr`] contains non-UTF-8 - /// characters. + /// Returns an [Error::InvalidUtf8] variant if the [`str`] contains + /// non-UTF-8 characters. /// /// # Example /// /// ```no_run /// use ext_php_rs::types::ZendStr; /// - /// let s = ZendStr::new("hello, world!", false).unwrap(); - /// let as_str = s.as_str(); - /// assert_eq!(as_str, Some("hello, world!")); + /// let s = ZendStr::new("hello, world!", false); + /// assert!(s.as_str().is_ok()); /// ``` - pub fn as_str(&self) -> Option<&str> { - self.as_c_str().to_str().ok() + pub fn as_str(&self) -> Result<&str> { + if unsafe { ext_php_rs_is_known_valid_utf8(self.as_ptr()) } { + let str = unsafe { std::str::from_utf8_unchecked(self.as_bytes()) }; + return Ok(str); + } + let str = std::str::from_utf8(self.as_bytes()).map_err(|_| Error::InvalidUtf8)?; + unsafe { ext_php_rs_set_known_valid_utf8(self.as_ptr() as *mut _) }; + Ok(str) + } + + /// Returns a reference to the underlying bytes inside the Zend string. + pub fn as_bytes(&self) -> &[u8] { + unsafe { slice::from_raw_parts(self.val.as_ptr().cast(), self.len()) } + } + + /// Returns a raw pointer to this object + pub fn as_ptr(&self) -> *const ZendStr { + self as *const _ + } + + /// Returns a mutable pointer to this object + pub fn as_mut_ptr(&mut self) -> *mut ZendStr { + self as *mut _ } } @@ -300,7 +327,22 @@ unsafe impl ZBoxable for ZendStr { impl Debug for ZendStr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.as_c_str().fmt(f) + self.as_str().fmt(f) + } +} + +impl AsRef<[u8]> for ZendStr { + fn as_ref(&self) -> &[u8] { + self.as_bytes() + } +} + +impl PartialEq for ZendStr +where + T: AsRef<[u8]>, +{ + fn eq(&self, other: &T) -> bool { + self.as_ref() == other.as_ref() } } @@ -308,19 +350,14 @@ impl ToOwned for ZendStr { type Owned = ZBox; fn to_owned(&self) -> Self::Owned { - Self::from_c_str(self.as_c_str(), false) + Self::new(self.as_bytes(), false) } } -impl PartialEq for ZendStr { - #[inline] - fn eq(&self, other: &Self) -> bool { - self.as_c_str().eq(other.as_c_str()) - } -} +impl<'a> TryFrom<&'a ZendStr> for &'a CStr { + type Error = Error; -impl<'a> From<&'a ZendStr> for &'a CStr { - fn from(value: &'a ZendStr) -> Self { + fn try_from(value: &'a ZendStr) -> Result { value.as_c_str() } } @@ -329,7 +366,7 @@ impl<'a> TryFrom<&'a ZendStr> for &'a str { type Error = Error; fn try_from(value: &'a ZendStr) -> Result { - value.as_str().ok_or(Error::InvalidCString) + value.as_str() } } @@ -337,10 +374,7 @@ impl TryFrom<&ZendStr> for String { type Error = Error; fn try_from(value: &ZendStr) -> Result { - value - .as_str() - .map(|s| s.to_string()) - .ok_or(Error::InvalidCString) + value.as_str().map(ToString::to_string) } } @@ -362,18 +396,14 @@ impl From for ZBox { } } -impl TryFrom<&str> for ZBox { - type Error = Error; - - fn try_from(value: &str) -> Result { - ZendStr::new(value, false) +impl From<&str> for ZBox { + fn from(value: &str) -> Self { + ZendStr::new(value.as_bytes(), false) } } -impl TryFrom for ZBox { - type Error = Error; - - fn try_from(value: String) -> Result { +impl From for ZBox { + fn from(value: String) -> Self { ZendStr::new(value.as_str(), false) } } diff --git a/src/types/zval.rs b/src/types/zval.rs index 12292e6..34c8c81 100644 --- a/src/types/zval.rs +++ b/src/types/zval.rs @@ -113,7 +113,7 @@ impl Zval { /// convert other types into a [`String`], as it could not pass back a /// [`&str`] in those cases. pub fn str(&self) -> Option<&str> { - self.zend_str().and_then(|zs| zs.as_str()) + self.zend_str().and_then(|zs| zs.as_str().ok()) } /// Returns the value of the zval if it is a string and can be unpacked into @@ -340,7 +340,7 @@ impl Zval { /// * `val` - The value to set the zval as. /// * `persistent` - Whether the string should persist between requests. pub fn set_string(&mut self, val: &str, persistent: bool) -> Result<()> { - self.set_zend_string(ZendStr::new(val, persistent)?); + self.set_zend_string(ZendStr::new(val, persistent)); Ok(()) } @@ -374,7 +374,7 @@ impl Zval { /// * `val` - The value to set the zval as. /// * `persistent` - Whether the string should persist between requests. pub fn set_interned_string(&mut self, val: &str, persistent: bool) -> Result<()> { - self.set_zend_string(ZendStr::new_interned(val, persistent)?); + self.set_zend_string(ZendStr::new_interned(val, persistent)); Ok(()) } diff --git a/src/wrapper.c b/src/wrapper.c index 240b2d6..faf585e 100644 --- a/src/wrapper.c +++ b/src/wrapper.c @@ -1,7 +1,6 @@ #include "wrapper.h" -zend_string *ext_php_rs_zend_string_init(const char *str, size_t len, - bool persistent) { +zend_string *ext_php_rs_zend_string_init(const char *str, size_t len, bool persistent) { return zend_string_init(str, len, persistent); } @@ -9,6 +8,16 @@ void ext_php_rs_zend_string_release(zend_string *zs) { zend_string_release(zs); } +bool ext_php_rs_is_known_valid_utf8(const zend_string *zs) { + return GC_FLAGS(zs) & IS_STR_VALID_UTF8; +} + +void ext_php_rs_set_known_valid_utf8(zend_string *zs) { + if (!ZSTR_IS_INTERNED(zs)) { + GC_ADD_FLAGS(zs, IS_STR_VALID_UTF8); + } +} + const char *ext_php_rs_php_build_id() { return ZEND_MODULE_BUILD_ID; } void *ext_php_rs_zend_object_alloc(size_t obj_size, zend_class_entry *ce) { diff --git a/src/wrapper.h b/src/wrapper.h index f55f3ec..2813263 100644 --- a/src/wrapper.h +++ b/src/wrapper.h @@ -21,9 +21,11 @@ #include "zend_inheritance.h" #include "zend_interfaces.h" -zend_string *ext_php_rs_zend_string_init(const char *str, size_t len, - bool persistent); +zend_string *ext_php_rs_zend_string_init(const char *str, size_t len, bool persistent); void ext_php_rs_zend_string_release(zend_string *zs); +bool ext_php_rs_is_known_valid_utf8(const zend_string *zs); +void ext_php_rs_set_known_valid_utf8(zend_string *zs); + const char *ext_php_rs_php_build_id(); void *ext_php_rs_zend_object_alloc(size_t obj_size, zend_class_entry *ce); void ext_php_rs_zend_object_release(zend_object *obj); diff --git a/src/zend/class.rs b/src/zend/class.rs index 952302a..e3f6cee 100644 --- a/src/zend/class.rs +++ b/src/zend/class.rs @@ -15,7 +15,7 @@ impl ClassEntry { /// could not be found or the class table has not been initialized. pub fn try_find(name: &str) -> Option<&'static Self> { ExecutorGlobals::get().class_table()?; - let mut name = ZendStr::new(name, false).ok()?; + let mut name = ZendStr::new(name, false); unsafe { crate::ffi::zend_lookup_class_ex(name.deref_mut(), std::ptr::null_mut(), 0).as_ref() @@ -77,7 +77,7 @@ impl ClassEntry { unsafe { self.__bindgen_anon_1.parent.as_ref() } } else { let name = unsafe { self.__bindgen_anon_1.parent_name.as_ref()? }; - Self::try_find(name.as_str()?) + Self::try_find(name.as_str().ok()?) } } } diff --git a/src/zend/handlers.rs b/src/zend/handlers.rs index 3d1371a..4ec4ef9 100644 --- a/src/zend/handlers.rs +++ b/src/zend/handlers.rs @@ -87,11 +87,7 @@ impl ZendObjectHandlers { .ok_or("Invalid property name pointer given")?; let self_ = &mut **obj; let props = T::get_metadata().get_properties(); - let prop = props.get( - prop_name - .as_str() - .ok_or("Invalid property name was given")?, - ); + let prop = props.get(prop_name.as_str()?); // retval needs to be treated as initialized, so we set the type to null let rv_mut = rv.as_mut().ok_or("Invalid return zval given")?; @@ -138,7 +134,7 @@ impl ZendObjectHandlers { .ok_or("Invalid property name pointer given")?; let self_ = &mut **obj; let props = T::get_metadata().get_properties(); - let prop = props.get(prop_name.as_str().ok_or("Invalid property name given")?); + let prop = props.get(prop_name.as_str()?); let value_mut = value.as_mut().ok_or("Invalid return zval given")?; Ok(match prop { @@ -220,7 +216,7 @@ impl ZendObjectHandlers { .as_ref() .ok_or("Invalid property name pointer given")?; let props = T::get_metadata().get_properties(); - let prop = props.get(prop_name.as_str().ok_or("Invalid property name given")?); + let prop = props.get(prop_name.as_str()?); let self_ = &mut **obj; match has_set_exists {