feat: allows ZendStr to contain null bytes (#202)

Closes https://github.com/davidcole1340/ext-php-rs/issues/200

## Rationale
In PHP zend_strings are binary strings with no encoding information. They can contain any byte at any position.
The current implementation use `CString` to transfer zend_strings between Rust and PHP, which prevents zend_strings containing null-bytes to roundtrip through the ffi layer. Moreover, `ZendStr::new()` accepts only a `&str`, which is incorrect since a zend_string is not required to be valid UTF8.

When reading the PHP source code, it is apparent that  most functions marked with `ZEND_API` that accept a `const *char` are convenience wrappers that convert the `const *char` to a zend_string and delegate to another function. For example [zend_throw_exception()](eb83e0206c/Zend/zend_exceptions.c (L823)) takes a `const *char message`, and just converts it to a zend_string before delegating to [zend_throw_exception_zstr()](eb83e0206c/Zend/zend_exceptions.c (L795)).

I kept this PR focused around `ZendStr` and it's usages in the library, but it should be seen as the first step of a more global effort to remove usages of `CString` everywhere possible.

Also, I didn't change the return type of the string related methods of `Zval` (e.g. I could have made `Zval::set_string()` 
 accept an `impl AsRef<[u8]>` instead of `&str` and return `()` instead of `Result<()>`). If I get feedback that it should be done in this PR, I'll do it.

## Summary of the changes:
### ZendStr
* [BC break]: `ZendStr::new()` and `ZendStr::new_interned()` now accept an `impl AsRef<[u8]>` instead of just `&str`, and are therefore infaillible (outside of the cases where we panic, e.g. when allocation fails). This is a BC break, but it's impact shouldn't be huge (users will most likely just have to remove a bunch of `?` or add a few `Ok()`).
* [BC break]: Conversely, `ZendStr::as_c_str()` now returns a `Result<&CStr>` since it can fail on strings containing null bytes.
* [BC break]: `ZensStr::as_str()` now returns a `Result<&str>` instead of an `Option<&str>` since we have to return an error in case of invalid UTF8.
* adds method `ZendStr::as_bytes()` to return the underlying byte slice.
* adds convenience methods `ZendStr::as_ptr()` and `ZendStr::as_mut_ptr()` to return raw pointers to the zend_string.

 ### ZendStr conversion traits
* adds `impl AsRef<[u8]> for ZendStr`
* [BC break]: replaces `impl TryFrom<String> for ZBox<ZendStr>` by `impl From<String> for ZBox<ZendStr>`.
* [BC break]: replaces `impl TryFrom<&str> for ZBox<ZendStr>` by `impl From<&str> for ZBox<ZendStr>`.
* [BC break]: replaces `impl From<&ZendStr> for &CStr` by `impl TryFrom<&ZendStr> for &CStr`.

### Error
* adds new enum member `Error::InvalidUtf8` used when converting a `ZendStr` to `String` or `&str`
This commit is contained in:
ju1ius 2022-12-09 10:54:17 +01:00 committed by GitHub
parent 9e08e253dc
commit d52a878e7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 135 additions and 89 deletions

3
.gitignore vendored
View File

@ -2,4 +2,5 @@
Cargo.lock
/.vscode
/.idea
expand.rs
/tmp
expand.rs

View File

@ -43,6 +43,8 @@ bind! {
// ext_php_rs_zend_object_release,
// ext_php_rs_zend_string_init,
// ext_php_rs_zend_string_release,
// ext_php_rs_is_kown_valid_utf8,
// ext_php_rs_set_kown_valid_utf8,
object_properties_init,
php_info_print_table_end,
php_info_print_table_header,

View File

@ -226,7 +226,7 @@ impl ClassBuilder {
///
/// Returns an [`Error`] variant if the class could not be registered.
pub fn build(mut self) -> Result<&'static mut ClassEntry> {
self.ce.name = ZendStr::new_interned(&self.name, true)?.into_raw();
self.ce.name = ZendStr::new_interned(&self.name, true).into_raw();
self.methods.push(FunctionEntry::end());
let func = Box::into_raw(self.methods.into_boxed_slice()) as *const FunctionEntry;

View File

@ -48,6 +48,8 @@ pub enum Error {
/// The string could not be converted into a C-string due to the presence of
/// a NUL character.
InvalidCString,
/// The string could not be converted into a valid Utf8 string
InvalidUtf8,
/// Could not call the given function.
Callable,
/// An invalid exception type was thrown.
@ -82,6 +84,7 @@ impl Display for Error {
f,
"String given contains NUL-bytes which cannot be present in a C string."
),
Error::InvalidUtf8 => write!(f, "Invalid Utf8 byte sequence."),
Error::Callable => write!(f, "Could not call given function."),
Error::InvalidException(flags) => {
write!(f, "Invalid exception type was thrown: {:?}", flags)

View File

@ -19,6 +19,9 @@ extern "C" {
persistent: bool,
) -> *mut zend_string;
pub fn ext_php_rs_zend_string_release(zs: *mut zend_string);
pub fn ext_php_rs_is_known_valid_utf8(zs: *const zend_string) -> bool;
pub fn ext_php_rs_set_known_valid_utf8(zs: *mut zend_string);
pub fn ext_php_rs_php_build_id() -> *const c_char;
pub fn ext_php_rs_zend_object_alloc(obj_size: usize, ce: *mut zend_class_entry) -> *mut c_void;
pub fn ext_php_rs_zend_object_release(obj: *mut zend_object);

View File

@ -137,7 +137,7 @@ impl ZendObject {
return Err(Error::InvalidProperty);
}
let mut name = ZendStr::new(name, false)?;
let mut name = ZendStr::new(name, false);
let mut rv = Zval::new();
let zv = unsafe {
@ -162,7 +162,7 @@ impl ZendObject {
/// * `name` - The name of the property.
/// * `value` - The value to set the property to.
pub fn set_property(&mut self, name: &str, value: impl IntoZval) -> Result<()> {
let mut name = ZendStr::new(name, false)?;
let mut name = ZendStr::new(name, false);
let mut value = value.into_zval(false)?;
unsafe {
@ -187,7 +187,7 @@ impl ZendObject {
/// * `name` - The name of the property.
/// * `query` - The 'query' to classify if a property exists.
pub fn has_property(&self, name: &str, query: PropertyQuery) -> Result<bool> {
let mut name = ZendStr::new(name, false)?;
let mut name = ZendStr::new(name, false);
Ok(unsafe {
self.handlers()?.has_property.ok_or(Error::InvalidScope)?(

View File

@ -16,6 +16,7 @@ use crate::{
convert::{FromZval, IntoZval},
error::{Error, Result},
ffi::{
ext_php_rs_is_known_valid_utf8, ext_php_rs_set_known_valid_utf8,
ext_php_rs_zend_string_init, ext_php_rs_zend_string_release, zend_string,
zend_string_init_interned,
},
@ -30,7 +31,7 @@ use crate::{
/// cannot represent unsized types, an array of size 1 is used at the end of the
/// type to represent the contents of the string, therefore this type is
/// actually unsized. All constructors return [`ZBox<ZendStr>`], the owned
/// varaint.
/// variant.
///
/// Once the `ptr_metadata` feature lands in stable rust, this type can
/// potentially be changed to a DST using slices and metadata. See the tracking issue here: <https://github.com/rust-lang/rust/issues/81513>
@ -46,7 +47,7 @@ static INTERNED_LOCK: Mutex<()> = const_mutex(());
// on the alias `ZendStr` :( <https://github.com/rust-lang/rust-clippy/issues/7702>
#[allow(clippy::len_without_is_empty)]
impl ZendStr {
/// Creates a new Zend string from a [`str`].
/// Creates a new Zend string from a slice of bytes.
///
/// # Parameters
///
@ -54,12 +55,6 @@ impl ZendStr {
/// * `persistent` - Whether the string should persist through the request
/// boundary.
///
/// # Returns
///
/// Returns a result containing the Zend string if successful. Returns an
/// error if the given string contains NUL bytes, which cannot be
/// contained inside a C string.
///
/// # Panics
///
/// Panics if the function was unable to allocate memory for the Zend
@ -78,10 +73,19 @@ impl ZendStr {
/// ```no_run
/// use ext_php_rs::types::ZendStr;
///
/// let s = ZendStr::new("Hello, world!", false).unwrap();
/// let s = ZendStr::new("Hello, world!", false);
/// let php = ZendStr::new([80, 72, 80], false);
/// ```
pub fn new(str: &str, persistent: bool) -> Result<ZBox<Self>> {
Ok(Self::from_c_str(&CString::new(str)?, persistent))
pub fn new(str: impl AsRef<[u8]>, persistent: bool) -> ZBox<Self> {
let s = str.as_ref();
// TODO: we should handle the special cases when length is either 0 or 1
// see `zend_string_init_fast()` in `zend_string.h`
unsafe {
let ptr = ext_php_rs_zend_string_init(s.as_ptr().cast(), s.len(), persistent)
.as_mut()
.expect("Failed to allocate memory for new Zend string");
ZBox::from_raw(ptr)
}
}
/// Creates a new Zend string from a [`CStr`].
@ -126,7 +130,7 @@ impl ZendStr {
}
}
/// Creates a new interned Zend string from a [`str`].
/// Creates a new interned Zend string from a slice of bytes.
///
/// An interned string is only ever stored once and is immutable. PHP stores
/// the string in an internal hashtable which stores the interned
@ -145,16 +149,12 @@ impl ZendStr {
/// * `persistent` - Whether the string should persist through the request
/// boundary.
///
/// # Returns
///
/// Returns a result containing the Zend string if successful. Returns an
/// error if the given string contains NUL bytes, which cannot be
/// contained inside a C string.
///
/// # Panics
///
/// Panics if the function was unable to allocate memory for the Zend
/// string.
/// Panics under the following circumstances:
///
/// * The function used to create interned strings has not been set.
/// * The function could not allocate enough memory for the Zend string.
///
/// # Safety
///
@ -171,8 +171,16 @@ impl ZendStr {
///
/// let s = ZendStr::new_interned("PHP", true);
/// ```
pub fn new_interned(str: &str, persistent: bool) -> Result<ZBox<Self>> {
Ok(Self::interned_from_c_str(&CString::new(str)?, persistent))
pub fn new_interned(str: impl AsRef<[u8]>, persistent: bool) -> ZBox<Self> {
let _lock = INTERNED_LOCK.lock();
let s = str.as_ref();
unsafe {
let init = zend_string_init_interned.expect("`zend_string_init_interned` not ready");
let ptr = init(s.as_ptr().cast(), s.len() as _, persistent)
.as_mut()
.expect("Failed to allocate memory for new Zend string");
ZBox::from_raw(ptr)
}
}
/// Creates a new interned Zend string from a [`CStr`].
@ -222,11 +230,8 @@ impl ZendStr {
let _lock = INTERNED_LOCK.lock();
unsafe {
let ptr = zend_string_init_interned.expect("`zend_string_init_interned` not ready")(
str.as_ptr(),
str.to_bytes().len() as _,
persistent,
);
let init = zend_string_init_interned.expect("`zend_string_init_interned` not ready");
let ptr = init(str.as_ptr(), str.to_bytes().len() as _, persistent);
ZBox::from_raw(
ptr.as_mut()
@ -242,7 +247,7 @@ impl ZendStr {
/// ```no_run
/// use ext_php_rs::types::ZendStr;
///
/// let s = ZendStr::new("hello, world!", false).unwrap();
/// let s = ZendStr::new("hello, world!", false);
/// assert_eq!(s.len(), 13);
/// ```
pub fn len(&self) -> usize {
@ -256,39 +261,61 @@ impl ZendStr {
/// ```no_run
/// use ext_php_rs::types::ZendStr;
///
/// let s = ZendStr::new("hello, world!", false).unwrap();
/// let s = ZendStr::new("hello, world!", false);
/// assert_eq!(s.is_empty(), false);
/// ```
pub fn is_empty(&self) -> bool {
self.len() == 0
}
/// Returns a reference to the underlying [`CStr`] inside the Zend string.
pub fn as_c_str(&self) -> &CStr {
// SAFETY: Zend strings store their readable length in a fat pointer.
unsafe {
let slice = slice::from_raw_parts(self.val.as_ptr() as *const u8, self.len() + 1);
CStr::from_bytes_with_nul_unchecked(slice)
}
/// Attempts to return a reference to the underlying bytes inside the Zend
/// string as a [`CStr`].
///
/// Returns an [Error::InvalidCString] variant if the string contains null
/// bytes.
pub fn as_c_str(&self) -> Result<&CStr> {
let bytes_with_null =
unsafe { slice::from_raw_parts(self.val.as_ptr().cast(), self.len() + 1) };
CStr::from_bytes_with_nul(bytes_with_null).map_err(|_| Error::InvalidCString)
}
/// Attempts to return a reference to the underlying [`str`] inside the Zend
/// Attempts to return a reference to the underlying bytes inside the Zend
/// string.
///
/// Returns the [`None`] variant if the [`CStr`] contains non-UTF-8
/// characters.
/// Returns an [Error::InvalidUtf8] variant if the [`str`] contains
/// non-UTF-8 characters.
///
/// # Example
///
/// ```no_run
/// use ext_php_rs::types::ZendStr;
///
/// let s = ZendStr::new("hello, world!", false).unwrap();
/// let as_str = s.as_str();
/// assert_eq!(as_str, Some("hello, world!"));
/// let s = ZendStr::new("hello, world!", false);
/// assert!(s.as_str().is_ok());
/// ```
pub fn as_str(&self) -> Option<&str> {
self.as_c_str().to_str().ok()
pub fn as_str(&self) -> Result<&str> {
if unsafe { ext_php_rs_is_known_valid_utf8(self.as_ptr()) } {
let str = unsafe { std::str::from_utf8_unchecked(self.as_bytes()) };
return Ok(str);
}
let str = std::str::from_utf8(self.as_bytes()).map_err(|_| Error::InvalidUtf8)?;
unsafe { ext_php_rs_set_known_valid_utf8(self.as_ptr() as *mut _) };
Ok(str)
}
/// Returns a reference to the underlying bytes inside the Zend string.
pub fn as_bytes(&self) -> &[u8] {
unsafe { slice::from_raw_parts(self.val.as_ptr().cast(), self.len()) }
}
/// Returns a raw pointer to this object
pub fn as_ptr(&self) -> *const ZendStr {
self as *const _
}
/// Returns a mutable pointer to this object
pub fn as_mut_ptr(&mut self) -> *mut ZendStr {
self as *mut _
}
}
@ -300,7 +327,22 @@ unsafe impl ZBoxable for ZendStr {
impl Debug for ZendStr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.as_c_str().fmt(f)
self.as_str().fmt(f)
}
}
impl AsRef<[u8]> for ZendStr {
fn as_ref(&self) -> &[u8] {
self.as_bytes()
}
}
impl<T> PartialEq<T> for ZendStr
where
T: AsRef<[u8]>,
{
fn eq(&self, other: &T) -> bool {
self.as_ref() == other.as_ref()
}
}
@ -308,19 +350,14 @@ impl ToOwned for ZendStr {
type Owned = ZBox<ZendStr>;
fn to_owned(&self) -> Self::Owned {
Self::from_c_str(self.as_c_str(), false)
Self::new(self.as_bytes(), false)
}
}
impl PartialEq for ZendStr {
#[inline]
fn eq(&self, other: &Self) -> bool {
self.as_c_str().eq(other.as_c_str())
}
}
impl<'a> TryFrom<&'a ZendStr> for &'a CStr {
type Error = Error;
impl<'a> From<&'a ZendStr> for &'a CStr {
fn from(value: &'a ZendStr) -> Self {
fn try_from(value: &'a ZendStr) -> Result<Self> {
value.as_c_str()
}
}
@ -329,7 +366,7 @@ impl<'a> TryFrom<&'a ZendStr> for &'a str {
type Error = Error;
fn try_from(value: &'a ZendStr) -> Result<Self> {
value.as_str().ok_or(Error::InvalidCString)
value.as_str()
}
}
@ -337,10 +374,7 @@ impl TryFrom<&ZendStr> for String {
type Error = Error;
fn try_from(value: &ZendStr) -> Result<Self> {
value
.as_str()
.map(|s| s.to_string())
.ok_or(Error::InvalidCString)
value.as_str().map(ToString::to_string)
}
}
@ -362,18 +396,14 @@ impl From<CString> for ZBox<ZendStr> {
}
}
impl TryFrom<&str> for ZBox<ZendStr> {
type Error = Error;
fn try_from(value: &str) -> Result<Self> {
ZendStr::new(value, false)
impl From<&str> for ZBox<ZendStr> {
fn from(value: &str) -> Self {
ZendStr::new(value.as_bytes(), false)
}
}
impl TryFrom<String> for ZBox<ZendStr> {
type Error = Error;
fn try_from(value: String) -> Result<Self> {
impl From<String> for ZBox<ZendStr> {
fn from(value: String) -> Self {
ZendStr::new(value.as_str(), false)
}
}

View File

@ -113,7 +113,7 @@ impl Zval {
/// convert other types into a [`String`], as it could not pass back a
/// [`&str`] in those cases.
pub fn str(&self) -> Option<&str> {
self.zend_str().and_then(|zs| zs.as_str())
self.zend_str().and_then(|zs| zs.as_str().ok())
}
/// Returns the value of the zval if it is a string and can be unpacked into
@ -340,7 +340,7 @@ impl Zval {
/// * `val` - The value to set the zval as.
/// * `persistent` - Whether the string should persist between requests.
pub fn set_string(&mut self, val: &str, persistent: bool) -> Result<()> {
self.set_zend_string(ZendStr::new(val, persistent)?);
self.set_zend_string(ZendStr::new(val, persistent));
Ok(())
}
@ -374,7 +374,7 @@ impl Zval {
/// * `val` - The value to set the zval as.
/// * `persistent` - Whether the string should persist between requests.
pub fn set_interned_string(&mut self, val: &str, persistent: bool) -> Result<()> {
self.set_zend_string(ZendStr::new_interned(val, persistent)?);
self.set_zend_string(ZendStr::new_interned(val, persistent));
Ok(())
}

View File

@ -1,7 +1,6 @@
#include "wrapper.h"
zend_string *ext_php_rs_zend_string_init(const char *str, size_t len,
bool persistent) {
zend_string *ext_php_rs_zend_string_init(const char *str, size_t len, bool persistent) {
return zend_string_init(str, len, persistent);
}
@ -9,6 +8,16 @@ void ext_php_rs_zend_string_release(zend_string *zs) {
zend_string_release(zs);
}
bool ext_php_rs_is_known_valid_utf8(const zend_string *zs) {
return GC_FLAGS(zs) & IS_STR_VALID_UTF8;
}
void ext_php_rs_set_known_valid_utf8(zend_string *zs) {
if (!ZSTR_IS_INTERNED(zs)) {
GC_ADD_FLAGS(zs, IS_STR_VALID_UTF8);
}
}
const char *ext_php_rs_php_build_id() { return ZEND_MODULE_BUILD_ID; }
void *ext_php_rs_zend_object_alloc(size_t obj_size, zend_class_entry *ce) {

View File

@ -21,9 +21,11 @@
#include "zend_inheritance.h"
#include "zend_interfaces.h"
zend_string *ext_php_rs_zend_string_init(const char *str, size_t len,
bool persistent);
zend_string *ext_php_rs_zend_string_init(const char *str, size_t len, bool persistent);
void ext_php_rs_zend_string_release(zend_string *zs);
bool ext_php_rs_is_known_valid_utf8(const zend_string *zs);
void ext_php_rs_set_known_valid_utf8(zend_string *zs);
const char *ext_php_rs_php_build_id();
void *ext_php_rs_zend_object_alloc(size_t obj_size, zend_class_entry *ce);
void ext_php_rs_zend_object_release(zend_object *obj);

View File

@ -15,7 +15,7 @@ impl ClassEntry {
/// could not be found or the class table has not been initialized.
pub fn try_find(name: &str) -> Option<&'static Self> {
ExecutorGlobals::get().class_table()?;
let mut name = ZendStr::new(name, false).ok()?;
let mut name = ZendStr::new(name, false);
unsafe {
crate::ffi::zend_lookup_class_ex(name.deref_mut(), std::ptr::null_mut(), 0).as_ref()
@ -77,7 +77,7 @@ impl ClassEntry {
unsafe { self.__bindgen_anon_1.parent.as_ref() }
} else {
let name = unsafe { self.__bindgen_anon_1.parent_name.as_ref()? };
Self::try_find(name.as_str()?)
Self::try_find(name.as_str().ok()?)
}
}
}

View File

@ -87,11 +87,7 @@ impl ZendObjectHandlers {
.ok_or("Invalid property name pointer given")?;
let self_ = &mut **obj;
let props = T::get_metadata().get_properties();
let prop = props.get(
prop_name
.as_str()
.ok_or("Invalid property name was given")?,
);
let prop = props.get(prop_name.as_str()?);
// retval needs to be treated as initialized, so we set the type to null
let rv_mut = rv.as_mut().ok_or("Invalid return zval given")?;
@ -138,7 +134,7 @@ impl ZendObjectHandlers {
.ok_or("Invalid property name pointer given")?;
let self_ = &mut **obj;
let props = T::get_metadata().get_properties();
let prop = props.get(prop_name.as_str().ok_or("Invalid property name given")?);
let prop = props.get(prop_name.as_str()?);
let value_mut = value.as_mut().ok_or("Invalid return zval given")?;
Ok(match prop {
@ -220,7 +216,7 @@ impl ZendObjectHandlers {
.as_ref()
.ok_or("Invalid property name pointer given")?;
let props = T::get_metadata().get_properties();
let prop = props.get(prop_name.as_str().ok_or("Invalid property name given")?);
let prop = props.get(prop_name.as_str()?);
let self_ = &mut **obj;
match has_set_exists {