From 75555d4ed896b606baac731aed3372a6c2e25621 Mon Sep 17 00:00:00 2001 From: carter Date: Sat, 16 May 2026 11:59:53 -0600 Subject: [PATCH 1/4] Optimize to_writer to avoid double buffering --- benches/image_bench.rs | 56 ++++++++++++++++---------------- src/ser.rs | 73 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 93 insertions(+), 36 deletions(-) diff --git a/benches/image_bench.rs b/benches/image_bench.rs index 4dab0ed..7503673 100644 --- a/benches/image_bench.rs +++ b/benches/image_bench.rs @@ -53,33 +53,6 @@ pub struct VecBytesImage { pub data: Vec, } -// Below two options not currently supported -// // No serde_bytes optimization referenced data instead of copying it -// // Note: Deserializer is not really setup to take advantage of this yet -// #[derive(Deserialize, Serialize, PartialEq, Debug)] -// pub struct RefImage<'a> { -// pub header: Header, -// pub height: u32, -// pub width: u32, -// pub encoding: String, -// pub is_bigendian: u8, -// pub step: u32, -// pub data: &'a [u8], -// } - -// // With serde_bytes optimization, on referenced data -// #[derive(Deserialize, Serialize, PartialEq, Debug)] -// pub struct RefBytesImage<'a> { -// pub header: Header, -// pub height: u32, -// pub width: u32, -// pub encoding: String, -// pub is_bigendian: u8, -// pub step: u32, -// #[serde(with = "serde_bytes")] -// pub data: &'a [u8], -// } - // An alternate expression option that also works #[derive(Deserialize, Serialize, PartialEq, Debug)] pub struct SharedImage { @@ -115,6 +88,20 @@ fn serialize_vec_bytes_image(image: &VecBytesImage) { black_box(roslibrust_serde_rosmsg::to_vec(image).unwrap()); } +#[inline] +fn serialize_vec_bytes_image_to_new_vec(image: &VecBytesImage) { + black_box(roslibrust_serde_rosmsg::to_vec(image).unwrap()); +} + +#[inline] +fn serialize_vec_bytes_image_to_preallocated_vec( + image: &VecBytesImage, + cursor: &mut std::io::Cursor>, +) { + cursor.set_position(0); + black_box(roslibrust_serde_rosmsg::to_writer(cursor, image).unwrap()); +} + fn criterion_benchmark(c: &mut Criterion) { c.bench_function("parse_vec_image", |b| b.iter(|| parse_vec_image())); c.bench_function("parse_vec_bytes_image", |b| { @@ -123,9 +110,24 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("parse_shared_image", |b| b.iter(|| parse_shared_image())); let image: VecBytesImage = roslibrust_serde_rosmsg::from_slice(IMAGE_DATA).unwrap(); + + // Original serialization benchmark (kept for backward compatibility) c.bench_function("serialize_vec_bytes_image", |b| { b.iter(|| serialize_vec_bytes_image(&image)) }); + + // Benchmark serialization to a new Vec (allocates on each call) + c.bench_function("serialize_vec_bytes_image_to_new_vec", |b| { + b.iter(|| serialize_vec_bytes_image_to_new_vec(&image)) + }); + + // Benchmark serialization to a pre-allocated Vec (reuses allocation) + // Pre-allocate a buffer large enough for the serialized image + let serialized_size = roslibrust_serde_rosmsg::to_vec(&image).unwrap().len(); + c.bench_function("serialize_vec_bytes_image_to_preallocated_vec", |b| { + let mut cursor = std::io::Cursor::new(Vec::with_capacity(serialized_size)); + b.iter(|| serialize_vec_bytes_image_to_preallocated_vec(&image, &mut cursor)) + }); } criterion_group!( diff --git a/src/ser.rs b/src/ser.rs index 5ccd7a4..d065727 100644 --- a/src/ser.rs +++ b/src/ser.rs @@ -421,12 +421,15 @@ impl ser::Error for Error { } } -/// Serialize the given data structure `T` as ROSMSG into the IO stream. +/// Serialize the given data structure `T` as ROSMSG into a seekable IO stream. /// /// Serialization can fail if `T`'s implementation of `Serialize` decides to /// fail. It can also fail if the structure contains unsupported elements. /// -/// Finally, it can also fail due to writer failure. +/// Finally, it can also fail due to writer or seek failure. +/// +/// This function writes a placeholder length, serializes the data directly to the writer, +/// then seeks back to write the correct length. This avoids intermediate buffer allocation. /// /// # Examples /// @@ -439,15 +442,31 @@ impl ser::Error for Error { /// ``` pub fn to_writer(writer: &mut W, value: &T) -> Result<()> where - W: io::Write, + W: io::Write + io::Seek, T: ser::Serialize, { - let mut buffer = Vec::new(); - value.serialize(&mut Serializer::new(&mut buffer))?; - writer - .write_u32::(buffer.len() as u32) - .and_then(|_| writer.write_all(&buffer)) - .map_err(|v| v.into()) + // Write placeholder for length (will be overwritten) + let start_pos = writer.stream_position()?; + writer.write_u32::(0)?; + + // Serialize the value + let data_start = writer.stream_position()?; + let mut serializer = Serializer::new(writer); + value.serialize(&mut serializer)?; + let writer = serializer.into_inner(); + let end_pos = writer.stream_position()?; + + // Calculate the data length + let length = (end_pos - data_start) as u32; + + // Seek back and write the actual length + writer.seek(io::SeekFrom::Start(start_pos))?; + writer.write_u32::(length)?; + + // Seek back to end + writer.seek(io::SeekFrom::Start(end_pos))?; + + Ok(()) } /// Variant of [to_writer] where the 4 bytes for the overall message length are skipped @@ -739,4 +758,40 @@ mod tests { ] == answer ); } + + #[test] + fn to_writer_basic() { + let mut cursor = io::Cursor::new(Vec::new()); + to_writer(&mut cursor, &String::from("Hello, World!")).unwrap(); + assert_eq!(cursor.into_inner(), b"\x11\0\0\0\x0d\0\0\0Hello, World!"); + } + + #[test] + fn to_writer_same_as_to_vec() { + let test_value = vec![1u32, 2u32, 3u32, 4u32, 5u32]; + + let expected = to_vec(&test_value).unwrap(); + + let mut cursor = io::Cursor::new(Vec::new()); + to_writer(&mut cursor, &test_value).unwrap(); + + assert_eq!(cursor.into_inner(), expected); + } + + #[test] + fn to_writer_multiple_times() { + let mut cursor = io::Cursor::new(Vec::new()); + + // First write + to_writer(&mut cursor, &String::from("Hello")).unwrap(); + + // Second write appends + to_writer(&mut cursor, &String::from("World")).unwrap(); + + let result = cursor.into_inner(); + + // Should contain both messages + assert_eq!(&result[0..13], b"\x09\0\0\0\x05\0\0\0Hello"); + assert_eq!(&result[13..26], b"\x09\0\0\0\x05\0\0\0World"); + } } From 20d5dc971ea156802c89d71a3e896a30f384576e Mon Sep 17 00:00:00 2001 From: carter Date: Sat, 16 May 2026 12:03:20 -0600 Subject: [PATCH 2/4] Remove legacy benches --- benches/image_bench.rs | 38 -------------------------------------- 1 file changed, 38 deletions(-) diff --git a/benches/image_bench.rs b/benches/image_bench.rs index 7503673..06baa6b 100644 --- a/benches/image_bench.rs +++ b/benches/image_bench.rs @@ -25,18 +25,6 @@ pub struct Time { pub nsecs: u32, } -// Basic Image Representation -#[derive(Deserialize, Serialize, PartialEq, Debug)] -pub struct VecImage { - pub header: Header, - pub height: u32, - pub width: u32, - pub encoding: String, - pub is_bigendian: u8, - pub step: u32, - pub data: Vec, -} - // Includes serde_bytes optimization #[derive(Deserialize, Serialize, PartialEq, Debug)] pub struct VecBytesImage { @@ -53,36 +41,12 @@ pub struct VecBytesImage { pub data: Vec, } -// An alternate expression option that also works -#[derive(Deserialize, Serialize, PartialEq, Debug)] -pub struct SharedImage { - pub header: Header, - pub height: u32, - pub width: u32, - pub encoding: String, - pub is_bigendian: u8, - pub step: u32, - pub data: Box<[u8]>, -} - -#[inline] -fn parse_vec_image() { - let image: VecImage = roslibrust_serde_rosmsg::from_slice(IMAGE_DATA).unwrap(); - black_box(image); -} - #[inline] fn parse_vec_bytes_image() { let image: VecBytesImage = roslibrust_serde_rosmsg::from_slice(IMAGE_DATA).unwrap(); black_box(image); } -#[inline] -fn parse_shared_image() { - let image: SharedImage = roslibrust_serde_rosmsg::from_slice(IMAGE_DATA).unwrap(); - black_box(image); -} - #[inline] fn serialize_vec_bytes_image(image: &VecBytesImage) { black_box(roslibrust_serde_rosmsg::to_vec(image).unwrap()); @@ -103,11 +67,9 @@ fn serialize_vec_bytes_image_to_preallocated_vec( } fn criterion_benchmark(c: &mut Criterion) { - c.bench_function("parse_vec_image", |b| b.iter(|| parse_vec_image())); c.bench_function("parse_vec_bytes_image", |b| { b.iter(|| parse_vec_bytes_image()) }); - c.bench_function("parse_shared_image", |b| b.iter(|| parse_shared_image())); let image: VecBytesImage = roslibrust_serde_rosmsg::from_slice(IMAGE_DATA).unwrap(); From f839dd7394a15897bd1385a0b009394baff26abd Mon Sep 17 00:00:00 2001 From: carter Date: Sat, 16 May 2026 13:02:05 -0600 Subject: [PATCH 3/4] Optimize image and string reading time by ~50% by avoiding memset to 0 of Vec contents prior to memcpy --- benches/image_bench.rs | 30 ++++++++++-------------------- src/de.rs | 33 ++++++++++++++++++++------------- 2 files changed, 30 insertions(+), 33 deletions(-) diff --git a/benches/image_bench.rs b/benches/image_bench.rs index 06baa6b..449ce5c 100644 --- a/benches/image_bench.rs +++ b/benches/image_bench.rs @@ -42,23 +42,18 @@ pub struct VecBytesImage { } #[inline] -fn parse_vec_bytes_image() { +fn parse_image() { let image: VecBytesImage = roslibrust_serde_rosmsg::from_slice(IMAGE_DATA).unwrap(); black_box(image); } #[inline] -fn serialize_vec_bytes_image(image: &VecBytesImage) { +fn serialize_image_to_new_vec(image: &VecBytesImage) { black_box(roslibrust_serde_rosmsg::to_vec(image).unwrap()); } #[inline] -fn serialize_vec_bytes_image_to_new_vec(image: &VecBytesImage) { - black_box(roslibrust_serde_rosmsg::to_vec(image).unwrap()); -} - -#[inline] -fn serialize_vec_bytes_image_to_preallocated_vec( +fn serialize_image_to_prealloc_cursor( image: &VecBytesImage, cursor: &mut std::io::Cursor>, ) { @@ -67,34 +62,29 @@ fn serialize_vec_bytes_image_to_preallocated_vec( } fn criterion_benchmark(c: &mut Criterion) { - c.bench_function("parse_vec_bytes_image", |b| { - b.iter(|| parse_vec_bytes_image()) + c.bench_function("parse_image", |b| { + b.iter(|| parse_image()) }); let image: VecBytesImage = roslibrust_serde_rosmsg::from_slice(IMAGE_DATA).unwrap(); - // Original serialization benchmark (kept for backward compatibility) - c.bench_function("serialize_vec_bytes_image", |b| { - b.iter(|| serialize_vec_bytes_image(&image)) - }); - // Benchmark serialization to a new Vec (allocates on each call) - c.bench_function("serialize_vec_bytes_image_to_new_vec", |b| { - b.iter(|| serialize_vec_bytes_image_to_new_vec(&image)) + c.bench_function("serialize_image_to_new_vec", |b| { + b.iter(|| serialize_image_to_new_vec(&image)) }); // Benchmark serialization to a pre-allocated Vec (reuses allocation) // Pre-allocate a buffer large enough for the serialized image let serialized_size = roslibrust_serde_rosmsg::to_vec(&image).unwrap().len(); - c.bench_function("serialize_vec_bytes_image_to_preallocated_vec", |b| { + c.bench_function("serialize_image_to_prealloc_cursor", |b| { let mut cursor = std::io::Cursor::new(Vec::with_capacity(serialized_size)); - b.iter(|| serialize_vec_bytes_image_to_preallocated_vec(&image, &mut cursor)) + b.iter(|| serialize_image_to_prealloc_cursor(&image, &mut cursor)) }); } criterion_group!( name = benches; - config = Criterion::default().with_profiler(PProfProfiler::new(1000, Output::Flamegraph(None))); + config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None))); targets = criterion_benchmark ); criterion_main!(benches); diff --git a/src/de.rs b/src/de.rs index 45cd319..6fd0c11 100644 --- a/src/de.rs +++ b/src/de.rs @@ -14,7 +14,7 @@ use super::error::{Error, ErrorKind, Result, ResultExt}; use byteorder::{LittleEndian, ReadBytesExt}; use error_chain::*; use serde::de; -use std::io; +use std::io::{self, Read}; /// A structure for deserializing ROSMSG into Rust values. /// @@ -129,26 +129,33 @@ where .chain_err(|| ErrorKind::EndOfBuffer) } + #[inline] + fn read_exact_vec(&mut self, length: u32) -> Result> { + let length = length as usize; + let mut buffer = Vec::with_capacity(length); + let read = (&mut self.reader) + .take(length as u64) + .read_to_end(&mut buffer) + .chain_err(|| ErrorKind::EndOfBuffer)?; + if read != length { + bail!(ErrorKind::EndOfBuffer); + } + Ok(buffer) + } + #[inline] fn get_string(&mut self) -> Result { let length = self.pop_length()?; self.reserve_bytes(length)?; - let mut buffer = vec![0; length as usize]; - self.reader - .read_exact(&mut buffer) - .chain_err(|| ErrorKind::EndOfBuffer)?; + let buffer = self.read_exact_vec(length)?; String::from_utf8(buffer).chain_err(|| ErrorKind::BadStringData) } #[inline] fn get_byte_buf(&mut self) -> Result> { let length = self.pop_length()?; - let mut buffer = vec![0; length as usize]; - self.reader - .read_exact(&mut buffer) - .chain_err(|| ErrorKind::EndOfBuffer)?; - self.length -= length; - Ok(buffer) + self.reserve_bytes(length)?; + self.read_exact_vec(length) } } @@ -599,7 +606,7 @@ pub fn from_slice<'de, T>(bytes: &[u8]) -> Result where T: de::Deserialize<'de>, { - from_reader(io::Cursor::new(bytes)) + from_reader(bytes) } /// Variant of [from_slice] where the 4 bytes for the overall message length @@ -608,7 +615,7 @@ pub fn from_slice_known_length<'de, T>(bytes: &[u8], length: u32) -> Result where T: de::Deserialize<'de>, { - from_reader_known_length(io::Cursor::new(bytes), length) + from_reader_known_length(bytes, length) } /// Deserialize an instance of type `T` from a string of ROSMSG data. From afafae405cbd2f209a678e1ff2ca8f5c8074ac69 Mon Sep 17 00:00:00 2001 From: carter Date: Sat, 16 May 2026 13:09:48 -0600 Subject: [PATCH 4/4] Lint --- benches/image_bench.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/benches/image_bench.rs b/benches/image_bench.rs index 449ce5c..f025222 100644 --- a/benches/image_bench.rs +++ b/benches/image_bench.rs @@ -62,9 +62,7 @@ fn serialize_image_to_prealloc_cursor( } fn criterion_benchmark(c: &mut Criterion) { - c.bench_function("parse_image", |b| { - b.iter(|| parse_image()) - }); + c.bench_function("parse_image", |b| b.iter(|| parse_image())); let image: VecBytesImage = roslibrust_serde_rosmsg::from_slice(IMAGE_DATA).unwrap();