Add io-uring driver (#515)

This commit is contained in:
Nikolay Kim 2025-03-11 23:31:41 +05:00 committed by GitHub
parent 47afec7351
commit 60a686b2f6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
38 changed files with 1700 additions and 277 deletions

View file

@ -60,6 +60,10 @@ impl Flags {
self.contains(Flags::BUF_R_READY)
}
/// Reports whether the `RD_NOTIFY` flag is set.
///
/// The read paths visible alongside this helper wake the dispatch task when
/// this returns `true` and data was read from the source.
pub(crate) fn is_waiting_for_read(&self) -> bool {
self.contains(Flags::RD_NOTIFY)
}
/// Reports whether reading is currently blocked, i.e. whether at least one
/// of `RD_PAUSED` or `BUF_R_FULL` is set.
pub(crate) fn cannot_read(self) -> bool {
    self.intersects(Flags::RD_PAUSED) || self.intersects(Flags::BUF_R_FULL)
}

View file

@ -437,7 +437,7 @@ impl<F> Io<F> {
} else {
st.dispatch_task.register(cx.waker());
let ready = flags.contains(Flags::BUF_R_READY);
let ready = flags.is_read_buf_ready();
if flags.cannot_read() {
flags.cleanup_read_flags();
st.read_task.wake();
@ -558,24 +558,28 @@ impl<F> Io<F> {
let st = self.st();
let flags = self.flags();
if flags.is_stopped() {
Poll::Ready(Err(st.error_or_disconnected()))
} else {
let len = st.buffer.write_destination_size();
if len > 0 {
if full {
st.insert_flags(Flags::BUF_W_MUST_FLUSH);
st.dispatch_task.register(cx.waker());
return Poll::Pending;
} else if len >= st.pool.get().write_params_high() << 1 {
st.insert_flags(Flags::BUF_W_BACKPRESSURE);
st.dispatch_task.register(cx.waker());
return Poll::Pending;
}
let len = st.buffer.write_destination_size();
if len > 0 {
if full {
st.insert_flags(Flags::BUF_W_MUST_FLUSH);
st.dispatch_task.register(cx.waker());
return if flags.is_stopped() {
Poll::Ready(Err(st.error_or_disconnected()))
} else {
Poll::Pending
};
} else if len >= st.pool.get().write_params_high() << 1 {
st.insert_flags(Flags::BUF_W_BACKPRESSURE);
st.dispatch_task.register(cx.waker());
return if flags.is_stopped() {
Poll::Ready(Err(st.error_or_disconnected()))
} else {
Poll::Pending
};
}
st.remove_flags(Flags::BUF_W_MUST_FLUSH | Flags::BUF_W_BACKPRESSURE);
Poll::Ready(Ok(()))
}
st.remove_flags(Flags::BUF_W_MUST_FLUSH | Flags::BUF_W_BACKPRESSURE);
Poll::Ready(Ok(()))
}
#[inline]

View file

@ -128,7 +128,7 @@ impl ReadContext {
);
// dest buffer has new data, wake up dispatcher
inner.dispatch_task.wake();
} else if inner.flags.get().contains(Flags::RD_NOTIFY) {
} else if inner.flags.get().is_waiting_for_read() {
// in case of "notify" we must wake up dispatch task
// if we read any data from source
inner.dispatch_task.wake();
@ -447,6 +447,280 @@ impl IoContext {
}
}
/// Get read buffer
///
/// Takes the waker stored in `read_task` and polls the filter for read
/// readiness with it. A buffer is returned only when the filter reports
/// `ReadStatus::Ready`; if no waker is stored, or the filter reports
/// anything else, the result is `Poll::Pending`.
///
/// The returned buffer is grown so that, whenever its spare capacity is
/// below the pool's low read watermark, it is extended by the difference
/// between the high watermark and that spare capacity.
pub fn get_read_buf(&self) -> Poll<BytesVec> {
    let inner = &self.0 .0;

    let waker = match inner.read_task.take() {
        Some(waker) => waker,
        None => return Poll::Pending,
    };

    let readiness = self
        .0
        .filter()
        .poll_read_ready(&mut Context::from_waker(&waker));
    if !matches!(readiness, Poll::Ready(ReadStatus::Ready)) {
        return Poll::Pending;
    }

    let mut buf = if inner.flags.get().is_read_buf_ready() {
        // the dispatcher has not consumed the current read buffer yet,
        // so it must stay untouched; work with a fresh pool buffer instead
        inner.pool.get().get_read_buf()
    } else {
        match inner.buffer.get_read_source() {
            Some(source) => source,
            None => inner.pool.get().get_read_buf(),
        }
    };

    // make sure there is room for incoming data
    let (hw, lw) = self.0.memory_pool().read_params().unpack();
    let spare = buf.remaining_mut();
    if spare < lw {
        buf.reserve(hw - spare);
    }

    Poll::Ready(buf)
}
/// Give a read buffer back without reporting a read result.
///
/// If a read source buffer is already stored, the contents of `buf` are
/// appended to it and the combined buffer is stored again; otherwise `buf`
/// itself becomes the read source.
pub fn release_read_buf(&self, buf: BytesVec) {
    let inner = &self.0 .0;

    let source = match inner.buffer.get_read_source() {
        Some(mut existing) => {
            existing.extend_from_slice(&buf);
            existing
        }
        None => buf,
    };
    inner.buffer.set_read_source(&self.0, source);
}
/// Set read buffer
///
/// Hands `buf` back together with the `result` of the read operation that
/// used it. The buffer is always stored as the read source first (appended
/// to an already stored source, if there is one); only then is `result`
/// examined:
///
/// * `Ok(0)` - the io is stopped without an error and `Poll::Ready(())` is
///   returned.
/// * `Ok(nbytes)` - the filter processes the read buffer. If processing (or
///   the follow-up write-buffer processing) fails, the io is stopped with
///   that error and `Poll::Ready(())` is returned; otherwise
///   `shutdown_filters()` is called and `Poll::Pending` is returned.
/// * `Err(e)` - the io is stopped with `e` and `Poll::Ready(())` is
///   returned.
pub fn set_read_buf(&self, result: io::Result<usize>, buf: BytesVec) -> Poll<()> {
let inner = &self.0 .0;
let (hw, _) = self.0.memory_pool().read_params().unpack();
// put the buffer back as read source, merging with an existing one
if let Some(mut first_buf) = inner.buffer.get_read_source() {
first_buf.extend_from_slice(&buf);
inner.buffer.set_read_source(&self.0, first_buf);
} else {
inner.buffer.set_read_source(&self.0, buf);
}
match result {
Ok(0) => {
// zero bytes read: stop the io without an error
inner.io_stopped(None);
Poll::Ready(())
}
Ok(nbytes) => {
let filter = self.0.filter();
let res = filter
.process_read_buf(&self.0, &inner.buffer, 0, nbytes)
.and_then(|status| {
if status.nbytes > 0 {
// dest buffer has new data, wake up dispatcher
if inner.buffer.read_destination_size() >= hw {
// NOTE(review): the value logged below is `nbytes`, not the
// destination buffer size that was compared against `hw`
log::trace!(
"{}: Io read buffer is too large {}, enable read back-pressure",
self.0.tag(),
nbytes
);
inner.insert_flags(Flags::BUF_R_READY | Flags::BUF_R_FULL);
} else {
inner.insert_flags(Flags::BUF_R_READY);
if nbytes >= hw {
// read task is paused because of read back-pressure
// but there is no new data in top most read buffer
// so we need to wake up read task to read more data
// otherwise read task would sleep forever
inner.read_task.wake();
}
}
log::trace!(
"{}: New {} bytes available, wakeup dispatcher",
self.0.tag(),
nbytes
);
inner.dispatch_task.wake();
} else {
if nbytes >= hw {
// read task is paused because of read back-pressure
// but there is no new data in top most read buffer
// so we need to wake up read task to read more data
// otherwise read task would sleep forever
inner.read_task.wake();
}
if inner.flags.get().is_waiting_for_read() {
// in case of "notify" we must wake up dispatch task
// if we read any data from source
inner.dispatch_task.wake();
}
}
// while reading, filter wrote some data
// in that case filters need to process write buffers
// and potentially wake write task
if status.need_write {
inner.write_task.wake();
filter.process_write_buf(&self.0, &inner.buffer, 0)
} else {
Ok(())
}
});
if let Err(err) = res {
// filter processing failed: stop the io with that error
inner.io_stopped(Some(err));
Poll::Ready(())
} else {
self.shutdown_filters();
Poll::Pending
}
}
Err(e) => {
// the read operation itself failed: stop the io with its error
inner.io_stopped(Some(e));
Poll::Ready(())
}
}
}
/// Get write buffer
///
/// Takes the waker stored in `write_task` and polls the filter for write
/// readiness with it. When the filter reports `WriteStatus::Ready` or
/// `WriteStatus::Shutdown`, the write destination buffer is taken and
/// returned if it holds any data. In every other case (no stored waker,
/// filter not ready, no buffer, or an empty buffer) the result is
/// `Poll::Pending`.
pub fn get_write_buf(&self) -> Poll<BytesVec> {
    let inner = &self.0 .0;

    let waker = match inner.write_task.take() {
        Some(waker) => waker,
        None => return Poll::Pending,
    };

    // check write readiness
    let mut cx = Context::from_waker(&waker);
    let status = self.0.filter().poll_write_ready(&mut cx);
    if !matches!(
        status,
        Poll::Ready(WriteStatus::Ready | WriteStatus::Shutdown)
    ) {
        return Poll::Pending;
    }

    match inner.buffer.get_write_destination() {
        Some(buf) if !buf.is_empty() => Poll::Ready(buf),
        _ => Poll::Pending,
    }
}
/// Give a write buffer back without reporting a write result.
///
/// Any data that is currently stored as write destination is appended
/// after the contents of `buf`, the now-drained destination buffer is
/// released to the memory pool, and `buf` becomes the write destination.
/// Write-related flags are then refreshed from the resulting buffer size.
pub fn release_write_buf(&self, mut buf: BytesVec) {
    let inner = &self.0 .0;

    if let Some(queued) = inner.buffer.get_write_destination() {
        buf.extend_from_slice(&queued);
        self.0.memory_pool().release_write_buf(queued);
    }
    inner.buffer.set_write_destination(buf);

    let pending = inner.buffer.write_destination_size();
    let mut flags = inner.flags.get();

    if pending == 0 {
        // nothing left to write: notify a waiting dispatcher and pause writes
        if flags.is_waiting_for_write() {
            flags.waiting_for_write_is_done();
            inner.dispatch_task.wake();
        }
        flags.insert(Flags::WR_PAUSED);
        inner.flags.set(flags);
    } else {
        // if write buffer is smaller than high watermark value, turn off back-pressure
        let below_limit = flags.contains(Flags::BUF_W_BACKPRESSURE)
            && pending < (inner.pool.get().write_params_high() << 1);
        if below_limit {
            flags.remove(Flags::BUF_W_BACKPRESSURE);
            inner.flags.set(flags);
            inner.dispatch_task.wake();
        }
    }

    // the flags are stored once more on every path
    inner.flags.set(flags);
}
/// Set write buffer
///
/// Hands `buf` back together with the `result` of the write operation that
/// used it.
///
/// * `Ok(0)` is treated as a failed write (`io::ErrorKind::WriteZero`).
/// * `Ok(n)` with `n` equal to the buffer length releases the buffer to the
///   memory pool; otherwise the first `n` bytes are dropped, any data stored
///   as write destination in the meantime is appended, and the buffer
///   becomes the write destination again.
/// * `Err(e)` is passed through.
///
/// Returns `Poll::Ready(())` when no data remains queued for writing or
/// when the io was stopped because of an error, and `Poll::Pending` while
/// data is still queued.
pub fn set_write_buf(&self, result: io::Result<usize>, mut buf: BytesVec) -> Poll<()> {
    // how much of `buf` is still unsent
    let unsent = match result {
        Ok(0) => {
            log::trace!("{}: Disconnected during flush", self.tag());
            Err(io::Error::new(
                io::ErrorKind::WriteZero,
                "failed to write frame to transport",
            ))
        }
        Ok(written) if written == buf.len() => {
            buf.clear();
            Ok(0)
        }
        Ok(written) => {
            buf.advance(written);
            Ok(buf.len())
        }
        Err(err) => Err(err),
    };

    let inner = &self.0 .0;

    // set buffer back and find out how much data is queued in total
    let queued = match unsent {
        Ok(0) => {
            self.0.memory_pool().release_write_buf(buf);
            inner.buffer.write_destination_size()
        }
        Ok(_) => {
            if let Some(newer) = inner.buffer.get_write_destination() {
                buf.extend_from_slice(&newer);
                self.0.memory_pool().release_write_buf(newer);
            }
            let total = buf.len();
            inner.buffer.set_write_destination(buf);
            total
        }
        Err(err) => {
            inner.io_stopped(Some(err));
            return Poll::Ready(());
        }
    };

    let mut flags = inner.flags.get();

    if queued == 0 {
        // all data has been written
        flags.insert(Flags::WR_PAUSED);
        if flags.is_task_waiting_for_write() {
            flags.task_waiting_for_write_is_done();
            inner.write_task.wake();
        }
        if flags.is_waiting_for_write() {
            flags.waiting_for_write_is_done();
            inner.dispatch_task.wake();
        }
        inner.flags.set(flags);
        return Poll::Ready(());
    }

    // if write buffer is smaller than high watermark value, turn off back-pressure
    if flags.contains(Flags::BUF_W_BACKPRESSURE)
        && queued < (inner.pool.get().write_params_high() << 1)
    {
        flags.remove(Flags::BUF_W_BACKPRESSURE);
        inner.flags.set(flags);
        inner.dispatch_task.wake();
    }
    Poll::Pending
}
/// Get read buffer
pub fn with_read_buf<F>(&self, f: F) -> Poll<()>
where
@ -509,7 +783,7 @@ impl IoContext {
// otherwise read task would sleep forever
inner.read_task.wake();
}
if inner.flags.get().contains(Flags::RD_NOTIFY) {
if inner.flags.get().is_waiting_for_read() {
// in case of "notify" we must wake up dispatch task
// if we read any data from source
inner.dispatch_task.wake();