[英]How do I implement std::io::Read for an Iterator<Item = String>?
更具體地說,我從標準輸入(stdin)讀取資料,使用.lines().filter_map()
過濾掉某些行,並且我想將過濾後的結果交給csv::Reader
使用。
如果您允許Read::read
返回部分讀取,則有一個簡單的實現。 從與Shepmaster相似的結構開始。
use std::io::{self, Read};
/// Adapter that holds an iterator together with the item that is currently
/// being handed out piece by piece.
struct IteratorAsRead<I: Iterator> {
    /// Source of the items that have not been touched yet.
    iter: I,
    /// An item that was only partially consumed, paired with the number of
    /// its bytes that were already handed out; `None` when nothing is pending.
    leftover: Option<(I::Item, usize)>,
}
impl<I> IteratorAsRead<I>
where
I: Iterator,
{
pub fn new<T>(iter: T) -> Self
where
T: IntoIterator<IntoIter = I, Item = I::Item>,
{
IteratorAsRead {
iter: iter.into_iter(),
leftover: None,
}
}
}
接著實現 read 函數:先找到下一個要讀取的非空字符串,再嘗試將其寫入緩沖區,如果沒能全部寫入,就把剩余的內容保存起來,留給下一次調用使用。
impl<I> Read for IteratorAsRead<I>
where
    I: Iterator,
    I::Item: AsRef<[u8]>,
{
    /// Copies bytes from at most one item into `buf`, so a call may return
    /// fewer bytes than `buf` could hold.
    ///
    /// Returns `Ok(0)` once no pending item is stored and the iterator yields
    /// no further non-empty item.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // Resume the partially consumed item if there is one; otherwise pull
        // the next item that actually contains bytes (empty ones are skipped).
        let (chunk, start) = if let Some(pending) = self.leftover.take() {
            pending
        } else if let Some(fresh) = self.iter.find(|item| !item.as_ref().is_empty()) {
            (fresh, 0)
        } else {
            return Ok(0);
        };

        let consumed = {
            let mut unread: &[u8] = &chunk.as_ref()[start..];
            unread.read(buf)?
        };

        // `take()` already cleared `self.leftover`, so it only has to be
        // refilled when part of the item is still unread.
        let resume_at = start + consumed;
        if resume_at < chunk.as_ref().len() {
            self.leftover = Some((chunk, resume_at));
        }
        Ok(consumed)
    }
}
標准庫提供了std::io::Cursor
類型,該類型將緩沖區以及緩沖區中的位置包裝在一起。 這可以用來進一步簡化Veedrac的答案中給出的代碼:
use std::io::{self, Cursor, Read};
/// Adapter that holds an iterator plus a `Cursor` over the item currently
/// being read.
struct IteratorAsRead<I: Iterator> {
    /// Source of the items that have not been wrapped in a cursor yet.
    iter: I,
    /// Cursor over the current item; the cursor itself tracks the position
    /// inside that item. `None` when there is no current item.
    cursor: Option<Cursor<I::Item>>,
}
impl<I> IteratorAsRead<I>
where
I: Iterator,
{
pub fn new<T>(iter: T) -> Self
where
T: IntoIterator<IntoIter = I, Item = I::Item>,
{
let mut iter = iter.into_iter();
let cursor = iter.next().map(Cursor::new);
IteratorAsRead { iter, cursor }
}
}
impl<I> Read for IteratorAsRead<I>
where
I: Iterator,
Cursor<I::Item>: Read,
{
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
while let Some(ref mut cursor) = self.cursor {
let read = cursor.read(buf)?;
if read > 0 {
return Ok(read);
}
self.cursor = self.iter.next().map(Cursor::new);
}
Ok(0)
}
}
// Reading to the end must join all one-byte items into a single stream.
#[test]
fn small_pieces_are_combined() {
    let pieces = ["h", "e", "l", "l", "o"];
    let mut collected = Vec::new();
    let count = IteratorAsRead::new(&pieces)
        .read_to_end(&mut collected)
        .unwrap();
    assert_eq!(&collected[..count], b"hello");
}
// A buffer smaller than the item must receive the item in successive slices.
#[test]
fn partial_reads() {
    let source = ["hello"];
    let mut reader = IteratorAsRead::new(&source);
    let mut window = [0; 2];
    let expected_chunks: [&[u8]; 3] = [b"he", b"ll", b"o"];
    for expected in expected_chunks.iter() {
        let count = reader.read(&mut window).unwrap();
        assert_eq!(&window[..count], *expected);
    }
}
最簡單的解決方案是將所有輸入一次讀入一個巨型緩沖區,然后從中讀取:
// Join every piece into one owned byte buffer up front.
let iterable = ["h", "e", "l", "l", "o"];
let all_bytes: Vec<u8> = iterable.concat().into_bytes();
// A byte slice implements `Read`, so it can be drained like any other reader.
let mut source: &[u8] = &all_bytes;
let mut buf = Vec::new();
let bytes = source.read_to_end(&mut buf).unwrap();
assert_eq!(&buf[..bytes], b"hello");
如果確實需要避免將它們全部加載到內存中,則可以實現一個包裝器,但其中有些細節比較繁瑣,因為可用字節數和要讀取的字節數並不總是一致。 您必須保存一些臨時狀態來記錄當前讀到的位置,有時還需要先獲取更多數據才能繼續讀取:
use std::io::{self, Read};
use std::cmp;
/// Adapter over an iterator that keeps pulling further items to fill the
/// caller's buffer rather than stopping at the end of each item.
struct IteratorAsRead<I: Iterator> {
    /// Source of the items that have not been touched yet.
    iter: I,
    /// Item currently being copied out, if any.
    value: Option<I::Item>,
    /// Number of bytes of `value` that were already copied out.
    offset: usize,
}
impl<I> IteratorAsRead<I>
where
I: Iterator,
{
pub fn new<T>(iter: T) -> Self
where
T: IntoIterator<IntoIter = I, Item = I::Item>,
{
IteratorAsRead {
iter: iter.into_iter(),
value: None,
offset: 0,
}
}
}
impl<I> Read for IteratorAsRead<I>
where
    I: Iterator,
    I::Item: AsRef<[u8]>,
{
    /// Fills `buf` from as many items as needed.
    ///
    /// Returns once `buf` is full or the iterator has no more items; the
    /// result is the number of bytes written, which is `0` only when `buf` is
    /// empty or nothing is left to read.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let mut written = 0;
        loop {
            if let Some(item) = self.value.take() {
                let finished = {
                    let bytes: &[u8] = item.as_ref();
                    // Bytes of this item not yet handed out, and the part of
                    // the destination that is still free.
                    let pending = &bytes[self.offset..];
                    let room = &mut buf[written..];
                    let n = cmp::min(pending.len(), room.len());
                    room[..n].copy_from_slice(&pending[..n]);
                    written += n;
                    self.offset += n;
                    self.offset == bytes.len()
                };
                if finished {
                    // Item fully copied: drop it and start the next at zero.
                    self.offset = 0;
                } else {
                    // Destination ran out first: keep the item for next call.
                    self.value = Some(item);
                }
            } else if let Some(next) = self.iter.next() {
                self.value = Some(next);
            } else {
                return Ok(written);
            }

            // Stop as soon as the destination is full.
            if written == buf.len() {
                return Ok(written);
            }
        }
    }
}
// Reading to the end must join all one-byte items into a single stream.
#[test]
fn small_pieces_are_combined() {
    let pieces = ["h", "e", "l", "l", "o"];
    let mut collected = Vec::new();
    let count = IteratorAsRead::new(&pieces)
        .read_to_end(&mut collected)
        .unwrap();
    assert_eq!(&collected[..count], b"hello");
}
// A buffer smaller than the item must receive the item in successive slices.
#[test]
fn partial_reads() {
    let source = ["hello"];
    let mut reader = IteratorAsRead::new(&source);
    let mut window = [0; 2];
    let expected_chunks: [&[u8]; 3] = [b"he", b"ll", b"o"];
    for expected in expected_chunks.iter() {
        let count = reader.read(&mut window).unwrap();
        assert_eq!(&window[..count], *expected);
    }
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.