簡體   English   中英

我如何為 Iterator<Item = String> 實現 std::io::Read?

[英]How do I implement std::io::Read for an Iterator<Item = String>?

更具體地說,我從標準輸入(stdin)讀取資料,使用 .lines().filter_map() 過濾掉某些行,並且我想將過濾後的結果交給 csv::Reader 使用。

如果您允許Read::read返回部分讀取,則有一個簡單的實現。 從與Shepmaster相似的結構開始。

use std::io::{self, Read};

/// Adapter that lets an iterator be consumed through `std::io::Read`.
///
/// `iter` is the source of items. `leftover` remembers an item that has only
/// been partially handed out, together with the number of its bytes that
/// were already delivered.
struct IteratorAsRead<I: Iterator> {
    iter: I,
    leftover: Option<(I::Item, usize)>,
}

impl<I: Iterator> IteratorAsRead<I> {
    /// Builds the adapter from anything that converts into the iterator `I`.
    ///
    /// Nothing is pulled from the iterator here; the adapter starts with no
    /// leftover item.
    pub fn new<T>(iter: T) -> Self
    where
        T: IntoIterator<IntoIter = I, Item = I::Item>,
    {
        let source = iter.into_iter();
        Self {
            leftover: None,
            iter: source,
        }
    }
}

接著實現 read 函數:先找到一個可供讀取的非空字符串,嘗試將其寫入緩衝區,最後在需要時把尚未寫完的剩餘部分保存起來,留待下一次調用。

impl<I> Read for IteratorAsRead<I>
where
    I: Iterator,
    I::Item: AsRef<[u8]>,
{
    /// Copies bytes from at most one item into `buf`.
    ///
    /// The partially consumed item (if any) is resumed first; otherwise the
    /// next non-empty item is pulled from the iterator. Returns `Ok(0)` once
    /// the iterator yields no further non-empty item. A call never spans two
    /// items, so short reads are normal.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // Prefer the unfinished item; fall back to the next non-empty one.
        let pending = self.leftover.take().or_else(|| {
            self.iter
                .find(|candidate| !candidate.as_ref().is_empty())
                .map(|fresh| (fresh, 0))
        });
        let (item, consumed) = match pending {
            Some(entry) => entry,
            None => return Ok(0),
        };

        // `&[u8]` implements `Read`, which copies as much as fits into `buf`.
        let mut unread = &item.as_ref()[consumed..];
        let written = unread.read(buf)?;

        // Keep the item around only if some of it is still undelivered;
        // `leftover` is already `None` after the `take()` above.
        let consumed = consumed + written;
        if consumed < item.as_ref().len() {
            self.leftover = Some((item, consumed));
        }

        Ok(written)
    }
}

標准庫提供了std::io::Cursor類型,該類型將緩沖區以及緩沖區中的位置包裝在一起。 這可以用來進一步簡化Veedrac的答案中給出的代碼:

use std::io::{self, Cursor, Read};

/// Adapter that lets an iterator be consumed through `std::io::Read`, using
/// a `Cursor` to remember the read position inside the current item.
///
/// `cursor` wraps the item currently being read; `None` means the iterator
/// produced no (further) item.
struct IteratorAsRead<I: Iterator> {
    iter: I,
    cursor: Option<Cursor<I::Item>>,
}

impl<I: Iterator> IteratorAsRead<I> {
    /// Builds the adapter from anything that converts into the iterator `I`.
    ///
    /// The first item is pulled from the iterator immediately and wrapped in
    /// a `Cursor`, so construction already advances the iterator by one.
    pub fn new<T>(iter: T) -> Self
    where
        T: IntoIterator<IntoIter = I, Item = I::Item>,
    {
        let mut remaining = iter.into_iter();
        let first = remaining.next();
        Self {
            cursor: first.map(Cursor::new),
            iter: remaining,
        }
    }
}

impl<I> Read for IteratorAsRead<I>
where
    I: Iterator,
    Cursor<I::Item>: Read,
{
    /// Copies bytes from the current item into `buf`, moving on to the next
    /// item whenever the current one is exhausted.
    ///
    /// Returns the number of bytes copied. A call never spans two items, so
    /// short reads are normal. `Ok(0)` is returned when `buf` is empty or
    /// when the iterator has no more data.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying `Cursor` read.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // A zero-length destination always yields a 0-byte read. The loop
        // below treats 0 as "current item exhausted", so without this guard
        // it would drop every remaining item and lose their data.
        if buf.is_empty() {
            return Ok(0);
        }

        while let Some(ref mut cursor) = self.cursor {
            let read = cursor.read(buf)?;
            if read > 0 {
                return Ok(read);
            }
            // Current item is used up (or was empty): advance to the next.
            self.cursor = self.iter.next().map(Cursor::new);
        }
        Ok(0)
    }
}

// Reading to the end must stitch the one-byte items back into one stream.
#[test]
fn small_pieces_are_combined() {
    let pieces = ["h", "e", "l", "l", "o"];
    let mut source = IteratorAsRead::new(&pieces);

    let mut collected = Vec::new();
    let total = source.read_to_end(&mut collected).unwrap();
    assert_eq!(&collected[..total], b"hello");
}

// A single five-byte item read through a two-byte buffer must come out in
// order as "he", "ll", "o".
#[test]
fn partial_reads() {
    let pieces = ["hello"];
    let mut source = IteratorAsRead::new(&pieces);

    let mut window = [0; 2];
    let expected_chunks: [&[u8]; 3] = [b"he", b"ll", b"o"];

    for expected in expected_chunks.iter() {
        let count = source.read(&mut window).unwrap();
        assert_eq!(&window[..count], *expected);
    }
}

操場

最簡單的解決方案是將所有輸入一次讀入一個巨型緩沖區,然后從中讀取:

// Simplest approach: concatenate every piece into one owned `String` up
// front, then read from its bytes. Everything is held in memory at once.
let iterable = ["h", "e", "l", "l", "o"];
let combined_string: String = iterable.iter().cloned().collect(); 
// Convert the `String` into its underlying byte vector.
let bytes = combined_string.into_bytes();

let mut buf = vec![];
// `&[u8]` implements `Read` (the `std::io::Read` trait must be in scope).
// Note that `bytes` is shadowed here by the number of bytes read.
let bytes = (&bytes[..]).read_to_end(&mut buf).unwrap();
assert_eq!(&buf[..bytes], b"hello");

如果確實需要避免將所有數據一次載入內存,則可以實現一個包裝器,但其中有一些繁瑣的細節,因為可用的字節數與要讀取的字節數並不總是一致。您必須保留一些臨時值來記錄當前讀到的位置,有時還需要從迭代器取得更多數據才能繼續讀取:

use std::io::{self, Read};
use std::cmp;

/// Reader over an iterator that keeps pulling items until the caller's
/// buffer is full, rather than returning a partial read after each item.
///
/// `value` is the item currently being copied out (if any) and `offset` is
/// the number of its bytes that have already been delivered.
struct IteratorAsRead<I: Iterator> {
    iter: I,
    value: Option<I::Item>,
    offset: usize,
}

impl<I: Iterator> IteratorAsRead<I> {
    /// Builds the adapter from anything that converts into the iterator `I`.
    ///
    /// Nothing is pulled from the iterator here; the adapter starts with no
    /// current item and a zero offset.
    pub fn new<T>(iter: T) -> Self
    where
        T: IntoIterator<IntoIter = I, Item = I::Item>,
    {
        let source = iter.into_iter();
        Self {
            offset: 0,
            value: None,
            iter: source,
        }
    }
}

impl<I> Read for IteratorAsRead<I>
where
    I: Iterator,
    I::Item: AsRef<[u8]>,
{
    /// Fills `buf` from as many consecutive items as needed.
    ///
    /// Returns once `buf` is full or the iterator yields no more items. The
    /// result is the number of bytes copied, which is `0` only when `buf` is
    /// empty or no data is left.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let mut filled = 0;
        loop {
            if let Some(current) = self.value.take() {
                let exhausted = {
                    let whole = current.as_ref();

                    // Bytes of this item not yet delivered, capped to the
                    // room that is left in the caller's buffer.
                    let pending = &whole[self.offset..];
                    let count = cmp::min(pending.len(), buf.len() - filled);

                    buf[filled..filled + count].copy_from_slice(&pending[..count]);

                    filled += count;
                    self.offset += count;

                    self.offset == whole.len()
                };

                if exhausted {
                    // Item fully delivered: drop it and start the next one
                    // from its beginning.
                    self.offset = 0;
                } else {
                    // Buffer ran out first: keep the item for the next call.
                    self.value = Some(current);
                }
            } else if let Some(next) = self.iter.next() {
                self.value = Some(next);
            } else {
                // Source exhausted: report whatever was gathered so far.
                return Ok(filled);
            }

            // Stop as soon as the caller's buffer is full.
            if filled == buf.len() {
                return Ok(filled);
            }
        }
    }
}

// Reading to the end must stitch the one-byte items back into one stream.
#[test]
fn small_pieces_are_combined() {
    let pieces = ["h", "e", "l", "l", "o"];
    let mut source = IteratorAsRead::new(&pieces);

    let mut collected = Vec::new();
    let total = source.read_to_end(&mut collected).unwrap();
    assert_eq!(&collected[..total], b"hello");
}

// A single five-byte item read through a two-byte buffer must come out in
// order as "he", "ll", "o".
#[test]
fn partial_reads() {
    let pieces = ["hello"];
    let mut source = IteratorAsRead::new(&pieces);

    let mut window = [0; 2];
    let expected_chunks: [&[u8]; 3] = [b"he", b"ll", b"o"];

    for expected in expected_chunks.iter() {
        let count = source.read(&mut window).unwrap();
        assert_eq!(&window[..count], *expected);
    }
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM