简体   繁体   中英

Reading in Wav header - Not setting data size

I'm trying to read in the Header information of a .wav file.

If I have a .wav file that has a low sample rate (22050) it will read all the information in perfectly, however, if I have a higher Sample Rate (8000) then it fails to read in some information:

"dataSize" set's when using a 22050 .wav file however, when using a 8000 .wav file it does not get set and just displays some random numbers.. eg "1672494080" when the actual size is around 4k-4.5k in size.

Any suggestions to where I am going wrong?

EDIT:

#include <iostream>
#include <fstream>
#include <vector>
#include <inttypes.h>
#include <stdint.h>
#include <math.h>

using namespace std;
struct riff_hdr
{
   char id[4];
   uint32_t size;
   char type[4];
};

struct chunk_hdr
{
   char id[4];
   uint32_t size;
};

struct wavefmt
{
   uint16_t format_tag;
   uint16_t channels;
   uint32_t sample_rate;
   uint32_t avg_bytes_sec;
   uint16_t block_align;
   uint16_t bits_per_sample;
   uint16_t extra_size;
};


riff_hdr riff;
chunk_hdr chunk;
wavefmt fmt = {0};
uint32_t padded_size;
vector<uint8_t> chunk_data;

bool readHeader(ifstream &file) {

file.read(reinterpret_cast<char*>(&riff), sizeof(riff));
if (memcmp(riff.id, "RIFF", 4) == 0)
{
    cout << "size=" << riff.size << endl;
    cout << "id=" << string(riff.type, 4) << endl;

    if (memcmp(riff.type, "WAVE", 4) == 0)
    {
        // chunks can be in any order!
        // there is no guarantee that "fmt" is the first chunk.
        // there is no guarantee that "fmt" is immediately followed by "data".
        // There can be other chunks present!

        do {

            file.read(reinterpret_cast<char*>(&chunk), sizeof(chunk));
            padded_size = ((chunk.size + 2 - 1) & ~1);

             cout << "id=" << string(chunk.id, 4) << endl;
             cout << "size=" << chunk.size << endl;
             cout << "padded size=" << padded_size << endl;

             if (memcmp(chunk.id, "fmt\0", 4) == 0) 
             {
                 if (chunk.size < sizeof(wavefmt))
                 {
                     // error!
                     file.ignore(padded_size);
                 }else{

                    // THIS block doesn't seem to be executing
                    chunk_data.resize(padded_size);
                    file.read(reinterpret_cast<char*>(&chunk_data[0]), padded_size);

                    fmt = *(reinterpret_cast<wavefmt*>(&chunk_data[0]));

                    cout << "format_tag=" << fmt.format_tag << endl;
                    cout << "channels=" << fmt.channels << endl;
                    cout << "sample_rate=" << fmt.sample_rate << endl;
                    cout << "avg_bytes_sec=" << fmt.avg_bytes_sec << endl;
                    cout << "block_align=" << fmt.block_align << endl;
                    cout << "bits_per_sample=" << fmt.bits_per_sample << endl;
                    cout << "extra_size=" << fmt.extra_size << endl;
                }

                if(fmt.format_tag != 1)
                {
                    uint8_t *extra_data = &chunk_data[sizeof(wavefmt)];
                }
             }else if(memcmp(chunk.id, "data", 4) == 0) {
                file.ignore(padded_size);
            }else{
                file.ignore(padded_size);
            }
        }while ((!file) && (!file.eof()));
    }
}

    return true;
}

int main()
{
ifstream file("example2.wav");


readHeader(file);
return 0;
}

OUTPUT:

size=41398

id=WAVE

id=fmt

size=18

padded size=18

chunk_data size=0

Where am I going wrong?

You have two problems with your code:

  1. There is a 2-byte integer after the bitsPerSample value that you are not reading. It specifies the size of any extra data in that chunk. If the value of format2 indicates a PCM format only, you can ignore the value of the integer (it will usually be 0 anyway, but it may also be garbage), but you still have to account for its presense. The integer cannot be ignored for non-PCM formats, you have to read the value and then read how many bytes it says. You need to make sure you are reading the entire chunk before then entering your while loop, otherwise you will not be on the correct starting position in the file to read further chunks.

  2. You are not taking into account that chunks are padded to the nearest WORD boundary, but the chunk size does not include any padding. When you call seekg() , you need to round the value up to the next WORD boundary.

Update : based on the new code you posted, it should look more like this instead:

#include <iostream>
#include <fstream>
#include <vector>
#include <inttypes.h>
#include <stdint.h>
#include <math.h>

using namespace std;

// if your compiler does not have pshpack1.h and poppack.h, then
// use #pragma pack instead. It is important that these structures
// be byte-alignd!

#include <pshpack1.h>

struct s_riff_hdr
{
   char id[4];
   uint32_t size;
   char type[4];
};

struct s_chunk_hdr
{
   char id[4];
   uint32_t size;
};

struct s_wavefmt
{
   uint16_t format_tag;
   uint16_t channels;
   uint32_t sample_rate;
   uint32_t avg_bytes_sec;
   uint16_t block_align;
};

struct s_wavefmtex
{
   s_wavefmt fmt;
   uint16_t bits_per_sample;
   uint16_t extra_size;
};

struct s_pcmwavefmt
{
   s_wavefmt fmt;
   uint16_t bits_per_sample;
};

#include <poppack.h>

bool readWave(ifstream &file)
{
    s_riff_hdr riff_hdr;
    s_chunk_hdr chunk_hdr;
    uint32_t padded_size;
    vector<uint8_t> fmt_data;
    s_wavefmt *fmt = NULL;

    file.read(reinterpret_cast<char*>(&riff_hdr), sizeof(riff_hdr));
    if (!file) return false;

    if (memcmp(riff_hdr.id, "RIFF", 4) != 0) return false;

    cout << "size=" << riff_hdr.size << endl;
    cout << "type=" << string(riff_hdr.type, 4) << endl;

    if (memcmp(riff_hdr.type, "WAVE", 4) != 0) return false;

    // chunks can be in any order!
    // there is no guarantee that "fmt" is the first chunk.
    // there is no guarantee that "fmt" is immediately followed by "data".
    // There can be other chunks present!

    do
    {
        file.read(reinterpret_cast<char*>(&chunk_hdr), sizeof(chunk_hdr));
        if (!file) return false;

        padded_size = ((chunk_hdr.size + 1) & ~1);

        cout << "id=" << string(chunk_hdr.id, 4) << endl;
        cout << "size=" << chunk_hdr.size << endl;
        cout << "padded size=" << padded_size << endl;

        if (memcmp(chunk_hdr.id, "fmt ", 4) == 0) 
        {
            if (chunk_hdr.size < sizeof(s_wavefmt)) return false;

            fmt_data.resize(padded_size);

            file.read(reinterpret_cast<char*>(&fmt_data[0]), padded_size);
            if (!file) return false;

            fmt = reinterpret_cast<s_wavefmt*>(&fmt_data[0]);

            cout << "format_tag=" << fmt->format_tag << endl;
            cout << "channels=" << fmt->channels << endl;
            cout << "sample_rate=" << fmt->sample_rate << endl;
            cout << "avg_bytes_sec=" << fmt->avg_bytes_sec << endl;
            cout << "block_align=" << fmt->block_align << endl;

            if (fmt->format_tag == 1) // PCM
            {
                if (chunk_hdr.size < sizeof(s_pcmwavefmt)) return false;

                s_pcmwavefmt *pcm_fmt = reinterpret_cast<s_pcmwavefmt*>(fmt);

                cout << "bits_per_sample=" << pcm_fmt->bits_per_sample << endl;
            }
            else
            {
                if (chunk_hdr.size < sizeof(s_wavefmtex)) return false;

                s_wavefmtex *fmt_ex = reinterpret_cast<s_wavefmtex*>(fmt);

                cout << "bits_per_sample=" << fmt_ex->bits_per_sample << endl;
                cout << "extra_size=" << fmt_ex->extra_size << endl;

                if (fmt_ex->extra_size != 0)
                {
                    if (chunk_hdr.size < (sizeof(s_wavefmtex) + fmt_ex->extra_size)) return false;

                    uint8_t *extra_data = reinterpret_cast<uint8_t*>(fmt_ex + 1);
                    // use extra_data, up to extra_size bytes, as needed...
                }
            }
        }
        else if (memcmp(chunk_hdr.id, "data", 4) == 0)
        {
            // process chunk data, according to fmt, as needed...

            file.ignore(padded_size);
            if (!file) return false;
        }
        else
        {
            // process other chunks as needed...

            file.ignore(padded_size);
            if (!file) return false;
        }
    }
    while (!file.eof());

    return true;
}

int main()
{
    ifstream file("example2.wav");
    readWave(file);
    return 0;
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM