I want to read word by word from a text file. Here's my code in C++:
int main(int argc, const char * argv[]) {
    // Stream extraction splits on whitespace only, so each "word" printed
    // below still carries any punctuation that was attached to it.
    ifstream file("./wordCount.txt");
    for (string word; file >> word; ) {
        cout << word << endl;
    }
    return 0;
}
The text file contains the sentence:
I don't have power, but he has power.
Here's the result I get:
I
don\241\257t
have
power,
but
he
has
power.
Can you tell me how to get the result like the below format:
I
don't
have
power
but
he
has
power
Thanks.
I understand that you want to get rid of the punctuation.
Unfortunately, extracting strings from a stream treats only whitespace as a separator. So "don't" or "Hello,world" would be read as one word, and "don' t" or "Hello, world" as two words.
The alternative is to read the text line by line and use string::find_first_of()
to jump from separator to separator:
// Characters that end a word: whitespace plus common punctuation.
// The apostrophe is deliberately absent so "don't" stays a single word.
const string separator{" \t\r\n,.!?;:"};
string line;
while (getline(cin, line)) {                       // read line by line
    // s = offset of the current word, string::npos once the line is exhausted
    size_t s = line.find_first_not_of(separator);  // skip leading separators
    while (s != string::npos) {
        // e = offset of the separator that ends the word (npos: word runs to end of line)
        const size_t e = line.find_first_of(separator, s);
        cout << line.substr(s, e - s) << endl;     // substr clamps the count at end of line
        // Resume at the separator itself; with e == npos this yields npos and
        // ends the loop, without relying on npos + 1 wrapping around to 0.
        s = line.find_first_not_of(separator, e);
    }
}
The code below gets rid of punctuation, except for the apostrophe:
#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
using namespace std;
// Reads wordCount.txt word by word and prints each word on its own line with
// punctuation stripped. The apostrophe (') and the backtick (`) are kept so
// that contractions such as "don't" survive intact.
int main(int argc, const char * argv[]) {
    std::ifstream file("wordCount.txt");
    std::string word;
    while (file >> word) {
        // Erase-remove drops every unwanted character in a single pass.
        // Erasing inside a range-for over the same string invalidates the
        // loop's iterators, and erase(pos) with one argument would cut off
        // everything from pos to the end of the word.
        word.erase(std::remove_if(word.begin(), word.end(),
                                  // Taking the byte as unsigned char keeps ispunct
                                  // well-defined for non-ASCII input.
                                  [](unsigned char c) {
                                      return ispunct(c) && c != '\'' && c != '`';
                                  }),
                   word.end());
        if (!word.empty())  // a token made only of punctuation leaves nothing to print
            std::cout << word << std::endl;
    }
    return 0;
}
should produce the desired output:
Georgioss-MacBook-Pro:~ gsamaras$ g++ -Wall -std=c++0x main.cpp
Georgioss-MacBook-Pro:~ gsamaras$ ./a.out
I
don`t
have
power
but
he
has
power
For the problem with some characters, I encourage you to check the encoding of the file, so try doing (as explained here ):
file -I wordCount.txt
wordCount.txt: text/plain; charset=us-ascii
which is what worked for me. Or simply open a text editor and make sure the characters are valid.
To ease debugging, I replaced the file with a std::istringstream.
I also added a bool (class data attribute) to simplify enable/disable of additional diagnostic information. (m_dbg)
#include <algorithm>
#include <chrono>
// 'compressed' chrono access --------------vvvvvvv
using HRClk_t = std::chrono::high_resolution_clock; // std-chrono-hi-res-clk
using Time_t  = HRClk_t::time_point;                // std-chrono-hi-res-clk-time-point
using MS_t    = std::chrono::milliseconds;          // std-chrono-milliseconds
using US_t    = std::chrono::microseconds;          // std-chrono-microseconds
using NS_t    = std::chrono::nanoseconds;           // std-chrono-nanoseconds
using namespace std::chrono_literals; // support suffixes like 100ms, 2s, 30us
#include <array>
#include <cctype>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
/// Splits text into whitespace-separated words after dropping ',' and '.'.
///
/// Each extracted word is printed to std::cout on its own line and appended to
/// an internal vector. Set m_dbg to true for additional diagnostic output.
class T496_t
{
   // Lookup table indexed by byte value; a zero entry means "drop this byte".
   std::array<char, 256>    m_keep;
   std::vector<std::string> m_wordVec;      // every word extracted so far
   bool                     m_dbg = false;  // enables diagnostic output

public:
   T496_t()
   {
      // Entry i holds the byte value i, so every byte except NUL is kept ...
      for (unsigned i = 0; i < m_keep.size(); ++i)
         m_keep[i] = static_cast<char>(i);
      // ... apart from the punctuation that is to be stripped.
      m_keep[static_cast<unsigned char>(',')] = 0;
      m_keep[static_cast<unsigned char>('.')] = 0;
   }

   ~T496_t() = default;

   /// Runs the extraction on the built-in sample text. Always returns 0.
   int exec()
   {
      std::istringstream file(
         "Hello\n"
         "I don't have power, but he has power.\n"
         "I don't have power , but he has power.\n"
         ); //ifstream file("./wordCount.txt");
      return exec(file);
   }

   /// Runs the extraction on every line of @p file. Always returns 0.
   ///
   /// Looping on the result of std::getline means a final line without a
   /// trailing newline is processed like any other line.
   int exec(std::istream& file)
   {
      unsigned    lineCount = 1;
      std::string line;
      while (std::getline(file, line))
      {
         if (m_dbg) std::cout << "\n line " << lineCount++ << " : '"
                              << line << "'\n " << std::setfill('-')
                              << std::setw(static_cast<int>(line.size())+12)
                              << "-" << std::setfill(' ');
         std::cout << '\n';

         if (line.empty())
            continue;               // ignore empty lines

         extractWordsFrom(line);    // extract words
      }
      return 0;
   }

   /// Words collected by all exec() calls so far, in input order.
   const std::vector<std::string>& words() const { return m_wordVec; }

private: // methods

   // True when c is whitespace. The cast keeps std::isspace well-defined for
   // bytes outside the ASCII range (negative when char is signed).
   static bool isBlank(char c)
   {
      return std::isspace(static_cast<unsigned char>(c)) != 0;
   }

   // Filters `unfiltered` (which is left-trimmed in place), then stores and
   // prints every maximal run of non-blank characters as one word.
   void extractWordsFrom(std::string& unfiltered)
   {
      std::string line; // filtered
      filter(unfiltered, line);

      if (line.empty()) {
         if (m_dbg) std::cout << " empty line" << std::endl;
         return;
      }

      const std::string::size_type len = line.size();
      std::string::size_type indx1 = 0;   // start of the current word
      while (true)
      {
         while (indx1 < len && isBlank(line[indx1])) ++indx1;   // skip blanks
         if (indx1 == len)
            break;   // only blanks were left: do not emit an empty word

         std::string::size_type indx2 = indx1;                  // one past the word
         while (indx2 < len && !isBlank(line[indx2])) ++indx2;

         m_wordVec.push_back(line.substr(indx1, indx2 - indx1));

         if (m_dbg) {
            std::cout << " word(" << std::setw(3) << indx1 << ",";
            if (indx2 == len) std::cout << " eoln): ";
            else              std::cout << std::setw(3) << indx2 << "): ";
         }
         std::cout << " " << m_wordVec.back() << '\n';

         indx1 = indx2;
      }
   }

   // Left-trims `unfiltered` in place, then appends to `line` every byte whose
   // m_keep entry is non-zero.
   void filter(std::string& unfiltered, std::string& line)
   {
      ltrim(unfiltered); // remove leading blanks
      line.reserve(line.size() + unfiltered.size());
      for (const char c : unfiltered)                  // transfer all chars
         if (m_keep[static_cast<unsigned char>(c)])    // index by byte value, never negative
            line.push_back(c);
   }

   // trim from start
   void ltrim(std::string& s) {
      s.erase(s.begin(), std::find_if_not(s.begin(), s.end(), isBlank));
   }

   // trim from end
   void rtrim(std::string& s) {
      s.erase(std::find_if_not(s.rbegin(), s.rend(), isBlank).base(), s.end());
   }

   // trim from both ends
   void lrtrim(std::string& s) { rtrim(s); ltrim(s); }

}; // class T496_t
int main(int /*argc*/, char** /*argv[]*/)
{
setlocale(LC_ALL, "");
std::ios::sync_with_stdio(false);
Time_t start_us = HRClk_t::now();
int retVal = -1;
{
T496_t t496;
retVal = t496.exec();
}
auto duration_us = std::chrono::duration_cast<US_t>(HRClk_t::now() - start_us);
std::cout << "\n\n FINI " << duration_us.count() << " us" << std::endl;
return(retVal);
}
// desired output:
// I
// don't
// have
// power
// but
// he
// has
// power
Output from this code:
Hello
I
don't
have
power
but
he
has
power
I
don't
have
power
but
he
has
power
Output with m_dbg=true
line 1 : 'Hello'
-----------------
word( 0, eoln): Hello
line 2 : 'I don't have power, but he has power.'
-------------------------------------------------
word( 0, 1): I
word( 2, 7): don't
word( 8, 12): have
word( 13, 18): power
word( 19, 22): but
word( 23, 25): he
word( 26, 29): has
word( 30, eoln): power
line 3 : 'I don't have power , but he has power.'
---------------------------------------------------
word( 0, 1): I
word( 2, 7): don't
word( 9, 13): have
word( 14, 19): power
word( 21, 24): but
word( 25, 27): he
word( 28, 31): has
word( 32, eoln): power
FINI 215 us
A simple approach is to filter the string first: replace every punctuation character except the apostrophe (i.e. ') with whitespace, so that built-in functions can then do the splitting.
#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
#include <sstream>
#include <iterator>
using namespace std;
// Predicate for replace_if: returns true for punctuation that should be
// replaced by a blank. The apostrophe is exempt so that contractions such as
// "don't" stay in one piece.
bool isOk(char c)
{
    if ( c == '\'' )
        return false;
    // The <cctype> classifiers require a value representable as unsigned char;
    // passing a negative plain char (any non-ASCII byte) is undefined behaviour.
    return ispunct(static_cast<unsigned char>(c)) != 0;
}
// Reads data.txt token by token, blanks out the punctuation flagged by isOk,
// and prints every remaining piece on its own line.
int main()
{
    std::ifstream file("data.txt");
    for (std::string token; file >> token; ) {
        std::replace_if(token.begin(), token.end(), isOk, ' ');
        // Blanking punctuation may split one token into several pieces
        // (e.g. "Hello,world"), so tokenize it again.
        std::istringstream pieces(token);
        for (std::string piece; pieces >> piece; )
            std::cout << piece << "\n";
    }
    return 0;
}
The output is
I
don't
have
power
but
he
has
power
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.