Python UnicodeDecodeError: 'ascii' codec can't decode byte 0xe2 in position 0: ordinal not in range(128)

Question

I have the following code:

# -*- coding: utf-8 -*-

forbiddenWords=['for', 'and', 'nor', 'but', 'or', 'yet', 'so', 'not', 'a', 'the', 'an', 'of', 'in', 'to', 'for', 'with', 'on', 'at', 'from', 'by', 'about', 'as']  # NOTE: defined but not referenced anywhere else in this snippet; 'for' is listed twice


def IntoSentences(paragraph):  # returns the list of sentences the tokenizer finds in `paragraph`
    paragraph = paragraph.replace("–", "-")  # swap en dashes for hyphens; this is the line the traceback below reports as "line 7"
    import nltk.data  # imported inside the function, i.e. on every call
    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')  # loads NLTK's pickled English Punkt resource
    sentenceList = sent_detector.tokenize(paragraph.strip())  # tokenize after trimming leading/trailing whitespace
    return sentenceList

from Tkinter import *

root = Tk()  # top-level Tk window that owns every widget below

var = StringVar()  # backs the caption displayed by `label`
label = Label( root, textvariable=var)
var.set("Fill in the caps: ")
label.pack()

text = Text(root)  # text widget whose contents are handed to IntoSentences
text.pack()

button=Button(root, text ="Create text with caps.", command = IntoSentences(text.get(1.0,END)))  # NOTE: IntoSentences(...) is called right here, while the Button is being constructed; its return value (not the function) is what gets passed as `command`
button.pack()

root.mainloop()

When I try to run this code, I get the following error:

C:\Users\Indrek>C:\Python27\Myprojects\caps_main.py
Traceback (most recent call last):
  File "C:\Python27\Myprojects\caps_main.py", line 25, in <module>
    button=Button(root, text ="Create text with caps.", command = IntoSentences(
text.get(1.0,END)))
  File "C:\Python27\Myprojects\caps_main.py", line 7, in IntoSentences
    paragraph = paragraph.replace("ŌĆō", "-")
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe2 in position 0: ordinal
not in range(128)

What is the problem here? I did some research on this error, but the posts I read did not help me. What should I change in my specific code?

Answer 1

My mistake was using the command option incorrectly, just as Bryan Oakley said. My code now looks as follows, and everything works:

# -*- coding: utf-8 -*-

# Short function words that are never chosen as the blanked-out word.
# NOTE: 'for' appears twice; the contents and order are kept exactly as written.
forbiddenWords = [
    'for', 'and', 'nor', 'but', 'or', 'yet', 'so', 'not',
    'a', 'the', 'an',
    'of', 'in', 'to', 'for', 'with', 'on', 'at', 'from', 'by', 'about', 'as',
]


def IntoSentences(paragraph):
    """Split a paragraph into sentences with NLTK's English Punkt tokenizer.

    En dashes (U+2013) are first replaced by plain hyphens.  The replacement
    uses operands of the same string type as ``paragraph``.  On Python 2,
    calling ``replace`` on a ``unicode`` paragraph with a UTF-8 *byte-string*
    en dash makes Python decode that byte string as ASCII, which fails on its
    first byte (0xe2) with ``UnicodeDecodeError``.  Deferring the call (for
    example behind a ``lambda``) only postpones that error; it does not
    remove it.

    Args:
        paragraph: Text to split, either a unicode string or a byte string.
            Byte strings are assumed to be UTF-8 encoded, matching this
            file's declared source encoding.

    Returns:
        The list of sentences produced by the tokenizer.
    """
    import nltk.data

    en_dash = u"\u2013"
    hyphen = u"-"
    if isinstance(paragraph, bytes):
        # Byte string (plain ``str`` on Python 2): stay in bytes so that no
        # implicit ASCII decoding of either operand is triggered.
        en_dash = en_dash.encode("utf-8")
        hyphen = b"-"
    paragraph = paragraph.replace(en_dash, hyphen)

    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
    return sent_detector.tokenize(paragraph.strip())

def new_sentences(sentenceList, forbidden=None):
    """Return a copy of each sentence with one randomly chosen word blanked out.

    For every sentence, one word that is not in the forbidden list is picked
    uniformly at random and every whole-word occurrence of it is replaced by
    ``"______"``.  A sentence with no eligible word is returned unchanged.

    Args:
        sentenceList: Iterable of sentence strings.
        forbidden: Optional collection of lower-case words that must never be
            blanked out.  Defaults to the module-level ``forbiddenWords``.

    Returns:
        A new list with one (possibly modified) sentence per input sentence,
        in the same order.
    """
    import re
    from random import choice

    if forbidden is None:
        forbidden = forbiddenWords
    skip = frozenset(forbidden)

    result = []
    for sentence in sentenceList:
        # Same word extraction as replacing every non-word character with a
        # space and splitting on whitespace.
        words = re.findall(r"\w+", sentence)
        candidates = [word for word in words if word.lower() not in skip]
        if not candidates:
            # Nothing eligible: keep the sentence instead of retrying forever.
            result.append(sentence)
            continue
        target = choice(candidates)
        # Word boundaries keep the blank from landing inside longer words
        # (e.g. blanking "he" must not touch "the").
        pattern = r"\b" + re.escape(target) + r"\b"
        result.append(re.sub(pattern, "______", sentence))
    return result

from Tkinter import *

root = Tk()

# Caption shown above the input area, backed by a Tk string variable.
var = StringVar()
var.set("Fill in the caps: ")
label = Label(root, textvariable=var)
label.pack()

# Free-form input area whose contents are split into sentences on demand.
text = Text(root)
text.pack()


def _split_current_text():
    """Run IntoSentences on the text box contents at the moment of the click."""
    return IntoSentences(text.get(1.0, END))


# Pass the callback itself (not its result), so it runs only when clicked.
button = Button(root, text="Create text with caps.", command=_split_current_text)
button.pack()

root.mainloop()

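The only change is that I added lambda: in front of the callback. Without it, IntoSentences(...) is called immediately while the button is being created, and its return value (rather than the function) is passed as command. The line now reads: button=Button(root, text ="Create text with caps.", command =lambda: IntoSentences(text.get(1.0,END)))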

Python UnicodeDecodeError: 'ascii' codec can't decode byte 0xe2 in position 0: ordinal not in range(128)

Question

1 answer

solution1
0 ACCEPTED 2014-06-13 11:41:57

Python UnicodeDecodeError: 'ascii' codec can't decode byte 0xe2 in position 0: ordinal not in range(128)

Question

1 answer

solution1 0 ACCEPTED 2014-06-13 11:41:57

solution1
0 ACCEPTED 2014-06-13 11:41:57