I am iterating over a directory structure that contains many CSV files, but I am only interested in some of the CSV files in that directory:
if os.path.exists(lang_dir):
dirs = os.listdir(lang_dir)
for filename in dirs:
if re.search(r'-.+-template-users-data.csv$',filename):
But for some reason, a file named zu-en-template-users-data.csv
doesn't get recognized. I have a feeling that the letter u
in the filename has something to do with it. To double-check the code segment above, I went directly to the folder and tried it in the Python interpreter, and there the files were recognized correctly.
>>> import re
>>> import os
>>> dirs = os.listdir("PATH_FOR_THE_DIR/Data/2013_03_06_20_34/zu")
>>> for item in dirs:
... if re.search(r'-.+-template-users-data.csv$',item):
... print item
...
zu-ab-template-users-data.csv
zu-ace-template-users-data.csv
zu-af-template-users-data.csv
zu-ak-template-users-data.csv
zu-als-template-users-data.csv
...
As you can see, all the files that start with zu
showed up. Doesn't this mean that the regular expression in my code segment is correct (to my understanding)?
And here is my code:
def templateUserCountStats(root_dir_path, lang_code_file_path):
    """Count rows per level in every template-users CSV of each language.

    For each language code returned by ``getLanguageCodes(lang_code_file_path)``
    the directory ``<root_dir_path>/<lang>`` is scanned for files whose names
    end in ``-<something>-template-users-data.csv``.  The first row of each
    matching file is skipped as a header, and the remaining rows are tallied
    by the value in their first column ('1'..'5' or 'N').

    Args:
        root_dir_path: directory that contains one sub-directory per
            language code.
        lang_code_file_path: path handed to ``getLanguageCodes`` to obtain
            the language codes to process.

    Returns:
        A nested dict ``{lang: {lang2: {'level1': int, ..., 'level5': int,
        'levelN': int}}}``, where ``lang2`` is the second dash-separated
        component of the file name.  A language whose directory is missing
        maps to an empty dict.

    Errors while opening or parsing a file are reported and the file is
    skipped; they never propagate to the caller.
    """
    # First-column value -> counter key.  Any other value is ignored.
    level_keys = {
        '1': 'level1',
        '2': 'level2',
        '3': 'level3',
        '4': 'level4',
        '5': 'level5',
        'N': 'levelN',
    }
    # The dot before "csv" is escaped so that only a literal ".csv" suffix
    # matches (unescaped, it would match any character).  Compiled once
    # because it is reused for every file name.
    data_file_pattern = re.compile(r'-.+-template-users-data\.csv$')

    # dictionary to hold the template count data structure
    template_count_dict = dict()

    # getting lang codes from csv file
    for lang in getLanguageCodes(lang_code_file_path):
        # root level key of the dictionary
        template_count_dict[lang] = dict()
        lang_dir = os.path.join(root_dir_path, lang)

        if not os.path.exists(lang_dir):
            print("path doesn't exist")
            continue

        for filename in os.listdir(lang_dir):
            if not data_file_pattern.search(filename):
                continue

            # NOTE(review): assumes neither language code contains a dash;
            # a name such as "zh-min-nan-en-..." would yield lang2 == "min".
            lang2 = filename.split("-")[1]
            # Build the path the same way lang_dir was built; it was already
            # checked and listed without user-directory expansion.
            path = os.path.join(lang_dir, filename)

            try:
                # "with" guarantees the handle is closed even when parsing
                # fails part-way through the file.
                with open(path, 'r') as template_user_data_file:
                    csv_file_reader = csv.reader(template_user_data_file)
                    # Skip the header row; the default keeps an empty file
                    # from raising StopIteration.
                    next(csv_file_reader, None)

                    # initializing user count for each language
                    counts = dict.fromkeys(level_keys.values(), 0)
                    template_count_dict[lang][lang2] = counts

                    for row in csv_file_reader:
                        if not row:
                            # Blank line in the CSV: nothing to count.
                            continue
                        level_key = level_keys.get(row[0])
                        if level_key is not None:
                            counts[level_key] += 1
            except csv.Error as e:
                print(e)
            except Exception as e:
                # Best-effort processing: report the failure and move on to
                # the next file instead of aborting the whole run.
                print(e)
                logging.error(e)

    return template_count_dict
Is this because the regular expression somehow interprets u as a pattern?
No, this can't be the reason.
>>> bool(re.search(r'-.+-template-users-data.csv$', 'zu-en-template-users-data.csv'))
True
Your re pattern should work; the problem is somewhere else.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.