I'm using the default database backend for the search function in my project:
from __future__ import absolute_import, unicode_literals
from django.core.paginator import EmptyPage, PageNotAnInteger, Paginator
from django.shortcuts import render
from home.models import BlogPage, get_all_tags
from wagtail.wagtailsearch.models import Query
def search(request):
    """Render the search page for the ``query`` GET parameter.

    Looks up live ``BlogPage`` objects matching the query, records a hit for
    the query, and paginates the results ten per page. The requested page
    comes from the ``page`` GET parameter (default 1).
    """
    search_query = request.GET.get('query', None)
    page = request.GET.get('page', 1)

    if not search_query:
        # No query supplied: show an empty result set.
        search_results = BlogPage.objects.none()
    else:
        search_results = BlogPage.objects.live().search(search_query)
        # Record the hit against this query string.
        Query.get(search_query).add_hit()

    paginator = Paginator(search_results, 10)
    try:
        current_page = paginator.page(page)
    except PageNotAnInteger:
        # Non-numeric page parameter: fall back to the first page.
        current_page = paginator.page(1)
    except EmptyPage:
        # Page number out of range: fall back to the last page.
        current_page = paginator.page(paginator.num_pages)

    context = {
        'search_query': search_query,
        'blogpages': current_page,
        'tags': get_all_tags()
    }
    return render(request, 'search/search.html', context)
BlogPage:
class BlogPage(Page):
    """Blog post page with a date, a short intro, a StreamField body and tags."""

    date = models.DateField("Post date")
    intro = models.CharField(max_length=250)

    # Body content is a stream of heading / paragraph / image / code blocks.
    body = StreamField(
        [
            ('heading', blocks.CharBlock(classname="full title")),
            ('paragraph', blocks.RichTextBlock()),
            ('image', ImageChooserBlock()),
            ('code', CodeBlock()),
        ]
    )

    tags = ClusterTaggableManager(through=BlogPageTag, blank=True)

    # Index intro and body in addition to the fields Page already indexes.
    search_fields = Page.search_fields + [
        index.SearchField('intro'),
        index.SearchField('body'),
    ]
...
Search works well only if the body fields in the BlogPage model are in English. If I use Russian words in the body fields, the search doesn't find anything. I looked at the database and saw that BlogPage stores the body field like this:
[{"value": "\u0442\u0435\u0441\u0442\u043e\u0432\u044b\u0439", "id": "3343151a-edbc-4165-89f2-ce766922d68e", "type": "heading"}, {"value": "<p>\u0442\u0435\u0441\u0442\u0438\u043f\u0440</p>", "id": "22d3818d-8c69-4d72-967e-7c1f807e80b2", "type": "paragraph"}]
So the problem is that Wagtail saves StreamField data with non-ASCII characters written as \uXXXX escape sequences. If I manually change the value in phpMyAdmin to this:
[{"value": "Тест", "id": "3343151a-edbc-4165-89f2-ce766922d68e", "type": "heading"}, {"value": "<p>Тестовый</p>", "id": "22d3818d-8c69-4d72-967e-7c1f807e80b2", "type": "paragraph"}]
then search starts working. Does anyone know how to prevent Wagtail from saving StreamField data as escaped Unicode?
I hate this workaround, but I decided to just add two more fields, search_body and search_intro, and then search using them:
class BlogPage(Page):
    """Blog post page that mirrors its searchable text into plain-text fields.

    ``search_intro`` and ``search_body`` hold lower-cased copies of ``intro``
    and of the text of the ``body`` blocks. They are refreshed on every
    ``save()`` and are the fields registered for search, so queries must be
    lower-cased before searching.
    """

    date = models.DateField("Post date")
    intro = models.CharField(max_length=250)
    body = StreamField([
        ('heading', blocks.CharBlock(classname="full title")),
        ('paragraph', blocks.RichTextBlock()),
        ('image', ImageChooserBlock()),
        ('code', CodeBlock()),
    ])
    # Derived fields; populated in save(), never edited directly.
    search_intro = models.CharField(max_length=250)
    search_body = models.CharField(max_length=50000)
    tags = ClusterTaggableManager(through=BlogPageTag, blank=True)

    def main_image(self):
        """Return the image of the first gallery item, or None if there is none."""
        gallery_item = self.gallery_images.first()
        if gallery_item:
            return gallery_item.image
        return None

    def get_context(self, request):
        """Extend the page context with all tags and the page's absolute URL."""
        context = super(BlogPage, self).get_context(request)
        context['tags'] = get_all_tags()
        context['page_url'] = urllib.parse.urljoin(BASE_URL, self.url)
        return context

    @staticmethod
    def _block_value(block):
        """Return the value of one raw ``stream_data`` entry, or None.

        Entries are handled both as tuples whose second item is the value
        and as dicts carrying the value under the ``'value'`` key.
        """
        if isinstance(block, dict):
            return block.get('value')
        if isinstance(block, tuple) and len(block) >= 2:
            return block[1]
        return None

    def save(self, *args, **kwargs):
        """Refresh the derived search fields, then save the page.

        ``search_body`` is rebuilt from scratch on every save so that it
        never keeps text from blocks that have since been removed. Block
        values are joined with a space so the last word of one block does
        not fuse with the first word of the next.
        """
        values = (
            self._block_value(block) for block in (self.body.stream_data or ())
        )
        self.search_body = ' '.join(
            str(value) for value in values if value is not None
        ).lower()
        self.search_intro = self.intro.lower()
        return super().save(*args, **kwargs)

    search_fields = Page.search_fields + [
        index.SearchField('search_intro'),
        index.SearchField('search_body'),
    ]
...
search/views.py:
def search(request):
search_query = request.GET.get('query', None)
page = request.GET.get('page', 1)
# Search
if search_query:
search_results = BlogPage.objects.live().search(search_query.lower())
query = Query.get(search_query)
...
Alexey, thank you!
However, I found that the save method gets called twice,
so I had to use this code instead:
def save(self, *args, **kwargs):
    """Rebuild ``search_body`` from the text blocks, then save.

    Only blocks of type ``some_header`` or ``some_text`` contribute. Their
    values are joined with a space so that the text of adjacent blocks does
    not run together.
    """
    stream_data = self.blog_post_body.stream_data
    # Only dict-shaped entries are processed; for any other shape the
    # existing search_body is left untouched rather than blanked.
    if stream_data and isinstance(stream_data[0], dict):
        indexed_types = ('some_header', 'some_text')
        self.search_body = ' '.join(
            str(block['value'])
            for block in stream_data
            if block.get('type', '') in indexed_types
        )
    return super(BlogPost, self).save(*args, **kwargs)
StreamField uses DjangoJSONEncoder to encode its JSON, and that encoder has ensure_ascii=True, so non-ASCII characters are stored as "\\u...." escape sequences. The default database search backend simply does a text match against the stored value, so queries containing non-ASCII keywords fail to match.
def get_prep_value(self, value):
    """Convert a stream value into the string written to the database."""
    # An empty StreamValue with a nonempty raw_text attribute should have that
    # raw_text attribute written back to the db. (This is probably only useful
    # for reverse migrations that convert StreamField data back into plain text
    # fields.)
    is_raw_text_only = (
        isinstance(value, StreamValue)
        and not value
        and value.raw_text is not None
    )
    if is_raw_text_only:
        return value.raw_text

    prepared = self.stream_block.get_prep_value(value)
    return json.dumps(prepared, cls=DjangoJSONEncoder)
To store the text unescaped, you need to subclass StreamField and provide a custom JSON encoder with ensure_ascii=False. You then need to make sure your database can handle UTF-8 strings by default (this should be fine for PostgreSQL).
Alternatively, if you switch to another backend such as the PostgreSQL search backend, it extracts the text from the StreamField when building its indexes (introduced by https://github.com/wagtail/wagtail/pull/982 ), so you won't have this problem.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.