cri.dev
about posts rss

Web Workers + search index = decent blog search

Published on
Tagged with web16 workers1 search4

I recently experimented with vanilla JS and a basic Web Worker to set up search for a static website (although the approach is not limited to static sites).

In this post I’ll go through the basic idea, implementation and examples to set up a blog search for Eleventy, Hugo, Jekyll and other static-site generators.

Browser - Web Worker flow visualized

Click to see the full image

Detailed steps

Browser: loads search script and worker 🤖

The search script loads the worker

new Worker("/js/search.worker.js")

A keyup event listener is attached to the search input rendered on the browser

.addEventListener('keyup', () => { ... })

To receive worker search results, the search script defines an onmessage callback function:

worker.onmessage = () => { ... }

Worker: preloads search index 🔍

On the worker side, a GET request is made to fetch the search.json file that contains an optimized search index

The worker also registers an onmessage callback function to receive search requests from the browser:

onmessage = () => { ... }

Browser: User enters search term 🔎

Via the worker's postMessage function, a message containing the search term entered by the user is sent to the worker.

Worker: responds with search results

Worker searches the received term in an optimized index of words linked to pages and posts.

Browser: renders search results

The browser receives a callback to the previously registered worker.onmessage function.

The search results are shown to the user through various techniques.

Toggle visibility of search results

One technique, perhaps the lighter one, is to simply toggle the visibility of posts based on their href and the search results, e.g.:

// hide all posts
posts$.querySelectorAll('a').forEach(r => r.classList.add('hidden'))
// show only the ones matching the search results;
// skipped when there are none, because an empty selector makes
// querySelectorAll throw a SyntaxError
if (results.length > 0) {
  posts$.querySelectorAll(results.map(r => `[href="${r}"]`).join(','))
    .forEach(r => r.classList.remove('hidden'))
}

This works best if you have a search page on which you initially render all the posts and pages you want to search.

Rerender search results

Another approach is to rerender all search results, e.g.:

// replace the container's markup with one rendered link per search result
results$.innerHTML = results.map(toResultHtml).join('')
/**
 * Render one search result as a link wrapping its title.
 *
 * The values are interpolated as-is, so `url` and `title` must already be
 * safe to embed in HTML.
 *
 * @param {{ url: string, title: string }} l result record
 * @returns {string} markup for the result, without surrounding whitespace
 */
function toResultHtml(l) {
  const { url, title } = l
  const markupLines = [
    `<a class="block searchable-item" href="${url}">`,
    `    <h3 class="mt-0">${title}</h3>`,
    '  </a>'
  ]
  return markupLines.join('\n')
}

This works best if you have a lot of pages and you don’t necessarily have a page where you render all your pages initially.

Code

To see it working in the wild, check out the /posts page!

The worker script

The full worker script:

/**
 * Build a search function over a prebuilt index.
 *
 * @param {{ items: Array<{ url: string, date: string }>, index: Object<string, string[]> }} searchJson
 *   Parsed search index: `index` maps a word to the URLs of the items that
 *   contain it, `items` holds the item records those URLs refer to.
 * @param {number} [limit=100] maximum number of URLs returned per search
 * @returns {(term: string) => string[]} function returning the URLs of the
 *   matching items, newest first
 */
function createSearcher (searchJson, limit = 100) {
  const searchKeys = Object.keys(searchJson.index)
  // One-time URL lookup table, so resolving a hit does not rescan `items`.
  const itemsByUrl = new Map()
  for (const item of searchJson.items) {
    // Keep the first record per URL.
    if (!itemsByUrl.has(item.url)) itemsByUrl.set(item.url, item)
  }

  return function search (term) {
    // A Set keeps insertion order and drops items hit by several words.
    const matches = new Set()
    for (const word of String(term).split(' ').filter(Boolean)) {
      for (const key of searchKeys) {
        // Substring match: "work" hits both "worker" and "network".
        if (!key.includes(word)) continue
        for (const url of searchJson.index[key]) {
          const item = itemsByUrl.get(url)
          // Indexed URLs without an item record are ignored.
          if (item) matches.add(item)
        }
      }
    }
    // Sort before truncating, so the cut keeps the newest items.
    const newestFirst = [...matches].sort((a, b) => new Date(b.date) - new Date(a.date))
    return newestFirst.slice(0, limit).map(item => item.url)
  }
}

/**
 * Fetch /search.json and turn it into a search function.
 *
 * @returns {Promise<((term: string) => string[]) | null>} the search function,
 *   or null when the index could not be loaded (the error is logged)
 */
async function loadSearcher () {
  try {
    const res = await fetch('/search.json')
    if (!res.ok) throw new Error(`search index request failed with status ${res.status}`)
    return createSearcher(await res.json())
  } catch (err) {
    console.error('could not load search index', err)
    return null
  }
}

// Start loading right away; the handler below awaits this promise, so terms
// received while the index is still loading are answered once it is ready.
const searcherReady = loadSearcher()

// Answers every search request with { type: 'search', results, term }.
globalThis.onmessage = async function (e) {
  const term = typeof e.data === 'string' ? e.data : ''
  const search = await searcherReady
  if (!search) return
  postMessage({ type: 'search', results: search(term), term })
}

Search script and markup

<div class="searchable">
  <input class="searchable-input" type="text" placeholder="🔍 Search posts"/>
  <p class="status"></p>
  <div class="postslist"></div>
</div>
// elements the search UI is wired to
const search$ = document.querySelector('.searchable-input')
const results$ = document.querySelector('.postslist')
const status$ = document.querySelector('.status')

if (!search$) console.error('missing search element')
if (!results$) console.error('missing results element')

// without the input or the list there is nothing to wire up, so skip the
// setup instead of throwing on the first property access
if (search$ && results$ && window.Worker) {
  console.log('has worker')
  const worker = new Worker("/js/search.worker.js")
  // most recent term taken from the input; replies for any other term are stale
  let currentTerm = ''

  // the status line is optional
  const setStatus = text => {
    if (status$) status$.innerText = text
  }

  search$.addEventListener('keyup', function (e) {
    const term = e.target.value.toLowerCase().trim()
    currentTerm = term
    if (term) worker.postMessage(term)
    else {
      // empty input: clear the status and show every post again
      setStatus('')
      results$.querySelectorAll('a').forEach(r => r.classList.remove('hidden'))
    }
  })

  worker.onmessage = function (e) {
    if (e.data.type !== 'search') return
    const results = e.data.results
    const term = e.data.term
    if (!Array.isArray(results)) return console.info(e.data)
    // a reply for a term the user has since changed or cleared must not
    // overwrite what is currently shown
    if (term !== currentTerm) return

    results$.querySelectorAll('a').forEach(r => r.classList.add('hidden'))
    if (results.length === 0) {
      // no matches: everything stays hidden; an empty selector would make
      // querySelectorAll throw a SyntaxError
      setStatus('')
      return
    }
    results$.querySelectorAll(results.map(r => `[href="${r}"]`).join(','))
      .forEach(r => r.classList.remove('hidden'))
    setStatus(`${results.length} results for ${term}`)
  }
}

create search index

This is the script I use to create the search index:

#!/usr/bin/env node

const fs = require('fs')
const path = require('path')

// Build the index only when this file is executed directly; the typeof
// checks keep the test safe where CommonJS globals are not defined.
if (typeof require === 'function' && typeof module !== 'undefined' && require.main === module) {
  main()
    .then(console.log)
    .catch((err) => {
      console.error(err)
      // signal the failure to the calling shell / build pipeline
      process.exitCode = 1
    })
}

/**
 * Read ../_site/db.json, build the word index and write it, together with
 * the items, to ../search.json (both relative to this script).
 *
 * @returns {Promise<string>} short summary naming the written file
 */
async function main () {
  console.log('nlp')
  const search = require('../_site/db.json')

  const index = buildIndex(search)
  console.log('total words', Object.keys(index).length)

  const fileContents = JSON.stringify({ items: search, index })
  const outputPath = path.resolve(__dirname, '..', 'search.json')
  fs.writeFileSync(outputPath, fileContents, { encoding: 'utf-8' })
  return `wrote ${outputPath}`
}

/**
 * Split text into lowercase index words.
 *
 * Commas and parentheses are stripped; tokens containing "&", "." or "://"
 * are dropped entirely.
 *
 * @param {string} text
 * @returns {string[]} words in order of appearance (duplicates included)
 */
function tokenize (text) {
  return text
    // any whitespace separates words, not only single spaces
    .split(/\s+/)
    .map(s => s.replace(/[,()]/g, '').toLowerCase())
    .filter(Boolean)
    .filter(s => !s.includes('&') && !s.includes('.') && !s.includes('://'))
}

/**
 * Map every word found in the items' title and content to the URLs of the
 * items containing it.
 *
 * @param {Array<{ title?: string, content?: string, url: string }>} items
 * @returns {Object<string, string[]>} word -> unique URLs, in first-seen order
 */
function buildIndex (items) {
  const urlsByWord = new Map()
  for (const item of items) {
    for (const word of tokenize(`${item.title || ''} ${item.content || ''}`)) {
      if (!urlsByWord.has(word)) urlsByWord.set(word, new Set())
      urlsByWord.get(word).add(item.url)
    }
  }
  // Object.fromEntries creates own properties, so words such as "__proto__"
  // are stored like any other key.
  return Object.fromEntries(
    [...urlsByWord].map(([word, urls]) => [word, [...urls]])
  )
}

It is based on this db.json file (generated from a db.njk template). With Eleventy, it would look like this:


---
permalink: db.json
eleventyExcludeFromCollections: true
---
{# One JSON record per post. Tags are read from post.data, where Eleventy keeps a collection item's front matter data. #}
[
{%- for post in collections.post %}
{
  "title": "{{ post.data.title }}",
  "url": "{{ post.url | url }}",
  "date": "{{ (post.data.updated or post.date) | rssDate }}",
  "content": "{{ post.templateContent | dehtml | clean | escape }}",
  "tags": [
    {%- for tag in post.data.tags %}
    "{{ tag }}"{% if not loop.last %},{% endif %}
    {%- endfor %}
  ]
}{% if not loop.last %},{% endif %}
{%- endfor %}
]

Graceful degradation

You could add some simple JS to make the search work without Web Workers. What a shame though:

;(function search () {
  ;[...document.querySelectorAll('.searchable')].forEach(makeSearchable)

  /**
   * Wire the input inside a `.searchable` container so that typing filters
   * the container's `.searchable-item` elements by their text
   * (case-insensitive substring match).
   *
   * @param {Element} $searchable container holding the input and the items
   */
  function makeSearchable ($searchable) {
    const $allSearchableItems = [...$searchable.querySelectorAll('.searchable-item')]
    const $search = $searchable.querySelector('input')
    // a container without an input has nothing to listen to
    if (!$search) return
    let previousSearchTerm = ''
    $search.addEventListener('keyup', (e) => {
      const searchTerm = e.target.value.toLowerCase()
      if (searchTerm === '') {
        previousSearchTerm = ''
        $allSearchableItems.forEach($el => $el.classList.remove('hidden'))
        return
      }

      // The result set can only shrink when the new term extends the previous
      // one; only then is it safe to skip the items that are already hidden.
      const refining = searchTerm.startsWith(previousSearchTerm)
      const $items = refining ? [...$searchable.querySelectorAll('.searchable-item:not(.hidden)')] : $allSearchableItems
      $items.forEach($el => {
        // plain substring test: the term is raw user input, and characters
        // such as "(" or "[" are not a valid regular expression
        const show = $el.innerText.toLowerCase().includes(searchTerm)
        if (show) {
          $el.classList.remove('hidden')
        } else {
          $el.classList.add('hidden')
        }
      })
      previousSearchTerm = searchTerm
    })
  }
})()

Here, have a slice of pizza 🍕