Simple solution is always the best.

- modify the search function to highlight matches correctly and sort by match count
This commit is contained in:
Massimo Maggioni 2021-12-20 22:50:39 +01:00
parent 7dff69b6f5
commit cec08de86e
3 changed files with 77 additions and 293 deletions

View File

@ -30,7 +30,8 @@ function replaceHTMLEnt(str) {
} }
function escapeRegExp(string) { function escapeRegExp(string) {
return string.replace(/[.*+\-?^${}()|[\]\\]/g, '\\$&'); //return string.replace(/[.*+\-?^${}()|[\]\\]/g, '\\$&');
return string.replace(/[.*+\-?^${}()[\]\\]/g, '\\$&');
} }
class Search { class Search {
@ -53,14 +54,50 @@ class Search {
this.bindSearchForm(); this.bindSearchForm();
} }
/// clean input keywords list
private reorganizeKeywords(keywords: string[]) {
let tmp: string[] = [];
/// do the search only on keywords length >= 2
for (let i=0; i < keywords.length; i++){
if (keywords[i].length === 1) keywords.splice(i,1);
}
/// Sort keywords from short to long
keywords.sort((a, b) => {
return b.length - a.length
});
/// remove the keywords contained in other keywords
for (let i = 0; i < keywords.length; i++) {
for (let j = 0; j < keywords.length; j++) {
if (i !== j) {
if (keywords[j].includes(keywords[i])) {
keywords.splice(i,1);
i=j=0;
}
}
}
}
return keywords;
}
private async searchKeywords(keywords: string[]) { private async searchKeywords(keywords: string[]) {
const rawData = await this.getData(); const rawData = await this.getData();
let results: pageData[] = []; let results: pageData[] = [];
/// Sort keywords by their length keywords = this.reorganizeKeywords(keywords);
keywords.sort((a, b) => { if (keywords.length === 0) return;
return b.length - a.length
}); /// from a b to a|b for regexp
let k = "";
for (let i = 0; i < keywords.length; i++) {
if (keywords[i] === "")
continue;
if (i == keywords.length - 1) {
k = k + keywords[i];
} else {
k = k + keywords[i] + "|";
}
}
for (const item of rawData) { for (const item of rawData) {
let result = { let result = {
@ -71,42 +108,45 @@ class Search {
let matched = false; let matched = false;
for (const keyword of keywords) { if (k === '') continue;
if (keyword === '') continue;
const regex = new RegExp(escapeRegExp(replaceHTMLEnt(keyword)), 'gi'); const regex = new RegExp(escapeRegExp(replaceHTMLEnt(k)), 'gi');
const contentMatch = regex.exec(result.content); const contentMatch = regex.exec(result.content);
regex.lastIndex = 0; /// Reset regex regex.lastIndex = 0; /// Reset regex
const titleMatch = regex.exec(result.title); const titleMatch = regex.exec(result.title);
regex.lastIndex = 0; /// Reset regex regex.lastIndex = 0; /// Reset regex
if (titleMatch) { if (titleMatch) {
result.title = result.title.replace(regex, Search.marker); result.title = result.title.replace(regex, Search.marker);
/// count the occurrencies in an indirect way
result.matchCount += result.title.split("<mark>").length -1;
}
if (titleMatch || contentMatch) {
matched = true;
let start = 0,
end = 100;
if (contentMatch) {
start = contentMatch.index - 20;
end = contentMatch.index + 80
if (start < 0) start = 0;
} }
if (titleMatch || contentMatch) { if (result.preview.indexOf(k) !== -1) {
matched = true; result.preview = result.preview.replace(regex, Search.marker);
++result.matchCount; /// count the occurrencies in an indirect way
result.matchCount += result.preview.split("<mark>").length -1;
let start = 0, }
end = 100; else {
if (start !== 0) result.preview += `[...] `;
if (contentMatch) { result.preview += `${result.content.slice(start, end).replace(regex, Search.marker)} `;
start = contentMatch.index - 20; /// count the occurrencies in an indirect way
end = contentMatch.index + 80 result.matchCount += result.preview.split("<mark>").length -1;
if (start < 0) start = 0;
}
if (result.preview.indexOf(keyword) !== -1) {
result.preview = result.preview.replace(regex, Search.marker);
}
else {
if (start !== 0) result.preview += `[...] `;
result.preview += `${result.content.slice(start, end).replace(regex, Search.marker)} `;
}
} }
} }
@ -225,7 +265,7 @@ class Search {
<a href={item.permalink}> <a href={item.permalink}>
<div class="article-details"> <div class="article-details">
<h2 class="article-title" dangerouslySetInnerHTML={{ __html: item.title }}></h2> <h2 class="article-title" dangerouslySetInnerHTML={{ __html: item.title }}></h2>
<secion class="article-preview" dangerouslySetInnerHTML={{ __html: item.preview }}></secion> <section class="article-preview" dangerouslySetInnerHTML={{ __html: item.preview }}></section>
</div> </div>
{item.image && {item.image &&
<div class="article-image"> <div class="article-image">
@ -260,4 +300,4 @@ window.addEventListener('load', () => {
}, 0); }, 0);
}) })
export default Search; export default Search;

View File

@ -1,252 +0,0 @@
/**
 * One entry of the search index, extended with the per-search fields
 * that `Search.searchKeywords` fills in.
 */
interface pageData {
    /** Page title; searched and rendered as the result heading. */
    title: string;
    /** Page date (not read by the search code in this file). */
    date: string;
    /** Link target of the rendered result. */
    permalink: string;
    /** Page text that keywords are matched against and previews are sliced from. */
    content: string;
    /** Optional image URL; when present it is rendered next to the result. */
    image?: string;
    /** Excerpt assembled during a search. */
    preview: string;
    /** Number of keywords that matched this page; used to order results. */
    matchCount: number;
}
/**
 * Lookup table used to escape HTML special characters as HTML entities.
 * Edited from:
 * @link https://stackoverflow.com/a/5499821
 *
 * Keys are the raw characters, values are the entities that replace them.
 */
const tagsToReplace = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    "\"": "&quot;",
    "…": "&hellip;"
};
/**
 * Look up the HTML entity for a single character.
 *
 * @param tag Character to translate.
 * @returns The entity from `tagsToReplace`, or `tag` itself when the table
 *          has no (truthy) entry for it.
 */
function replaceTag(tag) {
    const entity = tagsToReplace[tag];
    return entity ? entity : tag;
}
/**
 * Replace every `&`, `<`, `>` and `"` in a string with the value returned
 * by `replaceTag` for that character.
 *
 * @param str Text to escape.
 * @returns The escaped text.
 */
function replaceHTMLEnt(str) {
    const escapable = /[&<>"]/g;
    return str.replace(escapable, (char) => replaceTag(char));
}
/**
 * Escape regular-expression metacharacters so the text can be embedded in a
 * `RegExp` and matched literally.
 *
 * @param string Text to escape.
 * @returns The text with a backslash in front of each metacharacter.
 */
function escapeRegExp(string) {
    const metaCharacters = /[.*+\-?^${}()|[\]\\]/g;
    return string.replace(metaCharacters, (matched) => `\\${matched}`);
}
/**
 * Client-side search widget.
 *
 * The page index is fetched once from the URL stored in the form's
 * `data-json` attribute. The query is kept in sync with the `keyword`
 * URL parameter, and results are rendered into `list`.
 */
class Search {
    /** Cached page index; unset until the first `getData()` call completes. */
    private data: pageData[];
    private form: HTMLFormElement;
    private input: HTMLInputElement;
    private list: HTMLDivElement;
    /** Heading element that displays the result summary line. */
    private resultTitle: HTMLHeadingElement;
    /** Summary template containing `#PAGES_COUNT` and `#TIME_SECONDS` placeholders. */
    private resultTitleTemplate: string;

    /**
     * Store the DOM references, run a search for the keyword already present
     * in the URL (if any) and register the event listeners.
     */
    constructor({ form, input, list, resultTitle, resultTitleTemplate }) {
        this.form = form;
        this.input = input;
        this.list = list;
        this.resultTitle = resultTitle;
        this.resultTitleTemplate = resultTitleTemplate;

        this.handleQueryString();
        this.bindQueryStringChange();
        this.bindSearchForm();
    }

    /**
     * Match every keyword against the title and content of each indexed page.
     *
     * @param keywords Keywords to look for; empty strings are ignored and the
     *                 array itself is left untouched.
     * @returns The pages matched by at least one keyword, each with a preview
     *          and a match count, ordered by descending match count.
     */
    private async searchKeywords(keywords: string[]) {
        const rawData = await this.getData();
        const results: pageData[] = [];

        /// Sort a copy of the keywords by their length (longest first) and
        /// build each pattern once instead of once per page.
        const matchers = [...keywords]
            .sort((a, b) => {
                return b.length - a.length
            })
            .filter((keyword) => keyword !== '')
            .map((keyword) => ({
                keyword,
                regex: new RegExp(escapeRegExp(replaceHTMLEnt(keyword)), 'gi')
            }));

        for (const item of rawData) {
            const result = {
                ...item,
                preview: '',
                matchCount: 0
            };

            let matched = false;

            for (const { keyword, regex } of matchers) {
                /// The pattern is global, so lastIndex must be reset around every exec
                regex.lastIndex = 0;
                const contentMatch = regex.exec(result.content);
                regex.lastIndex = 0;
                const titleMatch = regex.exec(result.title);
                regex.lastIndex = 0;

                if (!titleMatch && !contentMatch) continue;

                matched = true;
                ++result.matchCount;

                /// Default excerpt is the first 100 characters; when the content
                /// matched, show 20 characters before and 80 after the match start
                let start = 0,
                    end = 100;

                if (contentMatch) {
                    start = contentMatch.index - 20;
                    end = contentMatch.index + 80
                    if (start < 0) start = 0;
                }

                /// Skip the excerpt when the preview already shows this keyword
                if (result.preview.indexOf(keyword) == -1) {
                    if (start !== 0) result.preview += `[...] `;
                    result.preview += `${result.content.slice(start, end)} `;
                }
            }

            if (matched) {
                result.preview += '[...]';
                results.push(result);
            }
        }

        /** Result with more matches appears first */
        return results.sort((a, b) => {
            return b.matchCount - a.matchCount;
        });
    }

    /**
     * Run a search, replace the rendered result list and update the summary
     * line with the number of results and the elapsed time in seconds.
     */
    private async doSearch(keywords: string[]) {
        const startTime = performance.now();

        const results = await this.searchKeywords(keywords);
        this.clear();

        for (const item of results) {
            this.list.append(Search.render(item));
        }

        const endTime = performance.now();

        this.resultTitle.innerText = this.generateResultTitle(results.length, ((endTime - startTime) / 1000).toPrecision(1));
    }

    /** Fill the `#PAGES_COUNT` and `#TIME_SECONDS` placeholders of the summary template. */
    private generateResultTitle(resultLen, time) {
        return this.resultTitleTemplate.replace("#PAGES_COUNT", resultLen).replace("#TIME_SECONDS", time);
    }

    /**
     * Return the page index, fetching it from the URL in the form's
     * `data-json` attribute on first use and caching it afterwards.
     */
    public async getData() {
        if (!this.data) {
            /// Not fetched yet
            const jsonURL = this.form.dataset.json;
            const response = await fetch(jsonURL);
            this.data = await response.json();
        }

        return this.data;
    }

    /**
     * Search while the user types: on every `input` / `compositionend` event
     * the URL is updated and, when the text changed, a new search is started.
     */
    private bindSearchForm() {
        let lastSearch = '';

        const eventHandler = (e) => {
            e.preventDefault();
            const keywords = this.input.value;

            Search.updateQueryString(keywords, true);

            if (keywords === '') {
                return this.clear();
            }

            /// Same text as the previous search: nothing to do
            if (lastSearch === keywords) return;
            lastSearch = keywords;

            this.doSearch(keywords.split(' '));
        }

        this.input.addEventListener('input', eventHandler);
        this.input.addEventListener('compositionend', eventHandler);
    }

    /** Remove the rendered results and the summary line. */
    private clear() {
        this.list.innerHTML = '';
        this.resultTitle.innerText = '';
    }

    /** Re-run the search from the URL whenever a `popstate` event fires. */
    private bindQueryStringChange() {
        window.addEventListener('popstate', (e) => {
            this.handleQueryString()
        })
    }

    /**
     * Copy the `keyword` URL parameter into the input and search for it;
     * clear the results when the parameter is missing or empty.
     */
    private handleQueryString() {
        const pageURL = new URL(window.location.toString());
        const keywords = pageURL.searchParams.get('keyword');
        /// searchParams.get() returns null when the parameter is absent
        this.input.value = keywords === null ? '' : keywords;

        if (keywords) {
            this.doSearch(keywords.split(' '));
        }
        else {
            this.clear()
        }
    }

    /**
     * Write the current query into the `keyword` URL parameter.
     *
     * @param keywords     Query text; an empty string removes the parameter.
     * @param replaceState When true the current history entry is replaced,
     *                     otherwise a new entry is pushed.
     */
    private static updateQueryString(keywords: string, replaceState = false) {
        const pageURL = new URL(window.location.toString());

        if (keywords === '') {
            pageURL.searchParams.delete('keyword')
        }
        else {
            pageURL.searchParams.set('keyword', keywords);
        }

        if (replaceState) {
            window.history.replaceState('', '', pageURL.toString());
        }
        else {
            window.history.pushState('', '', pageURL.toString());
        }
    }

    /**
     * Build the markup for one search result. Title and preview are handed
     * to `dangerouslySetInnerHTML`; the image block is only emitted when the
     * page has an image.
     */
    public static render(item: pageData) {
        return <article>
            <a href={item.permalink}>
                <div class="article-details">
                    <h2 class="article-title" dangerouslySetInnerHTML={{ __html: item.title }}></h2>
                    <section class="article-preview" dangerouslySetInnerHTML={{ __html: item.preview }}></section>
                </div>
                {item.image &&
                    <div class="article-image">
                        <img src={item.image} loading="lazy" />
                    </div>
                }
            </a>
        </article>;
    }
}
/**
 * Extend the global `Window` type with the template string that the
 * `load` handler in this file reads from `window.searchResultTitleTemplate`
 * and passes to `Search` as `resultTitleTemplate`.
 * NOTE(review): the value is presumably assigned by the page that embeds this
 * script — that assignment is not visible here; confirm.
 */
declare global {
interface Window {
searchResultTitleTemplate: string;
}
}
/// Once the page has loaded, look up the search elements and start the
/// Search widget; the work is deferred through a zero-delay timeout.
window.addEventListener('load', () => {
    setTimeout(() => {
        const form = document.querySelector('.search-form') as HTMLFormElement;
        const input = form.querySelector('input') as HTMLInputElement;
        const list = document.querySelector('.search-result--list') as HTMLDivElement;
        const resultTitle = document.querySelector('.search-result--title') as HTMLHeadingElement;

        new Search({
            form,
            input,
            list,
            resultTitle,
            resultTitleTemplate: window.searchResultTitleTemplate
        });
    }, 0);
})
export default Search;

View File

@ -119,10 +119,6 @@ params:
tagCloud: tagCloud:
limit: 10 limit: 10
# search modes: original, nohighlight, wholeword, highlight
search:
mode: nohighlight
opengraph: opengraph:
twitter: twitter:
# Your Twitter username # Your Twitter username