Skip to content

Commit

Permalink
Merge pull request #91 from ecmwf-projects/search-typeahead
Browse files Browse the repository at this point in the history
[WIP] support for search typeahead
  • Loading branch information
keul authored Dec 5, 2024
2 parents 0083d1b + 2ac2e34 commit 879a897
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 0 deletions.
2 changes: 2 additions & 0 deletions cads_catalogue_api_service/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
messages,
middlewares,
schema_org,
typeahead,
vocabularies,
)

Expand Down Expand Up @@ -89,6 +90,7 @@ async def lifespan(application: fastapi.FastAPI):
app.include_router(collection_ext.router)
app.include_router(doi.router)
app.include_router(contents.router)
app.include_router(typeahead.router)


def catalogue_openapi() -> dict[str, Any]:
Expand Down
61 changes: 61 additions & 0 deletions cads_catalogue_api_service/search_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,67 @@
WEIGHT_FULLTEXT = 0.03


def _escape_like_pattern(value: str, escape: str = "/") -> str:
    """Escape LIKE/ILIKE wildcards in ``value`` so that it is matched literally.

    The escape character itself is doubled first, then ``%`` and ``_`` are
    prefixed with it. The result must be used together with
    ``ESCAPE '<escape>'`` in the SQL expression.
    """
    return (
        value.replace(escape, escape + escape)
        .replace("%", escape + "%")
        .replace("_", escape + "_")
    )


def apply_filters_typeahead(
    session: sa.orm.Session,
    chars: str,
    search: sa.orm.Query | None = None,
    portals: list[str] | None = None,
    limit: int | None = None,
) -> sa.orm.Query:
    """Build a query returning title words that start with the given characters.

    The words are meant to be used as suggestions for searching datasets.
    They are obtained by lowercasing the resource titles and splitting them
    on single spaces; only distinct words longer than 2 characters are kept,
    sorted alphabetically.

    Args:
        session: sqlalchemy session object
        chars: initial characters of the words to find; matched
            case-insensitively and literally (``%`` and ``_`` are not
            treated as wildcards)
        search: current dataset query; when None, a query over all resources
            is used
        portals: list of datasets portals to consider
        limit: if specified, maximum number of suggestions returned

    Returns:
        A query selecting a single ``suggestion`` column (not yet executed).
    """
    if search is None:
        search = session.query(cads_catalogue.database.Resource)
    search = search.filter(cads_catalogue.database.Resource.hidden == False)  # noqa: E712
    if portals:
        search = search.filter(cads_catalogue.database.Resource.portal.in_(portals))

    # One row per (lowercased) word of every selected title.
    g = sa.func.unnest(
        sa.func.string_to_array(
            sa.func.lower(cads_catalogue.database.Resource.title), " "
        )
    ).label("g")
    # NOTE(review): `.scalar_subquery().alias(...)` is used here to obtain a
    # FROM-clause subquery; `Query.subquery(name)` looks like the documented
    # API for that -- confirm before changing.
    t = search.with_entities(g).scalar_subquery().alias("t")
    # Collapse duplicates: aggregate distinct words, then expand them to rows.
    suggestion = sa.func.unnest(sa.func.array_agg(sa.func.distinct(t.c.g))).label(
        "suggestion"
    )
    tt = session.query(suggestion).select_from(t).scalar_subquery().alias("tt")

    # `chars` comes from the user: escape LIKE wildcards so that e.g. "%%"
    # does not match every word.
    escape_char = "/"
    prefix_pattern = _escape_like_pattern(chars, escape_char) + "%"
    # consider only (resulting words with length > 2) AND (words starting with chars):
    word_filter = sa.and_(
        sa.func.length(tt.c.suggestion) > 2,
        tt.c.suggestion.ilike(prefix_pattern, escape=escape_char),
    )
    search = (
        session.query(tt.c.suggestion)
        .select_from(tt)
        .filter(word_filter)
        .order_by(tt.c.suggestion)
    )
    if limit is not None:
        search = search.limit(limit)  # type: ignore

    # final sql for `apply_filters_typeahead(session, 'er', portals=['cams', 'c3s'], limit=10)`:
    # SELECT suggestion FROM
    # (
    #   SELECT unnest(array_agg(distinct(t.g))) AS suggestion FROM
    #   (
    #     SELECT unnest(string_to_array(lower(title), ' ')) AS g FROM resources
    #     WHERE resources.hidden = false AND resources.portal IN ('cams', 'c3s')
    #   )
    #   AS t
    # ) AS tt
    # WHERE length(tt.suggestion) > 2 AND tt.suggestion ILIKE 'er%' ESCAPE '/'
    # ORDER BY tt.suggestion
    # LIMIT 10;

    return search


def split_by_category(keywords: list) -> list:
"""Given a list of keywords composed by a "category: value", split them in multiple lists.
Expand Down
36 changes: 36 additions & 0 deletions cads_catalogue_api_service/typeahead.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import fastapi

from . import dependencies, search_utils

# Router for the typeahead endpoint; mounted without a path prefix and
# excluded from the generated OpenAPI schema (include_in_schema=False).
router = fastapi.APIRouter(
    tags=["typeahead"],
    prefix="",
    include_in_schema=False,
    responses={fastapi.status.HTTP_404_NOT_FOUND: {"description": "Not found"}},
)


@router.get("/typeahead")
def typeahead(
    session=fastapi.Depends(dependencies.get_session),
    portals: list[str] | None = fastapi.Depends(dependencies.get_portals),
    chars: str = fastapi.Query(..., min_length=2, max_length=50),
) -> list[str]:
    """Typeahead for CADS webportal search feature.

    Args:
        session: database session, injected via ``dependencies.get_session``
        portals: portals to restrict the lookup to, injected via
            ``dependencies.get_portals``
        chars: required query parameter (2 to 50 characters) holding the
            initial characters typed by the user

    Returns:
        The suggested words produced by
        ``search_utils.apply_filters_typeahead`` (called without a limit).
    """
    suggestions_query = search_utils.apply_filters_typeahead(
        session, chars, search=None, portals=portals
    )
    # The query selects a single column, so scalars() yields plain values.
    return session.execute(suggestions_query).scalars().all()

0 comments on commit 879a897

Please sign in to comment.