Knowledge Base Module

Service

`unique_toolkit.services.knowledge_base`

`KnowledgeBaseService`

Provides methods for searching, downloading and uploading content in the knowledge base.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

class KnowledgeBaseService:
    """
    Provides methods for searching, downloading and uploading content in the knowledge base.
    """

    def __init__(
        self,
        company_id: str,
        user_id: str,
        metadata_filter: dict[str, Any] | None = None,
    ):
        """
        Initialize the KnowledgeBaseService with a company_id, a user_id and an
        optional default metadata filter.

        Args:
            company_id (str): The company ID; passed through ``validate_required_values``.
            user_id (str): The user ID; passed through ``validate_required_values``.
            metadata_filter (dict[str, Any] | None): Metadata filter stored on the
                service. Defaults to None.
        """
        # Assigned before validation, so the attribute is already present if
        # validation raises.
        self._metadata_filter = None

        validated_company_id, validated_user_id = validate_required_values(
            [company_id, user_id]
        )

        self._company_id = validated_company_id
        self._user_id = validated_user_id
        self._metadata_filter = metadata_filter

    @classmethod
    @deprecated(
        "Use UniqueContext.from_chat_event(event) (if you have a ChatEvent) or "
        "UniqueContext.from_event(event) (for any BaseEvent) with UniqueServiceFactory instead."
    )
    def from_event(cls, event: BaseEvent[Any]):
        """
        Initialize the KnowledgeBaseService with an event.

        The metadata filter is taken from ``event.payload.metadata_filter`` when
        the event is a ``ChatEvent`` or ``Event``; for any other event type the
        service is created without a metadata filter.

        Args:
            event: The event supplying ``company_id`` and ``user_id``.
        """
        carries_payload_filter = isinstance(event, (ChatEvent | Event))
        metadata_filter = (
            event.payload.metadata_filter if carries_payload_filter else None
        )

        return cls(
            metadata_filter=metadata_filter,
            user_id=event.user_id,
            company_id=event.company_id,
        )

    @classmethod
    def from_context(cls, context: UniqueContext) -> Self:
        """Build a KnowledgeBaseService from a :class:`UniqueContext`.

        This is the preferred constructor when using the service factory pattern.
        The metadata filter comes from ``context.chat`` when a chat is present;
        otherwise the service is created without one.

        Args:
            context: The request context carrying auth and chat information.
        """
        metadata_filter = None
        if context.chat is not None:
            metadata_filter = context.chat.metadata_filter

        auth = context.auth
        return cls(
            company_id=auth.get_confidential_company_id(),
            user_id=auth.get_confidential_user_id(),
            metadata_filter=metadata_filter,
        )

    @classmethod
    def from_settings(
        cls,
        settings: UniqueSettings | str | None = None,
        metadata_filter: dict[str, Any] | None = None,
        **kwargs: Any,
    ):
        """
        Initialize the KnowledgeBaseService with a settings object and metadata filter.

        Args:
            settings: A ``UniqueSettings`` instance, a string that is passed as
                ``filename`` to ``UniqueSettings.from_env_auto_with_sdk_init``, or
                None to call that loader without arguments.
            metadata_filter: Explicit metadata filter. When None and the settings
                carry a chat context, that chat's metadata filter is used.
            **kwargs: Accepted but not used by this method.
        """
        if isinstance(settings, str):
            settings = UniqueSettings.from_env_auto_with_sdk_init(filename=settings)
        elif settings is None:
            settings = UniqueSettings.from_env_auto_with_sdk_init()

        # Only look at the chat context when the caller gave no explicit filter.
        if metadata_filter is None:
            chat = settings.context.chat
            if chat is not None:
                metadata_filter = chat.metadata_filter

        auth = settings.authcontext
        return cls(
            company_id=auth.get_confidential_company_id(),
            user_id=auth.get_confidential_user_id(),
            metadata_filter=metadata_filter,
        )

    # Content Search
    # ------------------------------------------------------------------------------------------------

    # Typing overload: chunk search with an explicit ``metadata_filter`` and no
    # ``content_ids``. ``scope_ids`` is not part of this signature.
    @overload
    def search_content_chunks(
        self,
        *,
        search_string: str,
        search_type: ContentSearchType,
        limit: int,
        metadata_filter: dict[str, Any],
        score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
        search_language: str = DEFAULT_SEARCH_LANGUAGE,
        reranker_config: ContentRerankerConfig | None = None,
    ) -> list[ContentChunk]: ...

    # Typing overload: same as the previous one, with an additional required
    # ``content_ids`` list.
    @overload
    def search_content_chunks(
        self,
        *,
        search_string: str,
        search_type: ContentSearchType,
        limit: int,
        metadata_filter: dict[str, Any],
        content_ids: list[str],
        score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
        search_language: str = DEFAULT_SEARCH_LANGUAGE,
        reranker_config: ContentRerankerConfig | None = None,
    ) -> list[ContentChunk]: ...

    def search_content_chunks(
        self,
        *,
        search_string: str,
        search_type: ContentSearchType,
        limit: int,
        search_language: str = DEFAULT_SEARCH_LANGUAGE,
        reranker_config: ContentRerankerConfig | None = None,
        scope_ids: list[str] | None = None,
        metadata_filter: dict[str, Any] | None = None,
        content_ids: list[str] | None = None,
        score_threshold: float | None = None,
    ) -> list[ContentChunk]:
        """
        Synchronously search the knowledge base for content chunks.

        Args:
            search_string (str): The search string.
            search_type (ContentSearchType): The type of search to perform.
            limit (int): The maximum number of results to return.
            search_language (str, optional): The language for the full-text search.
                Defaults to ``DEFAULT_SEARCH_LANGUAGE``.
            reranker_config (ContentRerankerConfig | None, optional): The reranker
                configuration. Defaults to None.
            scope_ids (list[str] | None, optional): Deprecated. A non-empty list
                emits a ``DeprecationWarning`` and is merged into the metadata
                filter as a folder-id clause; do not use for new code.
            metadata_filter (dict | None, optional): UniqueQL metadata filter. When
                None, the filter stored on the service is used. Defaults to None.
            content_ids (list[str] | None, optional): The content IDs to search
                within. Defaults to None.
            score_threshold (float | None, optional): Minimum similarity score for
                results. Defaults to None, which is forwarded unchanged to the
                underlying search function.

        Returns:
            list[ContentChunk]: The search results.

        Raises:
            Exception: Any error raised by the search is logged and re-raised.
        """
        effective_filter = (
            self._metadata_filter if metadata_filter is None else metadata_filter
        )

        if scope_ids:
            warnings.warn(
                "Passing scope_ids to KnowledgeBaseService.search_content_chunks is "
                "deprecated; use metadata_filter with folderId operator 'in' instead.",
                DeprecationWarning,
                stacklevel=2,
            )
            # Fold the legacy scope restriction into the filter that is sent.
            effective_filter = merge_scope_clause_into_metadata_filter(
                build_folder_id_in_clause(scope_ids), effective_filter
            )

        try:
            return search_content_chunks(
                user_id=self._user_id,
                company_id=self._company_id,
                chat_id="",
                search_string=search_string,
                search_type=search_type,
                limit=limit,
                search_language=search_language,
                reranker_config=reranker_config,
                chat_only=False,
                metadata_filter=effective_filter,
                content_ids=content_ids,
                score_threshold=score_threshold,
            )
        except Exception as e:
            _LOGGER.error(f"Error while searching content chunks: {e}")
            raise e

    # Typing overload: async chunk search with an explicit ``metadata_filter`` and
    # no ``content_ids``. ``scope_ids`` is not part of this signature.
    @overload
    async def search_content_chunks_async(
        self,
        *,
        search_string: str,
        search_type: ContentSearchType,
        limit: int,
        metadata_filter: dict[str, Any],
        score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
        search_language: str = DEFAULT_SEARCH_LANGUAGE,
        reranker_config: ContentRerankerConfig | None = None,
    ) -> list[ContentChunk]: ...

    # Typing overload: same as the previous one, with an additional required
    # ``content_ids`` list.
    @overload
    async def search_content_chunks_async(
        self,
        *,
        search_string: str,
        search_type: ContentSearchType,
        limit: int,
        metadata_filter: dict[str, Any],
        content_ids: list[str],
        score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
        search_language: str = DEFAULT_SEARCH_LANGUAGE,
        reranker_config: ContentRerankerConfig | None = None,
    ) -> list[ContentChunk]: ...

    async def search_content_chunks_async(
        self,
        *,
        search_string: str,
        search_type: ContentSearchType,
        limit: int,
        search_language: str = DEFAULT_SEARCH_LANGUAGE,
        reranker_config: ContentRerankerConfig | None = None,
        scope_ids: list[str] | None = None,
        metadata_filter: dict[str, Any] | None = None,
        content_ids: list[str] | None = None,
        score_threshold: float | None = None,
    ):
        """
        Asynchronously search the knowledge base for content chunks.

        Args:
            search_string (str): The search string.
            search_type (ContentSearchType): The type of search to perform.
            limit (int): The maximum number of results to return.
            search_language (str, optional): The language for the full-text search.
                Defaults to ``DEFAULT_SEARCH_LANGUAGE``.
            reranker_config (ContentRerankerConfig | None, optional): The reranker
                configuration. Defaults to None.
            scope_ids (list[str] | None, optional): Deprecated. A non-empty list
                emits a ``DeprecationWarning`` and is merged into the metadata
                filter as a folder-id clause; do not use for new code.
            metadata_filter (dict | None, optional): UniqueQL metadata filter. When
                None, the filter stored on the service is used. Defaults to None.
            content_ids (list[str] | None, optional): The content IDs to search
                within. Defaults to None.
            score_threshold (float | None, optional): Minimum similarity score for
                results. Defaults to None, which is forwarded unchanged to the
                underlying search function.

        Returns:
            list[ContentChunk]: The search results.

        Raises:
            Exception: Any error raised by the search is logged and re-raised.
        """
        effective_filter = (
            self._metadata_filter if metadata_filter is None else metadata_filter
        )

        if scope_ids:
            warnings.warn(
                "Passing scope_ids to KnowledgeBaseService.search_content_chunks_async is "
                "deprecated; use metadata_filter with folderId operator 'in' instead.",
                DeprecationWarning,
                stacklevel=2,
            )
            # Fold the legacy scope restriction into the filter that is sent.
            effective_filter = merge_scope_clause_into_metadata_filter(
                build_folder_id_in_clause(scope_ids), effective_filter
            )

        try:
            return await search_content_chunks_async(
                user_id=self._user_id,
                company_id=self._company_id,
                chat_id="",
                search_string=search_string,
                search_type=search_type,
                limit=limit,
                search_language=search_language,
                reranker_config=reranker_config,
                chat_only=False,
                metadata_filter=effective_filter,
                content_ids=content_ids,
                score_threshold=score_threshold,
            )
        except Exception as e:
            _LOGGER.error(f"Error while searching content chunks: {e}")
            raise e

    def search_contents(
        self,
        *,
        where: dict[str, Any],
        include_failed_content: bool = False,
    ) -> list[Content]:
        """
        Search the knowledge base by filter (not a similarity search).

        In contrast to ``search_content_chunks``, this loads the complete content
        of the matching files.

        Args:
            where (dict): The search criteria.
            include_failed_content (bool): Forwarded unchanged to the underlying
                search function. Defaults to False.

        Returns:
            list[Content]: The search results.
        """
        # Knowledge-base searches are not tied to a chat, hence the empty chat_id.
        matches = search_contents(
            where=where,
            include_failed_content=include_failed_content,
            chat_id="",
            company_id=self._company_id,
            user_id=self._user_id,
        )
        return matches

    async def search_contents_async(
        self,
        *,
        where: dict[str, Any],
        include_failed_content: bool = False,
    ) -> list[Content]:
        """
        Asynchronously search the knowledge base for content files by filter.

        Args:
            where (dict): The search criteria.
            include_failed_content (bool): Forwarded unchanged to the underlying
                search function. Defaults to False.

        Returns:
            list[Content]: The search results.
        """
        # Knowledge-base searches are not tied to a chat, hence the empty chat_id.
        matches = await search_contents_async(
            where=where,
            include_failed_content=include_failed_content,
            chat_id="",
            company_id=self._company_id,
            user_id=self._user_id,
        )
        return matches

    # Content Management
    # ------------------------------------------------------------------------------------------------

    def upload_content_from_bytes(
        self,
        content: bytes,
        *,
        content_name: str,
        mime_type: str,
        scope_id: str,
        skip_ingestion: bool = False,
        ingestion_config: unique_sdk.Content.IngestionConfig | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> Content:
        """
        Upload in-memory bytes to the knowledge base.

        Args:
            content (bytes): The content to upload.
            content_name (str): The name of the content.
            mime_type (str): The MIME type of the content.
            scope_id (str): The scope ID to upload into (required).
            skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
            ingestion_config (unique_sdk.Content.IngestionConfig | None): The
                ingestion configuration. Defaults to None.
            metadata (dict | None): The metadata to associate with the content.
                Defaults to None.

        Returns:
            Content: The uploaded content.
        """
        uploaded = upload_content_from_bytes(
            content=content,
            content_name=content_name,
            mime_type=mime_type,
            scope_id=scope_id,
            skip_ingestion=skip_ingestion,
            ingestion_config=ingestion_config,
            metadata=metadata,
            # The upload targets a scope, not a chat.
            chat_id="",
            company_id=self._company_id,
            user_id=self._user_id,
        )
        return uploaded

    async def upload_content_from_bytes_async(
        self,
        content: bytes,
        *,
        content_name: str,
        mime_type: str,
        scope_id: str,
        skip_ingestion: bool = False,
        ingestion_config: unique_sdk.Content.IngestionConfig | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> Content:
        """
        Asynchronously upload in-memory bytes to the knowledge base.

        Args:
            content (bytes): The content to upload.
            content_name (str): The name of the content.
            mime_type (str): The MIME type of the content.
            scope_id (str): The scope ID to upload into (required).
            skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
            ingestion_config (unique_sdk.Content.IngestionConfig | None): The
                ingestion configuration. Defaults to None.
            metadata (dict | None): The metadata to associate with the content.
                Defaults to None.

        Returns:
            Content: The uploaded content.
        """
        uploaded = await upload_content_from_bytes_async(
            content=content,
            content_name=content_name,
            mime_type=mime_type,
            scope_id=scope_id,
            skip_ingestion=skip_ingestion,
            ingestion_config=ingestion_config,
            metadata=metadata,
            # The upload targets a scope, not a chat.
            chat_id="",
            company_id=self._company_id,
            user_id=self._user_id,
        )
        return uploaded

    def upload_content(
        self,
        path_to_content: str,
        content_name: str,
        mime_type: str,
        scope_id: str,
        skip_ingestion: bool = False,
        skip_excel_ingestion: bool = False,
        ingestion_config: unique_sdk.Content.IngestionConfig | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> Content:
        """
        Upload a local file to the knowledge base.

        Args:
            path_to_content (str): The path to the content to upload.
            content_name (str): The name of the content.
            mime_type (str): The MIME type of the content.
            scope_id (str): The scope ID to upload into (required).
            skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
            skip_excel_ingestion (bool): Whether to skip excel ingestion.
                Defaults to False.
            ingestion_config (unique_sdk.Content.IngestionConfig | None): The
                ingestion configuration. Defaults to None.
            metadata (dict[str, Any] | None): The metadata to associate with the
                content. Defaults to None.

        Returns:
            Content: The uploaded content.
        """
        uploaded = upload_content(
            path_to_content=path_to_content,
            content_name=content_name,
            mime_type=mime_type,
            scope_id=scope_id,
            skip_ingestion=skip_ingestion,
            skip_excel_ingestion=skip_excel_ingestion,
            ingestion_config=ingestion_config,
            metadata=metadata,
            # The upload targets a scope, not a chat.
            chat_id="",
            company_id=self._company_id,
            user_id=self._user_id,
        )
        return uploaded

    def download_content_to_file(
        self,
        *,
        content_id: str,
        output_dir_path: Path | None = None,
        output_filename: str | None = None,
    ) -> Path:
        """
        Download a content item and save it to a file.

        Args:
            content_id (str): The ID of the content to download.
            output_dir_path (Path | None): Directory to save into; forwarded as
                ``tmp_dir_path``. When None, the choice of directory is left to
                the underlying download function. Defaults to None.
            output_filename (str | None): The name of the file to save the content
                as; forwarded as ``filename``. Defaults to None.

        Returns:
            Path: The path to the downloaded file.

        Raises:
            Exception: If the download fails or the filename cannot be determined.
        """
        saved_path = download_content_to_file_by_id(
            content_id=content_id,
            filename=output_filename,
            tmp_dir_path=output_dir_path,
            chat_id="",
            company_id=self._company_id,
            user_id=self._user_id,
        )
        return saved_path

    def download_content_to_bytes(
        self,
        *,
        content_id: str,
    ) -> bytes:
        """
        Download a content item into memory.

        Args:
            content_id (str): The id of the uploaded content.

        Returns:
            bytes: The downloaded content.

        Raises:
            Exception: If the download fails.
        """
        # Inside the method body this name resolves to the module-level function,
        # not to this method.
        payload = download_content_to_bytes(
            content_id=content_id,
            chat_id=None,
            company_id=self._company_id,
            user_id=self._user_id,
        )
        return payload

    async def download_content_to_bytes_async(
        self,
        *,
        content_id: str,
    ) -> bytes:
        """
        Asynchronously download a content item into memory.

        Args:
            content_id (str): The id of the uploaded content.

        Returns:
            bytes: The downloaded content.

        Raises:
            Exception: If the download fails.
        """
        # Inside the method body this name resolves to the module-level function,
        # not to this method.
        payload = await download_content_to_bytes_async(
            content_id=content_id,
            chat_id=None,
            company_id=self._company_id,
            user_id=self._user_id,
        )
        return payload

    def batch_file_upload(
        self,
        *,
        local_files: list[Path],
        remote_folders: list[PurePath],
        overwrite: bool = False,
        metadata_generator: Callable[[Path, PurePath], dict[str, Any]] | None = None,
    ) -> None:
        """
        Upload files to the knowledge base into corresponding folders

        Args:
            local_files (list[Path]): The local files to upload
            remote_folders (list[PurePath]): The remote folders to upload the files to
            overwrite (bool): Whether to overwrite existing files
            metadata_generator (Callable[[Path, PurePath], dict[str, Any]] | None): The metadata generator function

        Returns:
            None
        """

        if len(local_files) != len(remote_folders):
            raise ValueError(
                "The number of local files and remote folders must be the same"
            )

        creation_result = self.create_folders(paths=remote_folders)

        folders_path_to_scope_id = {
            folder_path: result.id
            for folder_path, result in zip(remote_folders, creation_result)
        }

        _old_scope_id = None
        _existing_file_names: list[str] = []

        for remote_folder_path, local_file_path in zip(remote_folders, local_files):
            scope_id = folders_path_to_scope_id[remote_folder_path]
            mime_type = mimetypes.guess_type(local_file_path.name)[0]

            if mime_type is None:
                _LOGGER.warning(
                    f"No mime type found for file {local_file_path.name}, skipping"
                )
                continue

            if not overwrite:
                if _old_scope_id is None or _old_scope_id != scope_id:
                    _LOGGER.debug(f"Switching to new folder {scope_id}")
                    _old_scope_id = scope_id
                    _existing_file_names = self.get_file_names_in_folder(
                        scope_id=scope_id
                    )

                if local_file_path.name in _existing_file_names:
                    _LOGGER.warning(
                        f"File {local_file_path.name} already exists in folder {scope_id}, skipping"
                    )
                    continue

            metadata = None
            if metadata_generator is not None:
                metadata = metadata_generator(local_file_path, remote_folder_path)

            self.upload_content(
                path_to_content=str(local_file_path),
                content_name=local_file_path.name,
                mime_type=mime_type,
                scope_id=scope_id,
                metadata=metadata,
            )

    # Content Information
    # ------------------------------------------------------------------------------------------------
    def get_paginated_content_infos(
        self,
        *,
        metadata_filter: dict[str, Any] | None = None,
        skip: int | None = None,
        take: int | None = None,
        file_path: str | None = None,
    ) -> PaginatedContentInfos:
        """
        Fetch one page of content infos from the knowledge base.

        All arguments are forwarded unchanged to ``get_content_info`` together
        with this service's user and company IDs.

        Args:
            metadata_filter (dict[str, Any] | None): The metadata filter to use.
            skip (int | None): Pagination offset.
            take (int | None): Page size.
            file_path (str | None): File path passed to the underlying call.

        Returns:
            PaginatedContentInfos: The requested page.
        """
        page = get_content_info(
            metadata_filter=metadata_filter,
            skip=skip,
            take=take,
            file_path=file_path,
            company_id=self._company_id,
            user_id=self._user_id,
        )
        return page

    async def get_paginated_content_infos_async(
        self,
        *,
        metadata_filter: dict[str, Any] | None = None,
        skip: int | None = None,
        take: int | None = None,
        file_path: str | None = None,
    ) -> PaginatedContentInfos:
        """
        Asynchronously fetch one page of content infos from the knowledge base.

        All arguments are forwarded unchanged to ``get_content_info_async``
        together with this service's user and company IDs.

        Args:
            metadata_filter (dict[str, Any] | None): The metadata filter to use.
            skip (int | None): Pagination offset.
            take (int | None): Page size.
            file_path (str | None): File path passed to the underlying call.

        Returns:
            PaginatedContentInfos: The requested page.
        """
        page = await get_content_info_async(
            metadata_filter=metadata_filter,
            skip=skip,
            take=take,
            file_path=file_path,
            company_id=self._company_id,
            user_id=self._user_id,
        )
        return page

    async def get_content_infos_async(
        self,
        *,
        metadata_filter: dict[str, Any] | None = None,
        step_size: int = 100,
        max_concurrent_requests: int = 10,
    ) -> list[ContentInfo]:
        """
        Fetches all content infos from the knowledge base using parallel pagination.
        The API limits responses to 100 items per request, so this method fetches
        the total count first, then retrieves all pages concurrently (bounded by
        ``max_concurrent_requests`` to avoid rate limiting or connection exhaustion).

        Args:
            metadata_filter (dict[str, Any] | None): The metadata filter to use. Defaults to None.
            step_size (int): Number of items per page. Defaults to 100.
            max_concurrent_requests (int): Maximum number of concurrent API calls.
                Defaults to 10.

        Returns:
            list[ContentInfo]: All content infos visible to the user.
        """

        info_for_count_of_total_content = await self.get_paginated_content_infos_async(
            metadata_filter=metadata_filter,
            take=1,
        )

        total_count = info_for_count_of_total_content.total_count

        semaphore = asyncio.Semaphore(max_concurrent_requests)

        async def _fetch_page(skip: int) -> PaginatedContentInfos:
            async with semaphore:
                return await self.get_paginated_content_infos_async(
                    metadata_filter=metadata_filter,
                    skip=skip,
                    take=step_size,
                )

        results: list[PaginatedContentInfos | BaseException] = await asyncio.gather(
            *[_fetch_page(i) for i in range(0, total_count, step_size)],
            return_exceptions=True,
        )

        for result in results:
            if isinstance(result, BaseException):
                _LOGGER.error("Error fetching paginated content infos", exc_info=result)

        return [
            content_info
            for result in results
            if not isinstance(result, BaseException)
            for content_info in result.content_infos
        ]

    def get_file_names_in_folder(self, *, scope_id: str) -> list[str]:
        """
        Get the list of file names in a knowledge base folder

        The listing is read page by page so that folders holding more files than
        a single response can carry are returned completely.

        Args:
            scope_id (str): The scope id of the folder

        Returns:
            list[str]: The list of file names in the folder
        """
        smart_rule = Statement(
            operator=Operator.EQUALS, value=scope_id, path=["folderId"]
        )
        metadata_filter = smart_rule.model_dump(mode="json")

        # Matches the default page size used by get_content_infos_async.
        page_size = 100
        file_names: list[str] = []
        skip = 0

        while True:
            page = self.get_paginated_content_infos(
                metadata_filter=metadata_filter,
                skip=skip,
                take=page_size,
            )
            page_items = page.content_infos
            file_names.extend(info.key for info in page_items)
            skip += len(page_items)

            # Stop on an empty page (guards against looping forever) or once
            # everything reported by the server has been read.
            if not page_items or skip >= page.total_count:
                break

        return file_names

    # Folder Management
    # ------------------------------------------------------------------------------------------------

    def get_folder_info(
        self,
        *,
        scope_id: str,
    ) -> FolderInfo:
        """
        Fetch information about a knowledge base folder.

        Args:
            scope_id (str): The scope id of the folder.

        Returns:
            FolderInfo: The folder information returned by the module-level
            ``get_folder_info`` function.
        """
        folder_info = get_folder_info(
            scope_id=scope_id,
            company_id=self._company_id,
            user_id=self._user_id,
        )
        return folder_info

    async def get_folder_info_async(
        self,
        *,
        scope_id: str,
    ) -> FolderInfo:
        """
        Asynchronously fetch information about a knowledge base folder.

        Args:
            scope_id (str): The scope id of the folder.

        Returns:
            FolderInfo: The folder information returned by the module-level
            ``get_folder_info_async`` function.
        """
        folder_info = await get_folder_info_async(
            scope_id=scope_id,
            company_id=self._company_id,
            user_id=self._user_id,
        )
        return folder_info

    # File Tree Resolution
    # ------------------------------------------------------------------------------------------------

    @staticmethod
    def extract_scope_ids(content_infos: list[ContentInfo]) -> set[str]:
        """Extracts all unique scope IDs from the ``folderIdPath`` metadata field.

        Args:
            content_infos: The content infos to extract scope IDs from.

        Returns:
            set[str]: All unique scope IDs found across content infos.
        """
        scope_ids: set[str] = set()
        for content_info in content_infos:
            if (
                content_info.metadata
                and (folder_id_path := content_info.metadata.get("folderIdPath"))
                is not None
                and isinstance(folder_id_path, str)
            ):
                scope_ids.update(
                    sid
                    for sid in folder_id_path.replace("uniquepathid://", "").split("/")
                    if sid
                )
        return scope_ids

    async def _translate_scope_id_async(self, scope_id: str) -> str | None:
        """Look up the folder name for one scope ID.

        Any exception raised during the lookup is logged as a warning and turned
        into a ``None`` result.

        Returns:
            The folder name, or ``None`` if the lookup fails.
        """
        folder_name: str | None = None
        try:
            folder_name = (await self.get_folder_info_async(scope_id=scope_id)).name
        except Exception as e:
            _LOGGER.warning(
                f"Could not resolve folder for scope_id {scope_id}", exc_info=e
            )
        return folder_name

    async def _translate_scope_ids_async(
        self,
        scope_ids: set[str],
        *,
        max_concurrent_requests: int = 25,
    ) -> dict[str, str]:
        """Translate a set of scope IDs to folder names concurrently.

        Scope IDs that cannot be resolved are silently omitted from the result.

        Args:
            scope_ids: The scope IDs to translate.
            max_concurrent_requests: Maximum number of concurrent API calls.
                Defaults to 25.

        Returns:
            dict[str, str]: Mapping from scope ID to folder name.
        """
        scope_id_list = list(scope_ids)
        semaphore = asyncio.Semaphore(max_concurrent_requests)

        async def _resolve(sid: str) -> str | None:
            async with semaphore:
                return await self._translate_scope_id_async(sid)

        results = await asyncio.gather(*[_resolve(sid) for sid in scope_id_list])
        return {
            sid: name for sid, name in zip(scope_id_list, results) if name is not None
        }

    async def resolve_visible_file_paths_async(
        self,
        *,
        metadata_filter: dict[str, Any] | None = None,
    ) -> list[tuple[ContentInfo, list[str]]]:
        """Resolves file paths visible to the current user asynchronously.

        Returns each content item paired with its resolved file path segments.

        Args:
            metadata_filter: Optional metadata filter to narrow the content scope.

        Returns:
            list[tuple[ContentInfo, list[str]]]: Each tuple is
                ``(content_info, [folder1, folder2, ..., filename])``.
        """
        content_infos = await self.get_content_infos_async(
            metadata_filter=metadata_filter,
        )
        scope_ids = self.extract_scope_ids(content_infos)
        scope_id_to_folder_name = await self._translate_scope_ids_async(scope_ids)

        resolved: list[tuple[ContentInfo, list[str]]] = []
        for content_info in content_infos:
            if (
                content_info.metadata
                and (folder_id_path := content_info.metadata.get("folderIdPath"))
                is not None
                and isinstance(folder_id_path, str)
            ):
                file_path = [
                    scope_id_to_folder_name.get(sid, sid)
                    for sid in folder_id_path.replace("uniquepathid://", "").split("/")
                    if sid
                ]
            else:
                file_path = ["_no_folder_path"]

            file_path.append(content_info.key)
            resolved.append((content_info, file_path))

        return resolved

    def _pop_forbidden_metadata_keys(self, metadata: dict[str, Any]) -> dict[str, Any]:
        forbidden_keys = [
            "key",
            "url",
            "title",
            "folderId",
            "mimeType",
            "companyId",
            "contentId",
            "folderIdPath",
            "externalFileOwner",
        ]
        for key in forbidden_keys:
            metadata.pop(key, None)
        return metadata

    def create_folders(self, *, paths: list[PurePath]) -> list[BaseFolderInfo]:
        """
        Create folders in the knowledge base if the path does not exist.

        Args:
            paths (list[PurePath]): The paths to create the folders at

        Returns:
            list[BaseFolderInfo]: One validated entry per item in the
            ``createdFolders`` field of the SDK response.
        """
        posix_paths = [path.as_posix() for path in paths]
        response = unique_sdk.Folder.create_paths(
            user_id=self._user_id,
            company_id=self._company_id,
            paths=posix_paths,
        )

        folder_infos: list[BaseFolderInfo] = []
        for raw_folder in response["createdFolders"]:
            folder_infos.append(
                BaseFolderInfo.model_validate(raw_folder, by_alias=True, by_name=True)
            )
        return folder_infos

        # Metadata

    # Metadata Management
    # ------------------------------------------------------------------------------------------------

    def replace_content_metadata(
        self,
        *,
        content_id: str,
        metadata: dict[str, Any],
    ) -> ContentInfo:
        """
        Send ``metadata`` as the metadata of the content with the given id.

        The dictionary is forwarded unchanged to ``update_content``; no keys are
        camelized or filtered here.

        Args:
            content_id (str): The id of the content to update.
            metadata (dict[str, Any]): The metadata to send.

        Returns:
            ContentInfo: The value returned by ``update_content``.
        """
        request = {
            "user_id": self._user_id,
            "company_id": self._company_id,
            "content_id": content_id,
            "metadata": metadata,
        }
        return update_content(**request)

    def update_content_metadata(
        self,
        *,
        content_info: ContentInfo,
        additional_metadata: dict[str, Any],
    ) -> ContentInfo:
        """
        Merge additional metadata into a content's metadata and send the result.

        Keys are camelized and the forbidden keys are dropped before merging.
        ``content_info.metadata`` is updated in place (or assigned when it was None).

        Args:
            content_info (ContentInfo): The content whose metadata is extended.
            additional_metadata (dict[str, Any]): The entries to add or overwrite.

        Returns:
            ContentInfo: The value returned by ``update_content``.
        """
        sanitized_metadata = self._pop_forbidden_metadata_keys(
            humps.camelize(additional_metadata)
        )

        if content_info.metadata is None:
            content_info.metadata = sanitized_metadata
        else:
            content_info.metadata.update(sanitized_metadata)

        return update_content(
            user_id=self._user_id,
            company_id=self._company_id,
            content_id=content_info.id,
            metadata=content_info.metadata,
        )

    def remove_content_metadata(
        self,
        *,
        content_info: ContentInfo,
        keys_to_remove: list[str],
    ) -> ContentInfo:
        """
        Removes the specified keys irreversibly from the content metadata.

        Each key is camelized (metadata keys are stored in camelCase) and its
        value is set to None on ``content_info.metadata`` before the metadata is
        sent through ``update_content``.

        Args:
            content_info (ContentInfo): The content whose metadata is changed.
            keys_to_remove (list[str]): The keys to remove.

        Returns:
            ContentInfo: The value returned by ``update_content``, or the
                unchanged ``content_info`` when it carries no metadata.
        """
        current_metadata = content_info.metadata
        if current_metadata is None:
            # Nothing to remove; warn and hand the input back untouched.
            _LOGGER.warning(f"Content metadata is None for content {content_info.id}")
            return content_info

        for raw_key in keys_to_remove:
            current_metadata[humps.camelize(raw_key)] = None

        payload = content_info.metadata or {}
        return update_content(
            user_id=self._user_id,
            company_id=self._company_id,
            content_id=content_info.id,
            metadata=payload,
        )

    @overload
    def update_contents_metadata(
        self,
        *,
        additional_metadata: dict[str, Any],
        content_infos: list[ContentInfo],
    ) -> list[ContentInfo]: ...

    @overload
    def update_contents_metadata(
        self, *, additional_metadata: dict[str, Any], metadata_filter: dict[str, Any]
    ) -> list[ContentInfo]: ...

    def update_contents_metadata(
        self,
        *,
        additional_metadata: dict[str, Any],
        metadata_filter: dict[str, Any] | None = None,
        content_infos: list[ContentInfo] | None = None,
    ) -> list[ContentInfo]:
        """Update the metadata of the given contents or of those matching the filter.

        Note: Keys are camelized before being updated as metadata keys are stored in camelCase.

        Args:
            additional_metadata (dict[str, Any]): The entries to add or overwrite.
            metadata_filter (dict[str, Any] | None): Used to look up the contents
                when ``content_infos`` is not given.
            content_infos (list[ContentInfo] | None): The contents to update.

        Returns:
            list[ContentInfo]: The content infos that were processed.
        """
        sanitized_metadata = self._pop_forbidden_metadata_keys(
            humps.camelize(additional_metadata)
        )

        targets = content_infos
        if targets is None:
            # NOTE(review): a single paginated request is issued here - confirm
            # whether a filter matching more than one page needs further requests.
            page = self.get_paginated_content_infos(
                metadata_filter=metadata_filter,
            )
            targets = page.content_infos

        for target in targets:
            self.update_content_metadata(
                content_info=target, additional_metadata=sanitized_metadata
            )

        return targets

    @overload
    def remove_contents_metadata(
        self,
        *,
        keys_to_remove: list[str],
        content_infos: list[ContentInfo],
    ) -> list[ContentInfo]: ...

    @overload
    def remove_contents_metadata(
        self, *, keys_to_remove: list[str], metadata_filter: dict[str, Any]
    ) -> list[ContentInfo]: ...

    def remove_contents_metadata(
        self,
        *,
        keys_to_remove: list[str],
        metadata_filter: dict[str, Any] | None = None,
        content_infos: list[ContentInfo] | None = None,
    ) -> list[ContentInfo]:
        """Remove the specified keys irreversibly from the metadata of several contents.

        Note: Keys are camelized before being removed as metadata keys are stored in camelCase.

        Args:
            keys_to_remove (list[str]): The keys to remove from every content.
            metadata_filter (dict[str, Any] | None): Used to look up the contents
                when ``content_infos`` is not given.
            content_infos (list[ContentInfo] | None): The contents to process.

        Returns:
            list[ContentInfo]: The content infos that were processed.
        """
        targets = content_infos
        if targets is None:
            page = self.get_paginated_content_infos(
                metadata_filter=metadata_filter,
            )
            targets = page.content_infos

        for target in targets:
            self.remove_content_metadata(
                content_info=target, keys_to_remove=keys_to_remove
            )

        return targets

    # Delete
    # ------------------------------------------------------------------------------------------------

    @overload
    def delete_content(
        self,
        *,
        content_id: str,
    ) -> DeleteContentResponse:
        """Delete content by id."""

    @overload
    def delete_content(
        self,
        *,
        file_path: str,
    ) -> DeleteContentResponse:
        """Delete all content matching the file path."""

    def delete_content(
        self,
        *,
        content_id: str | None = None,
        file_path: str | None = None,
    ) -> DeleteContentResponse:
        """
        Delete content by id or by file path.

        Both arguments are forwarded unchanged to the module-level
        ``delete_content`` helper; the overloads describe the intended call
        shapes. Deleting by metadata filter is done with ``delete_contents``.

        Args:
            content_id (str | None): The id of the content to delete.
            file_path (str | None): The file path whose content is deleted.

        Returns:
            DeleteContentResponse: The value returned by the helper.
        """
        return delete_content(
            user_id=self._user_id,
            company_id=self._company_id,
            content_id=content_id,
            file_path=file_path,
        )

    def delete_contents(
        self,
        *,
        metadata_filter: dict[str, Any],
    ) -> list[DeleteContentResponse]:
        """
        Delete the content matching the metadata filter, one item after another.

        An empty filter deletes nothing and yields an empty list.

        Args:
            metadata_filter (dict[str, Any]): The filter selecting the content.

        Returns:
            list[DeleteContentResponse]: One response per deleted content.
        """
        if not metadata_filter:
            return []

        # NOTE(review): only the result of one paginated request is processed -
        # confirm whether filters matching more than one page need extra requests.
        matching = self.get_paginated_content_infos(
            metadata_filter=metadata_filter,
        )

        return [
            delete_content(
                user_id=self._user_id,
                company_id=self._company_id,
                content_id=content.id,
            )
            for content in matching.content_infos
        ]

    @overload
    async def delete_content_async(
        self,
        *,
        content_id: str,
    ) -> DeleteContentResponse: ...

    @overload
    async def delete_content_async(
        self,
        *,
        file_path: str,
    ) -> DeleteContentResponse: ...

    async def delete_content_async(
        self,
        *,
        content_id: str | None = None,
        file_path: str | None = None,
    ) -> DeleteContentResponse:
        """
        Asynchronously delete content by id or by file path.

        Both arguments are forwarded unchanged to the module-level
        ``delete_content_async`` helper.

        Args:
            content_id (str | None): The id of the content to delete.
            file_path (str | None): The file path whose content is deleted.

        Returns:
            DeleteContentResponse: The value returned by the helper.
        """
        request = {
            "user_id": self._user_id,
            "company_id": self._company_id,
            "content_id": content_id,
            "file_path": file_path,
        }
        return await delete_content_async(**request)

    async def delete_contents_async(
        self,
        *,
        metadata_filter: dict[str, Any],
    ) -> list[DeleteContentResponse]:
        """
        Delete the content matching the metadata filter concurrently.

        An empty filter deletes nothing and yields an empty list.

        Args:
            metadata_filter (dict[str, Any]): The filter selecting the content.

        Returns:
            list[DeleteContentResponse]: One response per deleted content, in the
                order of the looked-up content infos.
        """
        if not metadata_filter:
            return []

        # Use the async lookup so the event loop is not blocked by a synchronous
        # request while the matching contents are fetched.
        # NOTE(review): only the result of one paginated request is processed -
        # confirm whether filters matching more than one page need extra requests.
        infos = await self.get_paginated_content_infos_async(
            metadata_filter=metadata_filter,
        )

        # Create all delete coroutines without awaiting them
        delete_tasks = [
            delete_content_async(
                user_id=self._user_id,
                company_id=self._company_id,
                content_id=info.id,
            )
            for info in infos.content_infos
        ]

        # Await all delete operations concurrently
        resp = await asyncio.gather(*delete_tasks)

        return list(resp)

    def _get_knowledge_base_location(
        self, *, scope_id: str
    ) -> tuple[PurePath, list[str]]:
        """
        Get the path of a folder from a scope id.

        Args:
            scope_id (str): The scope id of the folder.

        Returns:
            PurePath: The path of the folder.
            list[str]: The list of scope ids from root to the folder.
        """

        list_of_folder_names: list[str] = []
        list_of_scope_ids: list[str] = []
        folder_info = self.get_folder_info(scope_id=scope_id)
        list_of_scope_ids.append(folder_info.id)
        if folder_info.parent_id is not None:
            list_of_folder_names.append(folder_info.name)
        else:
            return PurePath("/" + folder_info.name), list_of_scope_ids

        while folder_info.parent_id is not None:
            parent_scope_id = folder_info.parent_id
            folder_info = self.get_folder_info(scope_id=parent_scope_id)
            list_of_folder_names.append(folder_info.name)
            list_of_scope_ids.append(folder_info.id)

        list_of_scope_ids.reverse()
        return PurePath("/" + "/".join(list_of_folder_names[::-1])), list_of_scope_ids

    # Utility Functions
    # ------------------------------------------------------------------------------------------------

    def get_folder_path(self, *, scope_id: str) -> PurePath:
        """
        Get the path of a folder from a scope id.

        Args:
            scope_id (str): The scope id of the folder.

        Returns:
            PurePath: The path of the folder.
        """
        location = self._get_knowledge_base_location(scope_id=scope_id)
        path_of_folder, _scope_ids = location
        return path_of_folder

    def get_scope_id_path(self, *, scope_id: str) -> list[str]:
        """
        Get the chain of scope ids leading to a folder.

        Args:
            scope_id (str): The scope id of the folder.

        Returns:
            list[str]: The list of scope ids from root to the folder.
        """
        location = self._get_knowledge_base_location(scope_id=scope_id)
        _path_of_folder, scope_id_chain = location
        return scope_id_chain

`__init__(company_id, user_id, metadata_filter=None)` ¶

Initialize the KnowledgeBaseService with a company_id, a user_id and an optional metadata filter.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def __init__(
    self,
    company_id: str,
    user_id: str,
    metadata_filter: dict[str, Any] | None = None,
):
    """
    Initialize the KnowledgeBaseService with a company_id, a user_id and an optional metadata filter.

    Args:
        company_id (str): The company id; checked by ``validate_required_values``.
        user_id (str): The user id; checked by ``validate_required_values``.
        metadata_filter (dict[str, Any] | None): Stored unchanged on the service.
            Defaults to None.
    """

    [company_id, user_id] = validate_required_values([company_id, user_id])
    self._company_id = company_id
    self._user_id = user_id
    self._metadata_filter = metadata_filter

`batch_file_upload(*, local_files, remote_folders, overwrite=False, metadata_generator=None)` ¶

Upload files to the knowledge base into corresponding folders

Parameters:

Name	Type	Description	Default
`local_files`	`list[Path]`	The local files to upload	required
`remote_folders`	`list[PurePath]`	The remote folders to upload the files to	required
`overwrite`	`bool`	Whether to overwrite existing files	`False`
`metadata_generator`	`Callable[[Path, PurePath], dict[str, Any]] \| None`	The metadata generator function	`None`

Returns:

Type	Description
`None`	None

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def batch_file_upload(
    self,
    *,
    local_files: list[Path],
    remote_folders: list[PurePath],
    overwrite: bool = False,
    metadata_generator: Callable[[Path, PurePath], dict[str, Any]] | None = None,
) -> None:
    """
    Upload files to the knowledge base into corresponding folders

    ``local_files[i]`` is uploaded into ``remote_folders[i]``. Files without a
    guessable mime type are skipped. When ``overwrite`` is False, a file is
    also skipped if its name is already present in the target folder, either
    on the server or because it was uploaded earlier in this same batch.

    Args:
        local_files (list[Path]): The local files to upload
        remote_folders (list[PurePath]): The remote folders to upload the files to
        overwrite (bool): Whether to overwrite existing files
        metadata_generator (Callable[[Path, PurePath], dict[str, Any]] | None): The metadata generator function

    Returns:
        None

    Raises:
        ValueError: If the number of local files and remote folders differ.
    """

    if len(local_files) != len(remote_folders):
        raise ValueError(
            "The number of local files and remote folders must be the same"
        )

    creation_result = self.create_folders(paths=remote_folders)

    # NOTE(review): assumes create_folders returns one entry per requested path,
    # in the same order - confirm against the SDK response.
    folders_path_to_scope_id = {
        folder_path: result.id
        for folder_path, result in zip(remote_folders, creation_result)
    }

    # File names known per folder: fetched once per scope id and extended with
    # every upload of this batch so that the listing never goes stale.
    known_file_names: dict[str, set[str]] = {}

    for remote_folder_path, local_file_path in zip(remote_folders, local_files):
        scope_id = folders_path_to_scope_id[remote_folder_path]
        mime_type = mimetypes.guess_type(local_file_path.name)[0]

        if mime_type is None:
            _LOGGER.warning(
                f"No mime type found for file {local_file_path.name}, skipping"
            )
            continue

        if not overwrite:
            if scope_id not in known_file_names:
                _LOGGER.debug(f"Switching to new folder {scope_id}")
                known_file_names[scope_id] = set(
                    self.get_file_names_in_folder(scope_id=scope_id)
                )

            if local_file_path.name in known_file_names[scope_id]:
                _LOGGER.warning(
                    f"File {local_file_path.name} already exists in folder {scope_id}, skipping"
                )
                continue

        metadata = None
        if metadata_generator is not None:
            metadata = metadata_generator(local_file_path, remote_folder_path)

        self.upload_content(
            path_to_content=str(local_file_path),
            content_name=local_file_path.name,
            mime_type=mime_type,
            scope_id=scope_id,
            metadata=metadata,
        )

        if not overwrite:
            # Remember the upload so a later duplicate name in this folder is skipped.
            known_file_names[scope_id].add(local_file_path.name)

`create_folders(*, paths)` ¶

Create folders in the knowledge base for paths that do not exist yet.

Parameters:

Name	Type	Description	Default
`paths`	`list[PurePath]`	The paths to create the folders at	required

Returns:

Type	Description
`list[BaseFolderInfo]`	list[BaseFolderInfo]: The information about the created folders or existing folders

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def create_folders(self, *, paths: list[PurePath]) -> list[BaseFolderInfo]:
    """
    Create folders in the knowledge base for paths that do not exist yet.

    Args:
        paths (list[PurePath]): The folder paths to create. Each path is sent
            to the SDK in POSIX form.

    Returns:
        list[BaseFolderInfo]: The information about the created or existing
            folders, one entry per item of ``createdFolders`` in the SDK response.
    """
    posix_paths = [folder_path.as_posix() for folder_path in paths]
    response = unique_sdk.Folder.create_paths(
        user_id=self._user_id,
        company_id=self._company_id,
        paths=posix_paths,
    )

    folder_infos: list[BaseFolderInfo] = []
    for raw_folder in response["createdFolders"]:
        folder_infos.append(
            BaseFolderInfo.model_validate(raw_folder, by_alias=True, by_name=True)
        )
    return folder_infos

`delete_content(*, content_id=None, file_path=None)` ¶

delete_content(*, content_id: str) -> DeleteContentResponse

delete_content(*, file_path: str) -> DeleteContentResponse

Delete content by id or by file path. To delete by metadata filter, use `delete_contents`.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def delete_content(
    self,
    *,
    content_id: str | None = None,
    file_path: str | None = None,
) -> DeleteContentResponse:
    """
    Delete content by id or by file path.

    Both arguments are forwarded unchanged to the module-level
    ``delete_content`` helper.

    Args:
        content_id (str | None): The id of the content to delete.
        file_path (str | None): The file path whose content is deleted.

    Returns:
        DeleteContentResponse: The value returned by the helper.
    """
    request = {
        "user_id": self._user_id,
        "company_id": self._company_id,
        "content_id": content_id,
        "file_path": file_path,
    }
    return delete_content(**request)

`delete_contents(*, metadata_filter)` ¶

Delete all content matching the metadata filter.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def delete_contents(
    self,
    *,
    metadata_filter: dict[str, Any],
) -> list[DeleteContentResponse]:
    """
    Delete the content matching the metadata filter, one item after another.

    An empty filter deletes nothing and yields an empty list.

    Args:
        metadata_filter (dict[str, Any]): The filter selecting the content.

    Returns:
        list[DeleteContentResponse]: One response per deleted content.
    """
    if not metadata_filter:
        return []

    # NOTE(review): only the result of one paginated request is processed -
    # confirm whether filters matching more than one page need extra requests.
    matching = self.get_paginated_content_infos(
        metadata_filter=metadata_filter,
    )

    return [
        delete_content(
            user_id=self._user_id,
            company_id=self._company_id,
            content_id=content.id,
        )
        for content in matching.content_infos
    ]

`delete_contents_async(*, metadata_filter)` `async` ¶

Delete all content matching the metadata filter.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

async def delete_contents_async(
    self,
    *,
    metadata_filter: dict[str, Any],
) -> list[DeleteContentResponse]:
    """
    Delete the content matching the metadata filter concurrently.

    An empty filter deletes nothing and yields an empty list.

    Args:
        metadata_filter (dict[str, Any]): The filter selecting the content.

    Returns:
        list[DeleteContentResponse]: One response per deleted content, in the
            order of the looked-up content infos.
    """
    if not metadata_filter:
        return []

    # Use the async lookup so the event loop is not blocked by a synchronous
    # request while the matching contents are fetched.
    # NOTE(review): only the result of one paginated request is processed -
    # confirm whether filters matching more than one page need extra requests.
    infos = await self.get_paginated_content_infos_async(
        metadata_filter=metadata_filter,
    )

    # Create all delete coroutines without awaiting them
    delete_tasks = [
        delete_content_async(
            user_id=self._user_id,
            company_id=self._company_id,
            content_id=info.id,
        )
        for info in infos.content_infos
    ]

    # Await all delete operations concurrently
    resp = await asyncio.gather(*delete_tasks)

    return list(resp)

`download_content_to_bytes(*, content_id)` ¶

Downloads content to memory

Parameters:

Name	Type	Description	Default
`content_id`	`str`	The id of the uploaded content.	required

Returns:

Name	Type	Description
`bytes`	`bytes`	The downloaded content.

Raises:

Type	Description
`Exception`	If the download fails.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def download_content_to_bytes(
    self,
    *,
    content_id: str,
) -> bytes:
    """
    Download a content item into memory.

    The request is delegated to the module-level ``download_content_to_bytes``
    helper with ``chat_id=None``.

    Args:
        content_id (str): The id of the uploaded content.

    Returns:
        bytes: The downloaded content.

    Raises:
        Exception: If the download fails.
    """
    request = {
        "user_id": self._user_id,
        "company_id": self._company_id,
        "content_id": content_id,
        "chat_id": None,
    }
    return download_content_to_bytes(**request)

`download_content_to_bytes_async(*, content_id)` `async` ¶

Asynchronously downloads content to memory.

Parameters:

Name	Type	Description	Default
`content_id`	`str`	The id of the uploaded content.	required

Returns:

Name	Type	Description
`bytes`	`bytes`	The downloaded content.

Raises:

Type	Description
`Exception`	If the download fails.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

async def download_content_to_bytes_async(
    self,
    *,
    content_id: str,
) -> bytes:
    """
    Asynchronously download a content item into memory.

    The request is delegated to the module-level
    ``download_content_to_bytes_async`` helper with ``chat_id=None``.

    Args:
        content_id (str): The id of the uploaded content.

    Returns:
        bytes: The downloaded content.

    Raises:
        Exception: If the download fails.
    """
    request = {
        "user_id": self._user_id,
        "company_id": self._company_id,
        "content_id": content_id,
        "chat_id": None,
    }
    return await download_content_to_bytes_async(**request)

`download_content_to_file(*, content_id, output_dir_path=None, output_filename=None)` ¶

Downloads content from the knowledge base and saves it to a file.

Parameters:

Name	Type	Description	Default
`content_id`	`str`	The ID of the content to download.	required
`output_filename`	`str \| None`	The name of the file to save the content as. If not provided, the original filename will be used. Defaults to None.	`None`
`output_dir_path`	`str \| Path \| None`	The path to the temporary directory where the content will be saved. Defaults to "/tmp".	`None`

Returns:

Name	Type	Description
`Path`	`Path`	The path to the downloaded file.

Raises:

Type	Description
`Exception`	If the download fails or the filename cannot be determined.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def download_content_to_file(
    self,
    *,
    content_id: str,
    output_dir_path: Path | None = None,
    output_filename: str | None = None,
) -> Path:
    """
    Download a content item and save it to a file.

    The request is delegated to ``download_content_to_file_by_id`` with an
    empty ``chat_id``.

    Args:
        content_id (str): The ID of the content to download.
        output_filename (str | None): The name of the file to save the content as.
            If not provided, the original filename will be used. Defaults to None.
        output_dir_path (Path | None): The directory where the content will be
            saved, forwarded as ``tmp_dir_path``. Defaults to None; the location
            used in that case is decided by the helper (TODO confirm).

    Returns:
        Path: The path to the downloaded file.

    Raises:
        Exception: If the download fails or the filename cannot be determined.
    """
    request = {
        "user_id": self._user_id,
        "company_id": self._company_id,
        "content_id": content_id,
        "chat_id": "",
        "filename": output_filename,
        "tmp_dir_path": output_dir_path,
    }
    return download_content_to_file_by_id(**request)

`extract_scope_ids(content_infos)` `staticmethod` ¶

Extracts all unique scope IDs from the folderIdPath metadata field.

Parameters:

Name	Type	Description	Default
`content_infos`	`list[ContentInfo]`	The content infos to extract scope IDs from.	required

Returns:

Type	Description
`set[str]`	set[str]: All unique scope IDs found across content infos.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

@staticmethod
def extract_scope_ids(content_infos: list[ContentInfo]) -> set[str]:
    """Extracts all unique scope IDs from the ``folderIdPath`` metadata field.

    Args:
        content_infos: The content infos to extract scope IDs from.

    Returns:
        set[str]: All unique scope IDs found across content infos.
    """
    scope_ids: set[str] = set()
    for content_info in content_infos:
        if (
            content_info.metadata
            and (folder_id_path := content_info.metadata.get("folderIdPath"))
            is not None
            and isinstance(folder_id_path, str)
        ):
            scope_ids.update(
                sid
                for sid in folder_id_path.replace("uniquepathid://", "").split("/")
                if sid
            )
    return scope_ids

`from_context(context)` `classmethod` ¶

Create a KnowledgeBaseService from a `UniqueContext`.

This is the preferred constructor when using the service factory pattern.

Parameters:

Name	Type	Description	Default
`context`	`UniqueContext`	The request context carrying auth and chat information.	required

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

@classmethod
def from_context(cls, context: UniqueContext) -> Self:
    """Build a KnowledgeBaseService from a :class:`UniqueContext`.

    This is the preferred constructor when using the service factory pattern.
    The metadata filter is taken from ``context.chat`` when a chat is present,
    otherwise no filter is set.

    Args:
        context: The request context carrying auth and chat information.

    Returns:
        Self: The service created for the context's company and user.
    """
    chat = context.chat
    metadata_filter = None if chat is None else chat.metadata_filter

    auth = context.auth
    return cls(
        company_id=auth.get_confidential_company_id(),
        user_id=auth.get_confidential_user_id(),
        metadata_filter=metadata_filter,
    )

`from_event(event)` `classmethod` ¶

Initialize the KnowledgeBaseService with an event.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

@classmethod
@deprecated(
    "Use UniqueContext.from_chat_event(event) (if you have a ChatEvent) or "
    "UniqueContext.from_event(event) (for any BaseEvent) with UniqueServiceFactory instead."
)
def from_event(cls, event: BaseEvent[Any]):
    """
    Initialize the KnowledgeBaseService with an event.

    The metadata filter is read from the event payload for ``ChatEvent`` and
    ``Event`` instances; for any other event no filter is set.

    Args:
        event (BaseEvent[Any]): The event providing company id and user id.
    """
    carries_filter = isinstance(event, (ChatEvent | Event))
    metadata_filter = event.payload.metadata_filter if carries_filter else None

    return cls(
        company_id=event.company_id,
        user_id=event.user_id,
        metadata_filter=metadata_filter,
    )

`from_settings(settings=None, metadata_filter=None, **kwargs)` `classmethod` ¶

Initialize the KnowledgeBaseService with a settings object and metadata filter.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

@classmethod
def from_settings(
    cls,
    settings: UniqueSettings | str | None = None,
    metadata_filter: dict[str, Any] | None = None,
    **kwargs: Any,
):
    """
    Initialize the KnowledgeBaseService with a settings object and metadata filter.

    Args:
        settings (UniqueSettings | str | None): A settings object, a file name
            passed to ``UniqueSettings.from_env_auto_with_sdk_init``, or None to
            call that loader without arguments.
        metadata_filter (dict[str, Any] | None): The filter to use. When None
            and the settings carry a chat context, that chat's filter is used.
        **kwargs (Any): Accepted but not used by this constructor.
    """
    if isinstance(settings, str):
        settings = UniqueSettings.from_env_auto_with_sdk_init(filename=settings)
    elif settings is None:
        settings = UniqueSettings.from_env_auto_with_sdk_init()

    if metadata_filter is None:
        chat = settings.context.chat
        if chat is not None:
            metadata_filter = chat.metadata_filter

    auth = settings.authcontext
    return cls(
        company_id=auth.get_confidential_company_id(),
        user_id=auth.get_confidential_user_id(),
        metadata_filter=metadata_filter,
    )

`get_content_infos_async(*, metadata_filter=None, step_size=100, max_concurrent_requests=10)` `async` ¶

Fetches all content infos from the knowledge base using parallel pagination. The API limits responses to 100 items per request, so this method fetches the total count first, then retrieves all pages concurrently (bounded by max_concurrent_requests to avoid rate limiting or connection exhaustion).

Parameters:

Name	Type	Description	Default
`metadata_filter`	`dict[str, Any] \| None`	The metadata filter to use. Defaults to None.	`None`
`step_size`	`int`	Number of items per page. Defaults to 100.	`100`
`max_concurrent_requests`	`int`	Maximum number of concurrent API calls. Defaults to 10.	`10`

Returns:

Type	Description
`list[ContentInfo]`	list[ContentInfo]: All content infos visible to the user.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

async def get_content_infos_async(
    self,
    *,
    metadata_filter: dict[str, Any] | None = None,
    step_size: int = 100,
    max_concurrent_requests: int = 10,
) -> list[ContentInfo]:
    """
    Fetches all content infos from the knowledge base using parallel pagination.

    A first request with ``take=1`` reads the total count; afterwards one
    request per page of ``step_size`` items is issued, with at most
    ``max_concurrent_requests`` of them running at the same time. Pages that
    fail are logged and left out of the result.

    Args:
        metadata_filter (dict[str, Any] | None): The metadata filter to use. Defaults to None.
        step_size (int): Number of items per page. Defaults to 100.
        max_concurrent_requests (int): Maximum number of concurrent API calls.
            Defaults to 10.

    Returns:
        list[ContentInfo]: The content infos of all pages that were fetched
            successfully, in page order.
    """

    count_probe = await self.get_paginated_content_infos_async(
        metadata_filter=metadata_filter,
        take=1,
    )
    total = count_probe.total_count

    limiter = asyncio.Semaphore(max_concurrent_requests)

    async def _load_page(offset: int) -> PaginatedContentInfos:
        # The semaphore bounds how many page requests are in flight.
        async with limiter:
            return await self.get_paginated_content_infos_async(
                metadata_filter=metadata_filter,
                skip=offset,
                take=step_size,
            )

    offsets = range(0, total, step_size)
    outcomes = await asyncio.gather(
        *(_load_page(offset) for offset in offsets),
        return_exceptions=True,
    )

    collected: list[ContentInfo] = []
    for outcome in outcomes:
        if isinstance(outcome, BaseException):
            _LOGGER.error("Error fetching paginated content infos", exc_info=outcome)
            continue
        collected.extend(outcome.content_infos)
    return collected

`get_file_names_in_folder(*, scope_id)` ¶

Get the list of file names in a knowledge base folder

Parameters:

Name	Type	Description	Default
`scope_id`	`str`	The scope id of the folder	required

Returns:

Type	Description
`list[str]`	list[str]: The list of file names in the folder

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def get_file_names_in_folder(self, *, scope_id: str) -> list[str]:
    """
    List the file names found in a knowledge base folder.

    The contents are looked up with a metadata filter requiring ``folderId``
    to equal the given scope id; the ``key`` of each hit is returned.

    Args:
        scope_id (str): The scope id of the folder

    Returns:
        list[str]: The list of file names in the folder
    """
    folder_rule = Statement(
        operator=Operator.EQUALS, value=scope_id, path=["folderId"]
    )
    serialized_rule = folder_rule.model_dump(mode="json")
    page = self.get_paginated_content_infos(metadata_filter=serialized_rule)

    file_names: list[str] = []
    for content_info in page.content_infos:
        file_names.append(content_info.key)
    return file_names

`get_folder_path(*, scope_id)` ¶

Get the path of a folder from a scope id.

Parameters:

Name	Type	Description	Default
`scope_id`	`str`	The scope id of the folder.	required

Returns:

Name	Type	Description
`PurePath`	`PurePath`	The path of the folder.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def get_folder_path(self, *, scope_id: str) -> PurePath:
    """
    Get the path of a folder from a scope id.

    Args:
        scope_id (str): The scope id of the folder.

    Returns:
        PurePath: The path of the folder.
    """
    location = self._get_knowledge_base_location(scope_id=scope_id)
    path_of_folder, _scope_ids = location
    return path_of_folder

`get_scope_id_path(*, scope_id)` ¶

Get the list of scope ids leading from the root to a folder.

Parameters:

Name	Type	Description	Default
`scope_id`	`str`	The scope id of the folder.	required

Returns:

Type	Description
`list[str]`	list[str]: The list of scope ids from root to the folder.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def get_scope_id_path(self, *, scope_id: str) -> list[str]:
    """
    Get the chain of scope ids leading to a folder.

    Args:
        scope_id (str): The scope id of the folder.

    Returns:
        list[str]: The list of scope ids from root to the folder.
    """
    location = self._get_knowledge_base_location(scope_id=scope_id)
    _path_of_folder, scope_id_chain = location
    return scope_id_chain

`remove_content_metadata(*, content_info, keys_to_remove)` ¶

Removes the specified keys irreversibly from the content metadata.

Note: Keys are camelized before being removed as metadata keys are stored in camelCase.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def remove_content_metadata(
    self,
    *,
    content_info: ContentInfo,
    keys_to_remove: list[str],
) -> ContentInfo:
    """
    Removes the specified keys irreversibly from the content metadata.

    Each key is camelized (metadata keys are stored in camelCase) and its
    value is set to None on ``content_info.metadata`` before the metadata is
    sent through ``update_content``.

    Args:
        content_info (ContentInfo): The content whose metadata is changed.
        keys_to_remove (list[str]): The keys to remove.

    Returns:
        ContentInfo: The value returned by ``update_content``, or the
            unchanged ``content_info`` when it carries no metadata.
    """
    current_metadata = content_info.metadata
    if current_metadata is None:
        # Nothing to remove; warn and hand the input back untouched.
        _LOGGER.warning(f"Content metadata is None for content {content_info.id}")
        return content_info

    for raw_key in keys_to_remove:
        current_metadata[humps.camelize(raw_key)] = None

    payload = content_info.metadata or {}
    return update_content(
        user_id=self._user_id,
        company_id=self._company_id,
        content_id=content_info.id,
        metadata=payload,
    )

`remove_contents_metadata(*, keys_to_remove, metadata_filter=None, content_infos=None)` ¶

remove_contents_metadata(
    *,
    keys_to_remove: list[str],
    content_infos: list[ContentInfo],
) -> list[ContentInfo]

remove_contents_metadata(
    *,
    keys_to_remove: list[str],
    metadata_filter: dict[str, Any],
) -> list[ContentInfo]

Remove the specified keys irreversibly from the content metadata.

Note: Keys are camelized before being removed as metadata keys are stored in camelCase.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def remove_contents_metadata(
    self,
    *,
    keys_to_remove: list[str],
    metadata_filter: dict[str, Any] | None = None,
    content_infos: list[ContentInfo] | None = None,
) -> list[ContentInfo]:
    """Remove the specified keys irreversibly from the metadata of several contents.

    Note: Keys are camelized before being removed as metadata keys are stored in camelCase.

    Args:
        keys_to_remove (list[str]): The keys to remove from every content.
        metadata_filter (dict[str, Any] | None): Used to look up the contents
            when ``content_infos`` is not given.
        content_infos (list[ContentInfo] | None): The contents to process.

    Returns:
        list[ContentInfo]: The content infos that were processed.
    """
    targets = content_infos
    if targets is None:
        page = self.get_paginated_content_infos(
            metadata_filter=metadata_filter,
        )
        targets = page.content_infos

    for target in targets:
        self.remove_content_metadata(
            content_info=target, keys_to_remove=keys_to_remove
        )

    return targets

`resolve_visible_file_paths_async(*, metadata_filter=None)` `async` ¶

Resolves file paths visible to the current user asynchronously.

Returns each content item paired with its resolved file path segments.

Parameters:

Name	Type	Description	Default
`metadata_filter`	`dict[str, Any] \| None`	Optional metadata filter to narrow the content scope.	`None`

Returns:

Type	Description
`list[tuple[ContentInfo, list[str]]]`	list[tuple[ContentInfo, list[str]]]: Each tuple is `(content_info, [folder1, folder2, ..., filename])`.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

async def resolve_visible_file_paths_async(
    self,
    *,
    metadata_filter: dict[str, Any] | None = None,
) -> list[tuple[ContentInfo, list[str]]]:
    """Pair every fetched content item with its human-readable path segments.

    Content infos are fetched with ``get_content_infos_async``; the scope ids
    extracted from them are translated to folder names, and each item's
    ``folderIdPath`` metadata entry is then mapped segment by segment.

    Args:
        metadata_filter: Optional metadata filter to narrow the content scope.

    Returns:
        list[tuple[ContentInfo, list[str]]]: Each tuple is
            ``(content_info, [folder1, folder2, ..., filename])``. Items whose
            metadata is empty or carries no string ``folderIdPath`` get
            ``["_no_folder_path", filename]`` instead.
    """
    infos = await self.get_content_infos_async(metadata_filter=metadata_filter)
    folder_names = await self._translate_scope_ids_async(
        self.extract_scope_ids(infos)
    )

    pairs: list[tuple[ContentInfo, list[str]]] = []
    for info in infos:
        raw_path = info.metadata.get("folderIdPath") if info.metadata else None

        if isinstance(raw_path, str):
            # Drop the URI scheme, then translate each non-empty scope id;
            # ids without a known folder name are kept verbatim.
            stripped = raw_path.replace("uniquepathid://", "")
            segments = [
                folder_names.get(part, part) for part in stripped.split("/") if part
            ]
        else:
            segments = ["_no_folder_path"]

        # The content key is always the final path segment.
        segments.append(info.key)
        pairs.append((info, segments))

    return pairs

`search_content_chunks(*, search_string, search_type, limit, search_language=DEFAULT_SEARCH_LANGUAGE, reranker_config=None, scope_ids=None, metadata_filter=None, content_ids=None, score_threshold=None)` ¶

search_content_chunks(
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    metadata_filter: dict[str, Any],
    score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
) -> list[ContentChunk]

search_content_chunks(
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    metadata_filter: dict[str, Any],
    content_ids: list[str],
    score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
) -> list[ContentChunk]

Performs a synchronous search for content chunks in the knowledge base.

Parameters:

Name	Type	Description	Default
`search_string`	`str`	The search string.	required
`search_type`	`ContentSearchType`	The type of search to perform.	required
`limit`	`int`	The maximum number of results to return.	required
`search_language`	`str`	The language for the full-text search. Defaults to "english".	`DEFAULT_SEARCH_LANGUAGE`
`reranker_config`	`ContentRerankerConfig \| None`	The reranker configuration. Defaults to None.	`None`
`scope_ids`	`list[str] \| None`	Deprecated. Folded into `metadata_filter` as a `folderId in [scope_ids]` clause; do not use for new code.	`None`
`metadata_filter`	`dict \| None`	UniqueQL metadata filter. If unspecified/None, it tries to use the metadata filter from the event. Defaults to None.	`None`
`content_ids`	`list[str] \| None`	The content IDs to search within. Defaults to None.	`None`
`score_threshold`	`float \| None`	Sets the minimum similarity score for search results to be considered. Defaults to 0.	`None`

Returns:

Type	Description
`list[ContentChunk]`	list[ContentChunk]: The search results.

Raises:

Type	Description
`Exception`	If there's an error during the search operation.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def search_content_chunks(
    self,
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
    scope_ids: list[str] | None = None,
    metadata_filter: dict[str, Any] | None = None,
    content_ids: list[str] | None = None,
    score_threshold: float | None = None,
) -> list[ContentChunk]:
    """
    Synchronously search the knowledge base for content chunks.

    Args:
        search_string (str): The search string.
        search_type (ContentSearchType): The type of search to perform.
        limit (int): The maximum number of results to return.
        search_language (str, optional): The language for the full-text search.
            Defaults to ``DEFAULT_SEARCH_LANGUAGE``.
        reranker_config (ContentRerankerConfig | None, optional): The reranker
            configuration. Defaults to None.
        scope_ids (list[str] | None, optional): Deprecated. When non-empty, a
            ``DeprecationWarning`` is emitted and the ids are merged into the
            metadata filter as a folder-id clause; do not use for new code.
        metadata_filter (dict | None, optional): UniqueQL metadata filter. When
            None, the filter the service was constructed with is used instead.
            Defaults to None.
        content_ids (list[str] | None, optional): The content IDs to search
            within. Defaults to None.
        score_threshold (float | None, optional): Minimum similarity score for
            results to be considered. Defaults to None, which is forwarded
            as-is to the underlying search.

    Returns:
        list[ContentChunk]: The search results.

    Raises:
        Exception: Any error raised by the underlying search is logged and
            re-raised.
    """

    # An explicit filter wins; otherwise fall back to the service-level one.
    effective_filter = (
        self._metadata_filter if metadata_filter is None else metadata_filter
    )

    if scope_ids:
        warnings.warn(
            "Passing scope_ids to KnowledgeBaseService.search_content_chunks is "
            "deprecated; use metadata_filter with folderId operator 'in' instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        # Legacy scope ids are expressed through the metadata filter only.
        effective_filter = merge_scope_clause_into_metadata_filter(
            build_folder_id_in_clause(scope_ids), effective_filter
        )

    try:
        return search_content_chunks(
            user_id=self._user_id,
            company_id=self._company_id,
            chat_id="",
            chat_only=False,
            search_string=search_string,
            search_type=search_type,
            search_language=search_language,
            limit=limit,
            reranker_config=reranker_config,
            metadata_filter=effective_filter,
            content_ids=content_ids,
            score_threshold=score_threshold,
        )
    except Exception as e:
        _LOGGER.error(f"Error while searching content chunks: {e}")
        raise e

`search_content_chunks_async(*, search_string, search_type, limit, search_language=DEFAULT_SEARCH_LANGUAGE, reranker_config=None, scope_ids=None, metadata_filter=None, content_ids=None, score_threshold=None)` `async` ¶

search_content_chunks_async(
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    metadata_filter: dict[str, Any],
    score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
) -> list[ContentChunk]

search_content_chunks_async(
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    metadata_filter: dict[str, Any],
    content_ids: list[str],
    score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
) -> list[ContentChunk]

Performs an asynchronous search for content chunks in the knowledge base.

Parameters:

Name	Type	Description	Default
`search_string`	`str`	The search string.	required
`search_type`	`ContentSearchType`	The type of search to perform.	required
`limit`	`int`	The maximum number of results to return.	required
`search_language`	`str`	The language for the full-text search. Defaults to "english".	`DEFAULT_SEARCH_LANGUAGE`
`reranker_config`	`ContentRerankerConfig \| None`	The reranker configuration. Defaults to None.	`None`
`scope_ids`	`list[str] \| None`	Deprecated. Folded into `metadata_filter` as a `folderId in [scope_ids]` clause; do not use for new code.	`None`
`metadata_filter`	`dict \| None`	UniqueQL metadata filter. If unspecified/None, it tries to use the metadata filter from the event. Defaults to None.	`None`
`content_ids`	`list[str] \| None`	The content IDs to search within. Defaults to None.	`None`
`score_threshold`	`float \| None`	Sets the minimum similarity score for search results to be considered. Defaults to 0.	`None`

Returns:

Type	Description
`list[ContentChunk]`	list[ContentChunk]: The search results.

Raises:

Type	Description
`Exception`	If there's an error during the search operation.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

async def search_content_chunks_async(
    self,
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
    scope_ids: list[str] | None = None,
    metadata_filter: dict[str, Any] | None = None,
    content_ids: list[str] | None = None,
    score_threshold: float | None = None,
) -> list[ContentChunk]:
    """
    Performs an asynchronous search for content chunks in the knowledge base.

    Args:
        search_string (str): The search string.
        search_type (ContentSearchType): The type of search to perform.
        limit (int): The maximum number of results to return.
        search_language (str, optional): The language for the full-text search.
            Defaults to ``DEFAULT_SEARCH_LANGUAGE``.
        reranker_config (ContentRerankerConfig | None, optional): The reranker
            configuration. Defaults to None.
        scope_ids (list[str] | None, optional): Deprecated. When non-empty, a
            ``DeprecationWarning`` is emitted and the ids are merged into the
            metadata filter as a folder-id clause; do not use for new code.
        metadata_filter (dict | None, optional): UniqueQL metadata filter. When
            None, the filter the service was constructed with is used instead.
            Defaults to None.
        content_ids (list[str] | None, optional): The content IDs to search
            within. Defaults to None.
        score_threshold (float | None, optional): Minimum similarity score for
            results to be considered. Defaults to None, which is forwarded
            as-is to the underlying search.

    Returns:
        list[ContentChunk]: The search results.

    Raises:
        Exception: Any error raised by the underlying search is logged and
            re-raised.
    """
    # An explicit filter wins; otherwise fall back to the service-level one.
    if metadata_filter is None:
        metadata_filter = self._metadata_filter

    if scope_ids:
        warnings.warn(
            "Passing scope_ids to KnowledgeBaseService.search_content_chunks_async is "
            "deprecated; use metadata_filter with folderId operator 'in' instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        # Legacy scope ids are expressed through the metadata filter only.
        clause = build_folder_id_in_clause(scope_ids)
        metadata_filter = merge_scope_clause_into_metadata_filter(
            clause, metadata_filter
        )

    try:
        return await search_content_chunks_async(
            user_id=self._user_id,
            company_id=self._company_id,
            chat_id="",
            search_string=search_string,
            search_type=search_type,
            limit=limit,
            search_language=search_language,
            reranker_config=reranker_config,
            chat_only=False,
            metadata_filter=metadata_filter,
            content_ids=content_ids,
            score_threshold=score_threshold,
        )
    except Exception as e:
        _LOGGER.error(f"Error while searching content chunks: {e}")
        raise e

`search_contents(*, where, include_failed_content=False)` ¶

Performs a search in the knowledge base by filter (and not a similarity search). This function loads the complete content of the files from the knowledge base, in contrast to search_content_chunks.

Parameters:

Name	Type	Description	Default
`where`	`dict`	The search criteria.	required

Returns:

Type	Description
`list[Content]`	list[Content]: The search results.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def search_contents(
    self,
    *,
    where: dict[str, Any],
    include_failed_content: bool = False,
) -> list[Content]:
    """
    Look up contents in the knowledge base by filter (not a similarity search).

    Unlike ``search_content_chunks``, this loads the complete content of the
    matching files from the knowledge base.

    Args:
        where (dict): The search criteria.
        include_failed_content (bool): Forwarded unchanged to the underlying
            search; presumably controls whether content whose ingestion failed
            is part of the result. Defaults to False.

    Returns:
        list[Content]: The search results.
    """

    # The empty chat_id is always passed: the lookup is not tied to a chat.
    matches = search_contents(
        user_id=self._user_id,
        company_id=self._company_id,
        where=where,
        include_failed_content=include_failed_content,
        chat_id="",
    )
    return matches

`search_contents_async(*, where, include_failed_content=False)` `async` ¶

Performs an asynchronous search for content files in the knowledge base by filter.

Parameters:

Name	Type	Description	Default
`where`	`dict`	The search criteria.	required

Returns:

Type	Description
`list[Content]`	list[Content]: The search results.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

async def search_contents_async(
    self,
    *,
    where: dict[str, Any],
    include_failed_content: bool = False,
) -> list[Content]:
    """
    Asynchronously look up content files in the knowledge base by filter.

    Args:
        where (dict): The search criteria.
        include_failed_content (bool): Forwarded unchanged to the underlying
            search; presumably controls whether content whose ingestion failed
            is part of the result. Defaults to False.

    Returns:
        list[Content]: The search results.
    """

    # The empty chat_id is always passed: the lookup is not tied to a chat.
    matches = await search_contents_async(
        user_id=self._user_id,
        company_id=self._company_id,
        where=where,
        include_failed_content=include_failed_content,
        chat_id="",
    )
    return matches

`update_contents_metadata(*, additional_metadata, metadata_filter=None, content_infos=None)` ¶

update_contents_metadata(
    *,
    additional_metadata: dict[str, Any],
    content_infos: list[ContentInfo],
) -> list[ContentInfo]

update_contents_metadata(
    *,
    additional_metadata: dict[str, Any],
    metadata_filter: dict[str, Any],
) -> list[ContentInfo]

Update the metadata of the contents matching the metadata filter.

Note: Keys are camelized before being updated as metadata keys are stored in camelCase.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def update_contents_metadata(
    self,
    *,
    additional_metadata: dict[str, Any],
    metadata_filter: dict[str, Any] | None = None,
    content_infos: list[ContentInfo] | None = None,
) -> list[ContentInfo]:
    """Apply additional metadata to several contents at once.

    The contents to update are either handed in directly via ``content_infos``
    or, when that argument is ``None``, looked up through
    ``get_paginated_content_infos`` using ``metadata_filter``.

    Note: Keys are camelized before being updated as metadata keys are stored in camelCase.
    Forbidden keys are stripped via ``_pop_forbidden_metadata_keys`` before
    any content is touched.

    Args:
        additional_metadata: Metadata entries to apply to every selected content.
        metadata_filter: Filter used to look up the contents when
            ``content_infos`` is not supplied.
        content_infos: Explicit contents to update; takes precedence over
            ``metadata_filter``.

    Returns:
        The content infos that were processed, exactly as supplied or fetched
        (the per-item results of the update are not collected).
    """

    # Normalize the payload once, up front, so every item gets the same dict.
    sanitized = self._pop_forbidden_metadata_keys(
        humps.camelize(additional_metadata)
    )

    targets = content_infos
    if targets is None:
        page = self.get_paginated_content_infos(metadata_filter=metadata_filter)
        targets = page.content_infos

    for target in targets:
        self.update_content_metadata(
            content_info=target,
            additional_metadata=sanitized,
        )

    return targets

`upload_content(path_to_content, content_name, mime_type, scope_id, skip_ingestion=False, skip_excel_ingestion=False, ingestion_config=None, metadata=None)` ¶

Uploads content to the knowledge base.

Parameters:

Name	Type	Description	Default
`path_to_content`	`str`	The path to the content to upload.	required
`content_name`	`str`	The name of the content.	required
`mime_type`	`str`	The MIME type of the content.	required
`scope_id`	`str`	The scope ID.	required
`skip_ingestion`	`bool`	Whether to skip ingestion. Defaults to False.	`False`
`skip_excel_ingestion`	`bool`	Whether to skip excel ingestion. Defaults to False.	`False`
`ingestion_config`	`IngestionConfig \| None`	The ingestion configuration. Defaults to None.	`None`
`metadata`	`dict[str, Any] \| None`	The metadata to associate with the content. Defaults to None.	`None`

Returns:

Name	Type	Description
`Content`	`Content`	The uploaded content.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def upload_content(
    self,
    path_to_content: str,
    content_name: str,
    mime_type: str,
    scope_id: str,
    skip_ingestion: bool = False,
    skip_excel_ingestion: bool = False,
    ingestion_config: unique_sdk.Content.IngestionConfig | None = None,
    metadata: dict[str, Any] | None = None,
) -> Content:
    """
    Upload a file from a local path into the knowledge base.

    Args:
        path_to_content (str): The path to the content to upload.
        content_name (str): The name of the content.
        mime_type (str): The MIME type of the content.
        scope_id (str): The scope ID the content is uploaded to. Required.
        skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
        skip_excel_ingestion (bool): Whether to skip excel ingestion. Defaults to False.
        ingestion_config (unique_sdk.Content.IngestionConfig | None): The ingestion configuration. Defaults to None.
        metadata (dict[str, Any] | None): The metadata to associate with the content. Defaults to None.

    Returns:
        Content: The uploaded content.
    """

    # The empty chat_id is always passed: the upload is not tied to a chat.
    uploaded = upload_content(
        user_id=self._user_id,
        company_id=self._company_id,
        chat_id="",
        scope_id=scope_id,
        path_to_content=path_to_content,
        content_name=content_name,
        mime_type=mime_type,
        metadata=metadata,
        ingestion_config=ingestion_config,
        skip_ingestion=skip_ingestion,
        skip_excel_ingestion=skip_excel_ingestion,
    )
    return uploaded

`upload_content_from_bytes(content, *, content_name, mime_type, scope_id, skip_ingestion=False, ingestion_config=None, metadata=None)` ¶

Uploads content to the knowledge base.

Parameters:

Name	Type	Description	Default
`content`	`bytes`	The content to upload.	required
`content_name`	`str`	The name of the content.	required
`mime_type`	`str`	The MIME type of the content.	required
`scope_id`	`str`	The scope ID.	required
`skip_ingestion`	`bool`	Whether to skip ingestion. Defaults to False.	`False`
`ingestion_config`	`IngestionConfig \| None`	The ingestion configuration. Defaults to None.	`None`
`metadata`	`dict \| None`	The metadata to associate with the content. Defaults to None.	`None`

Returns:

Name	Type	Description
`Content`	`Content`	The uploaded content.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def upload_content_from_bytes(
    self,
    content: bytes,
    *,
    content_name: str,
    mime_type: str,
    scope_id: str,
    skip_ingestion: bool = False,
    ingestion_config: unique_sdk.Content.IngestionConfig | None = None,
    metadata: dict[str, Any] | None = None,
) -> Content:
    """
    Upload in-memory bytes as content into the knowledge base.

    Args:
        content (bytes): The content to upload.
        content_name (str): The name of the content.
        mime_type (str): The MIME type of the content.
        scope_id (str): The scope ID the content is uploaded to. Required.
        skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
        ingestion_config (unique_sdk.Content.IngestionConfig | None): The ingestion configuration. Defaults to None.
        metadata (dict | None): The metadata to associate with the content. Defaults to None.

    Returns:
        Content: The uploaded content.
    """

    # The empty chat_id is always passed: the upload is not tied to a chat.
    uploaded = upload_content_from_bytes(
        user_id=self._user_id,
        company_id=self._company_id,
        chat_id="",
        scope_id=scope_id,
        content=content,
        content_name=content_name,
        mime_type=mime_type,
        metadata=metadata,
        ingestion_config=ingestion_config,
        skip_ingestion=skip_ingestion,
    )
    return uploaded

`upload_content_from_bytes_async(content, *, content_name, mime_type, scope_id, skip_ingestion=False, ingestion_config=None, metadata=None)` `async` ¶

Uploads content to the knowledge base.

Parameters:

Name	Type	Description	Default
`content`	`bytes`	The content to upload.	required
`content_name`	`str`	The name of the content.	required
`mime_type`	`str`	The MIME type of the content.	required
`scope_id`	`str`	The scope ID.	required
`skip_ingestion`	`bool`	Whether to skip ingestion. Defaults to False.	`False`
`skip_excel_ingestion`	`bool`	Whether to skip excel ingestion. Defaults to False.	required
`ingestion_config`	`IngestionConfig \| None`	The ingestion configuration. Defaults to None.	`None`
`metadata`	`dict \| None`	The metadata to associate with the content. Defaults to None.	`None`

Returns:

Name	Type	Description
`Content`	`Content`	The uploaded content.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

async def upload_content_from_bytes_async(
    self,
    content: bytes,
    *,
    content_name: str,
    mime_type: str,
    scope_id: str,
    skip_ingestion: bool = False,
    ingestion_config: unique_sdk.Content.IngestionConfig | None = None,
    metadata: dict[str, Any] | None = None,
) -> Content:
    """
    Asynchronously upload in-memory bytes as content into the knowledge base.

    Args:
        content (bytes): The content to upload.
        content_name (str): The name of the content.
        mime_type (str): The MIME type of the content.
        scope_id (str): The scope ID the content is uploaded to. Required.
        skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
        ingestion_config (unique_sdk.Content.IngestionConfig | None): The ingestion configuration. Defaults to None.
        metadata (dict | None): The metadata to associate with the content. Defaults to None.

    Returns:
        Content: The uploaded content.
    """

    # The empty chat_id is always passed: the upload is not tied to a chat.
    uploaded = await upload_content_from_bytes_async(
        user_id=self._user_id,
        company_id=self._company_id,
        chat_id="",
        scope_id=scope_id,
        content=content,
        content_name=content_name,
        mime_type=mime_type,
        metadata=metadata,
        ingestion_config=ingestion_config,
        skip_ingestion=skip_ingestion,
    )
    return uploaded

Knowledge Base Module

Service¶

unique_toolkit.services.knowledge_base ¶

KnowledgeBaseService ¶

__init__(company_id, user_id, metadata_filter=None) ¶

batch_file_upload(*, local_files, remote_folders, overwrite=False, metadata_generator=None) ¶

create_folders(*, paths) ¶

delete_content(*, content_id=None, file_path=None) ¶

delete_contents(*, metadata_filter) ¶

delete_contents_async(*, metadata_filter) async ¶

download_content_to_bytes(*, content_id) ¶

download_content_to_bytes_async(*, content_id) async ¶

download_content_to_file(*, content_id, output_dir_path=None, output_filename=None) ¶

extract_scope_ids(content_infos) staticmethod ¶

from_context(context) classmethod ¶

from_event(event) classmethod ¶

from_settings(settings=None, metadata_filter=None, **kwargs) classmethod ¶

get_content_infos_async(*, metadata_filter=None, step_size=100, max_concurrent_requests=10) async ¶

get_file_names_in_folder(*, scope_id) ¶

get_folder_path(*, scope_id) ¶

get_scope_id_path(*, scope_id) ¶

remove_content_metadata(*, content_info, keys_to_remove) ¶

remove_contents_metadata(*, keys_to_remove, metadata_filter=None, content_infos=None) ¶

resolve_visible_file_paths_async(*, metadata_filter=None) async ¶

search_content_chunks(*, search_string, search_type, limit, search_language=DEFAULT_SEARCH_LANGUAGE, reranker_config=None, scope_ids=None, metadata_filter=None, content_ids=None, score_threshold=None) ¶

search_content_chunks_async(*, search_string, search_type, limit, search_language=DEFAULT_SEARCH_LANGUAGE, reranker_config=None, scope_ids=None, metadata_filter=None, content_ids=None, score_threshold=None) async ¶

search_contents(*, where, include_failed_content=False) ¶

search_contents_async(*, where, include_failed_content=False) async ¶

update_contents_metadata(*, additional_metadata, metadata_filter=None, content_infos=None) ¶

upload_content(path_to_content, content_name, mime_type, scope_id, skip_ingestion=False, skip_excel_ingestion=False, ingestion_config=None, metadata=None) ¶

upload_content_from_bytes(content, *, content_name, mime_type, scope_id, skip_ingestion=False, ingestion_config=None, metadata=None) ¶

upload_content_from_bytes_async(content, *, content_name, mime_type, scope_id, skip_ingestion=False, ingestion_config=None, metadata=None) async ¶

`unique_toolkit.services.knowledge_base` ¶

`KnowledgeBaseService` ¶

`__init__(company_id, user_id, metadata_filter=None)` ¶

`batch_file_upload(*, local_files, remote_folders, overwrite=False, metadata_generator=None)` ¶

`create_folders(*, paths)` ¶

`delete_content(*, content_id=None, file_path=None)` ¶

`delete_contents(*, metadata_filter)` ¶

`delete_contents_async(*, metadata_filter)` `async` ¶

`download_content_to_bytes(*, content_id)` ¶

`download_content_to_bytes_async(*, content_id)` `async` ¶

`download_content_to_file(*, content_id, output_dir_path=None, output_filename=None)` ¶

`extract_scope_ids(content_infos)` `staticmethod` ¶

`from_context(context)` `classmethod` ¶

`from_event(event)` `classmethod` ¶

`from_settings(settings=None, metadata_filter=None, **kwargs)` `classmethod` ¶

`get_content_infos_async(*, metadata_filter=None, step_size=100, max_concurrent_requests=10)` `async` ¶

`get_file_names_in_folder(*, scope_id)` ¶

`get_folder_path(*, scope_id)` ¶

`get_scope_id_path(*, scope_id)` ¶

`remove_content_metadata(*, content_info, keys_to_remove)` ¶

`remove_contents_metadata(*, keys_to_remove, metadata_filter=None, content_infos=None)` ¶

`resolve_visible_file_paths_async(*, metadata_filter=None)` `async` ¶

`search_content_chunks(*, search_string, search_type, limit, search_language=DEFAULT_SEARCH_LANGUAGE, reranker_config=None, scope_ids=None, metadata_filter=None, content_ids=None, score_threshold=None)` ¶

`search_content_chunks_async(*, search_string, search_type, limit, search_language=DEFAULT_SEARCH_LANGUAGE, reranker_config=None, scope_ids=None, metadata_filter=None, content_ids=None, score_threshold=None)` `async` ¶

`search_contents(*, where, include_failed_content=False)` ¶

`search_contents_async(*, where, include_failed_content=False)` `async` ¶

`update_contents_metadata(*, additional_metadata, metadata_filter=None, content_infos=None)` ¶

`upload_content(path_to_content, content_name, mime_type, scope_id, skip_ingestion=False, skip_excel_ingestion=False, ingestion_config=None, metadata=None)` ¶

`upload_content_from_bytes(content, *, content_name, mime_type, scope_id, skip_ingestion=False, ingestion_config=None, metadata=None)` ¶

`upload_content_from_bytes_async(content, *, content_name, mime_type, scope_id, skip_ingestion=False, ingestion_config=None, metadata=None)` `async` ¶