Knowledge Base Module

Service¶

`unique_toolkit.services.knowledge_base` ¶

`KnowledgeBaseService` ¶

Provides methods for searching, downloading and uploading content in the knowledge base.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

class KnowledgeBaseService:
    """
    Provides methods for searching, downloading and uploading content in the knowledge base.
    """

    def __init__(
        self,
        company_id: str,
        user_id: str,
        metadata_filter: dict | None = None,
    ) -> None:
        """
        Initialize the KnowledgeBaseService with a company_id and user_id.

        Args:
            company_id (str): The id of the company the requests are issued for.
            user_id (str): The id of the user the requests are issued for.
            metadata_filter (dict | None, optional): Default metadata filter that the
                chunk-search methods fall back to when no filter is passed to them.
                Defaults to None.
        """
        # NOTE(review): presumably rejects missing/empty ids - confirm against
        # validate_required_values.
        [company_id, user_id] = validate_required_values([company_id, user_id])
        self._company_id = company_id
        self._user_id = user_id
        self._metadata_filter = metadata_filter

    @classmethod
    def from_event(cls, event: BaseEvent):
        """
        Build a KnowledgeBaseService from an event.

        The company id and user id are read from the event. The metadata filter is
        taken from ``event.payload.metadata_filter`` when the event is a ``ChatEvent``
        or an ``Event``; for every other event type no metadata filter is set.
        """
        carries_filter = isinstance(event, (ChatEvent | Event))
        payload_filter = event.payload.metadata_filter if carries_filter else None

        return cls(
            company_id=event.company_id,
            user_id=event.user_id,
            metadata_filter=payload_filter,
        )

    @classmethod
    def from_settings(
        cls,
        settings: UniqueSettings | str | None = None,
        metadata_filter: dict | None = None,
    ):
        """
        Build a KnowledgeBaseService from a settings object and a metadata filter.

        Args:
            settings (UniqueSettings | str | None): A ready settings object, a string
                that is passed as ``filename`` to
                ``UniqueSettings.from_env_auto_with_sdk_init``, or None to call that
                loader without arguments.
            metadata_filter (dict | None): Default metadata filter for the service.
        """
        if isinstance(settings, str):
            settings = UniqueSettings.from_env_auto_with_sdk_init(filename=settings)
        elif settings is None:
            settings = UniqueSettings.from_env_auto_with_sdk_init()

        auth = settings.auth
        return cls(
            company_id=auth.company_id.get_secret_value(),
            user_id=auth.user_id.get_secret_value(),
            metadata_filter=metadata_filter,
        )

    # Content Search
    # ------------------------------------------------------------------------------------------------

    @overload
    def search_content_chunks(
        self,
        *,
        search_string: str,
        search_type: ContentSearchType,
        limit: int,
        scope_ids: list[str],
        score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
        search_language: str = DEFAULT_SEARCH_LANGUAGE,
        reranker_config: ContentRerankerConfig | None = None,
    ) -> list[ContentChunk]: ...

    @overload
    def search_content_chunks(
        self,
        *,
        search_string: str,
        search_type: ContentSearchType,
        limit: int,
        metadata_filter: dict,
        scope_ids: list[str] | None = None,
        score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
        search_language: str = DEFAULT_SEARCH_LANGUAGE,
        reranker_config: ContentRerankerConfig | None = None,
    ) -> list[ContentChunk]: ...

    @overload
    def search_content_chunks(
        self,
        *,
        search_string: str,
        search_type: ContentSearchType,
        limit: int,
        metadata_filter: dict,
        content_ids: list[str],
        score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
        search_language: str = DEFAULT_SEARCH_LANGUAGE,
        reranker_config: ContentRerankerConfig | None = None,
    ) -> list[ContentChunk]: ...

    def search_content_chunks(
        self,
        *,
        search_string: str,
        search_type: ContentSearchType,
        limit: int,
        search_language: str = DEFAULT_SEARCH_LANGUAGE,
        reranker_config: ContentRerankerConfig | None = None,
        scope_ids: list[str] | None = None,
        metadata_filter: dict | None = None,
        content_ids: list[str] | None = None,
        score_threshold: float | None = None,
    ) -> list[ContentChunk]:
        """
        Performs a synchronous search for content chunks in the knowledge base.

        Args:
            search_string (str): The search string.
            search_type (ContentSearchType): The type of search to perform.
            limit (int): The maximum number of results to return.
            search_language (str, optional): The language for the full-text search. Defaults to DEFAULT_SEARCH_LANGUAGE.
            reranker_config (ContentRerankerConfig | None, optional): The reranker configuration. Defaults to None.
            scope_ids (list[str] | None, optional): The scope IDs to filter by. Defaults to None.
            metadata_filter (dict | None, optional): UniqueQL metadata filter. If None, the metadata filter the service was created with is used. Defaults to None.
            content_ids (list[str] | None, optional): The content IDs to search within. Defaults to None.
            score_threshold (float | None, optional): Sets the minimum similarity score for search results to be considered. Defaults to None, which is forwarded unchanged to the underlying search function.

        Returns:
            list[ContentChunk]: The search results.

        Raises:
            Exception: If there's an error during the search operation. The error is logged and re-raised unchanged.
        """

        # Fall back to the filter stored on the service when none is given.
        if metadata_filter is None:
            metadata_filter = self._metadata_filter

        try:
            # chat_id is empty and chat_only is False: the search is not tied to a chat.
            return search_content_chunks(
                user_id=self._user_id,
                company_id=self._company_id,
                chat_id="",
                search_string=search_string,
                search_type=search_type,
                limit=limit,
                search_language=search_language,
                reranker_config=reranker_config,
                scope_ids=scope_ids,
                chat_only=False,
                metadata_filter=metadata_filter,
                content_ids=content_ids,
                score_threshold=score_threshold,
            )
        except Exception as e:
            _LOGGER.error("Error while searching content chunks: %s", e)
            # Bare raise re-raises the active exception without rebinding it.
            raise

    @overload
    async def search_content_chunks_async(
        self,
        *,
        search_string: str,
        search_type: ContentSearchType,
        limit: int,
        scope_ids: list[str],
        score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
        search_language: str = DEFAULT_SEARCH_LANGUAGE,
        reranker_config: ContentRerankerConfig | None = None,
    ) -> list[ContentChunk]: ...

    @overload
    async def search_content_chunks_async(
        self,
        *,
        search_string: str,
        search_type: ContentSearchType,
        limit: int,
        metadata_filter: dict,
        scope_ids: list[str] | None = None,
        score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
        search_language: str = DEFAULT_SEARCH_LANGUAGE,
        reranker_config: ContentRerankerConfig | None = None,
    ) -> list[ContentChunk]: ...

    @overload
    async def search_content_chunks_async(
        self,
        *,
        search_string: str,
        search_type: ContentSearchType,
        limit: int,
        metadata_filter: dict,
        content_ids: list[str],
        score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
        search_language: str = DEFAULT_SEARCH_LANGUAGE,
        reranker_config: ContentRerankerConfig | None = None,
    ) -> list[ContentChunk]: ...

    async def search_content_chunks_async(
        self,
        *,
        search_string: str,
        search_type: ContentSearchType,
        limit: int,
        search_language: str = DEFAULT_SEARCH_LANGUAGE,
        reranker_config: ContentRerankerConfig | None = None,
        scope_ids: list[str] | None = None,
        metadata_filter: dict | None = None,
        content_ids: list[str] | None = None,
        score_threshold: float | None = None,
    ) -> list[ContentChunk]:
        """
        Performs an asynchronous search for content chunks in the knowledge base.

        Args:
            search_string (str): The search string.
            search_type (ContentSearchType): The type of search to perform.
            limit (int): The maximum number of results to return.
            search_language (str, optional): The language for the full-text search. Defaults to DEFAULT_SEARCH_LANGUAGE.
            reranker_config (ContentRerankerConfig | None, optional): The reranker configuration. Defaults to None.
            scope_ids (list[str] | None, optional): The scope IDs to filter by. Defaults to None.
            metadata_filter (dict | None, optional): UniqueQL metadata filter. If None, the metadata filter the service was created with is used. Defaults to None.
            content_ids (list[str] | None, optional): The content IDs to search within. Defaults to None.
            score_threshold (float | None, optional): Sets the minimum similarity score for search results to be considered. Defaults to None, which is forwarded unchanged to the underlying search function.

        Returns:
            list[ContentChunk]: The search results.

        Raises:
            Exception: If there's an error during the search operation. The error is logged and re-raised unchanged.
        """
        # Fall back to the filter stored on the service when none is given.
        if metadata_filter is None:
            metadata_filter = self._metadata_filter

        try:
            # chat_id is empty and chat_only is False: the search is not tied to a chat.
            return await search_content_chunks_async(
                user_id=self._user_id,
                company_id=self._company_id,
                chat_id="",
                search_string=search_string,
                search_type=search_type,
                limit=limit,
                search_language=search_language,
                reranker_config=reranker_config,
                scope_ids=scope_ids,
                chat_only=False,
                metadata_filter=metadata_filter,
                content_ids=content_ids,
                score_threshold=score_threshold,
            )
        except Exception as e:
            _LOGGER.error("Error while searching content chunks: %s", e)
            # Bare raise re-raises the active exception without rebinding it.
            raise

    def search_contents(
        self,
        *,
        where: dict,
        include_failed_content: bool = False,
    ) -> list[Content]:
        """
        Searches the knowledge base by filter (not a similarity search).

        In contrast to search_content_chunks, the results are ``Content`` objects
        rather than individual chunks.

        Args:
            where (dict): The search criteria.
            include_failed_content (bool): Forwarded unchanged to the underlying
                search function. Defaults to False.

        Returns:
            list[Content]: The search results.
        """
        # An empty chat_id is passed: the lookup is not tied to a chat.
        matches = search_contents(
            where=where,
            include_failed_content=include_failed_content,
            chat_id="",
            user_id=self._user_id,
            company_id=self._company_id,
        )
        return matches

    async def search_contents_async(
        self,
        *,
        where: dict,
        include_failed_content: bool = False,
    ) -> list[Content]:
        """
        Asynchronously searches the knowledge base for content files by filter.

        Args:
            where (dict): The search criteria.
            include_failed_content (bool): Forwarded unchanged to the underlying
                search function. Defaults to False.

        Returns:
            list[Content]: The search results.
        """
        # An empty chat_id is passed: the lookup is not tied to a chat.
        matches = await search_contents_async(
            where=where,
            include_failed_content=include_failed_content,
            chat_id="",
            user_id=self._user_id,
            company_id=self._company_id,
        )
        return matches

    # Content Management
    # ------------------------------------------------------------------------------------------------

    def upload_content_from_bytes(
        self,
        content: bytes,
        *,
        content_name: str,
        mime_type: str,
        scope_id: str,
        skip_ingestion: bool = False,
        ingestion_config: unique_sdk.Content.IngestionConfig | None = None,
        metadata: dict | None = None,
    ) -> Content:
        """
        Uploads in-memory content to the knowledge base.

        Args:
            content (bytes): The content to upload.
            content_name (str): The name of the content.
            mime_type (str): The MIME type of the content.
            scope_id (str): The scope ID to upload into.
            skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
            ingestion_config (unique_sdk.Content.IngestionConfig | None): The ingestion configuration. Defaults to None.
            metadata (dict | None): The metadata to associate with the content. Defaults to None.

        Returns:
            Content: The uploaded content.
        """
        # An empty chat_id is passed: the upload is not tied to a chat.
        uploaded = upload_content_from_bytes(
            content=content,
            content_name=content_name,
            mime_type=mime_type,
            scope_id=scope_id,
            skip_ingestion=skip_ingestion,
            ingestion_config=ingestion_config,
            metadata=metadata,
            chat_id="",
            user_id=self._user_id,
            company_id=self._company_id,
        )
        return uploaded

    async def upload_content_from_bytes_async(
        self,
        content: bytes,
        *,
        content_name: str,
        mime_type: str,
        scope_id: str,
        skip_ingestion: bool = False,
        ingestion_config: unique_sdk.Content.IngestionConfig | None = None,
        metadata: dict | None = None,
    ) -> Content:
        """
        Asynchronously uploads in-memory content to the knowledge base.

        Args:
            content (bytes): The content to upload.
            content_name (str): The name of the content.
            mime_type (str): The MIME type of the content.
            scope_id (str): The scope ID to upload into.
            skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
            ingestion_config (unique_sdk.Content.IngestionConfig | None): The ingestion configuration. Defaults to None.
            metadata (dict | None): The metadata to associate with the content. Defaults to None.

        Returns:
            Content: The uploaded content.
        """
        # An empty chat_id is passed: the upload is not tied to a chat.
        uploaded = await upload_content_from_bytes_async(
            content=content,
            content_name=content_name,
            mime_type=mime_type,
            scope_id=scope_id,
            skip_ingestion=skip_ingestion,
            ingestion_config=ingestion_config,
            metadata=metadata,
            chat_id="",
            user_id=self._user_id,
            company_id=self._company_id,
        )
        return uploaded

    def upload_content(
        self,
        path_to_content: str,
        content_name: str,
        mime_type: str,
        scope_id: str,
        skip_ingestion: bool = False,
        skip_excel_ingestion: bool = False,
        ingestion_config: unique_sdk.Content.IngestionConfig | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> Content:
        """
        Uploads a local file to the knowledge base.

        Args:
            path_to_content (str): The path to the content to upload.
            content_name (str): The name of the content.
            mime_type (str): The MIME type of the content.
            scope_id (str): The scope ID to upload into.
            skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
            skip_excel_ingestion (bool): Whether to skip excel ingestion. Defaults to False.
            ingestion_config (unique_sdk.Content.IngestionConfig | None): The ingestion configuration. Defaults to None.
            metadata (dict[str, Any] | None): The metadata to associate with the content. Defaults to None.

        Returns:
            Content: The uploaded content.
        """
        # An empty chat_id is passed: the upload is not tied to a chat.
        uploaded = upload_content(
            path_to_content=path_to_content,
            content_name=content_name,
            mime_type=mime_type,
            scope_id=scope_id,
            skip_ingestion=skip_ingestion,
            skip_excel_ingestion=skip_excel_ingestion,
            ingestion_config=ingestion_config,
            metadata=metadata,
            chat_id="",
            user_id=self._user_id,
            company_id=self._company_id,
        )
        return uploaded

    def download_content_to_file(
        self,
        *,
        content_id: str,
        output_dir_path: Path | None = None,
        output_filename: str | None = None,
    ) -> Path:
        """
        Downloads content by id and saves it to a file.

        Args:
            content_id (str): The ID of the content to download.
            output_dir_path (Path | None): Directory to save into; forwarded as
                ``tmp_dir_path`` to the download helper. Defaults to None.
                NOTE(review): the earlier docs claimed a "/tmp" default - confirm
                in the helper.
            output_filename (str | None): The name of the file to save the content
                as; forwarded as ``filename``. Defaults to None.
                NOTE(review): the earlier docs said the original filename is used
                when omitted - confirm in the helper.

        Returns:
            Path: The path to the downloaded file.

        Raises:
            Exception: If the download fails or the filename cannot be determined.
        """
        # An empty chat_id is passed: the download is not tied to a chat.
        saved_path = download_content_to_file_by_id(
            content_id=content_id,
            filename=output_filename,
            tmp_dir_path=output_dir_path,
            chat_id="",
            user_id=self._user_id,
            company_id=self._company_id,
        )
        return saved_path

    def download_content_to_bytes(
        self,
        *,
        content_id: str,
    ) -> bytes:
        """
        Downloads content into memory.

        Args:
            content_id (str): The id of the uploaded content.

        Returns:
            bytes: The downloaded content.

        Raises:
            Exception: If the download fails.
        """
        # chat_id is None here (not ""), unlike the other wrappers in this class.
        payload = download_content_to_bytes(
            content_id=content_id,
            chat_id=None,
            user_id=self._user_id,
            company_id=self._company_id,
        )
        return payload

    def batch_file_upload(
        self,
        *,
        local_files: list[Path],
        remote_folders: list[PurePath],
        overwrite: bool = False,
        metadata_generator: Callable[[Path, PurePath], dict[str, Any]] | None = None,
    ) -> None:
        """
        Upload files to the knowledge base into corresponding folders

        Args:
            local_files (list[Path]): The local files to upload
            remote_folders (list[PurePath]): The remote folders to upload the files to
            overwrite (bool): Whether to overwrite existing files
            metadata_generator (Callable[[Path, PurePath], dict[str, Any]] | None): The metadata generator function

        Returns:
            None
        """

        if len(local_files) != len(remote_folders):
            raise ValueError(
                "The number of local files and remote folders must be the same"
            )

        creation_result = self.create_folders(paths=remote_folders)

        folders_path_to_scope_id = {
            folder_path: result.id
            for folder_path, result in zip(remote_folders, creation_result)
        }

        _old_scope_id = None
        _existing_file_names: list[str] = []

        for remote_folder_path, local_file_path in zip(remote_folders, local_files):
            scope_id = folders_path_to_scope_id[remote_folder_path]
            mime_type = mimetypes.guess_type(local_file_path.name)[0]

            if mime_type is None:
                _LOGGER.warning(
                    f"No mime type found for file {local_file_path.name}, skipping"
                )
                continue

            if not overwrite:
                if _old_scope_id is None or _old_scope_id != scope_id:
                    _LOGGER.debug(f"Switching to new folder {scope_id}")
                    _old_scope_id = scope_id
                    _existing_file_names = self.get_file_names_in_folder(
                        scope_id=scope_id
                    )

                if local_file_path.name in _existing_file_names:
                    _LOGGER.warning(
                        f"File {local_file_path.name} already exists in folder {scope_id}, skipping"
                    )
                    continue

            metadata = None
            if metadata_generator is not None:
                metadata = metadata_generator(local_file_path, remote_folder_path)

            self.upload_content(
                path_to_content=str(local_file_path),
                content_name=local_file_path.name,
                mime_type=mime_type,
                scope_id=scope_id,
                metadata=metadata,
            )

    # Content Information
    # ------------------------------------------------------------------------------------------------
    def get_paginated_content_infos(
        self,
        *,
        metadata_filter: dict[str, Any] | None = None,
        skip: int | None = None,
        take: int | None = None,
        file_path: str | None = None,
    ) -> PaginatedContentInfos:
        """
        Fetch content infos for the service's user and company.

        All arguments are forwarded unchanged to ``get_content_info``.

        Args:
            metadata_filter (dict[str, Any] | None): The metadata filter to use. Defaults to None.
            skip (int | None): presumably the number of entries to skip - confirm. Defaults to None.
            take (int | None): presumably the page size - confirm. Defaults to None.
            file_path (str | None): Forwarded as ``file_path``. Defaults to None.

        Returns:
            PaginatedContentInfos: The result of ``get_content_info``.
        """
        page = get_content_info(
            metadata_filter=metadata_filter,
            skip=skip,
            take=take,
            file_path=file_path,
            user_id=self._user_id,
            company_id=self._company_id,
        )
        return page

    def get_file_names_in_folder(self, *, scope_id: str) -> list[str]:
        """
        List the file names found in a knowledge base folder.

        The folder is selected with a metadata filter on ``folderId``; each
        returned name is the ``key`` of a content info.

        Args:
            scope_id (str): The scope id of the folder

        Returns:
            list[str]: The list of file names in the folder
        """
        # NOTE(review): no skip/take is passed, so only what a single
        # get_paginated_content_infos call returns is listed - confirm.
        folder_rule = Statement(
            path=["folderId"], operator=Operator.EQUALS, value=scope_id
        )
        rule_as_json = folder_rule.model_dump(mode="json")
        page = self.get_paginated_content_infos(metadata_filter=rule_as_json)

        file_names = []
        for content_info in page.content_infos:
            file_names.append(content_info.key)
        return file_names

    # Folder Management
    # ------------------------------------------------------------------------------------------------

    def get_folder_info(
        self,
        *,
        scope_id: str,
    ) -> FolderInfo:
        """
        Fetch the folder information for a scope id.

        Args:
            scope_id (str): The scope id of the folder.

        Returns:
            FolderInfo: The result of the module-level ``get_folder_info`` helper.
        """
        folder = get_folder_info(
            scope_id=scope_id,
            user_id=self._user_id,
            company_id=self._company_id,
        )
        return folder

    def _resolve_visible_file_tree(self, content_infos: list[ContentInfo]) -> list[str]:
        """
        Turn content infos into a de-duplicated list of folder paths.

        A content info contributes its ``{FullPath}`` metadata value when present.
        Otherwise its ``folderIdPath`` is split into scope ids, each of which is
        resolved to a folder name via ``get_folder_info``. Infos without metadata,
        or with neither key, are ignored.

        Args:
            content_infos (list[ContentInfo]): The content infos to inspect.

        Returns:
            list[str]: The folder paths, each starting with "/". The order is not defined.
        """
        id_path_scheme = "uniquepathid://"

        explicit_paths: set[str] = set()
        id_paths: set[str] = set()
        for info in content_infos:
            # A ready-made full path wins over the id path.
            if info.metadata and info.metadata.get(r"{FullPath}") is not None:
                explicit_paths.add(str(info.metadata.get(r"{FullPath}")))
            elif info.metadata and info.metadata.get("folderIdPath") is not None:
                id_paths.add(str(info.metadata.get("folderIdPath")))

        # Every distinct scope id across all id paths is looked up exactly once.
        distinct_scope_ids: set[str] = set()
        for id_path in id_paths:
            distinct_scope_ids.update(id_path.replace(id_path_scheme, "").split("/"))

        name_by_scope_id: dict[str, str] = {
            scope_id: self.get_folder_info(scope_id=scope_id).name
            for scope_id in distinct_scope_ids
        }

        resolved_paths: set[str] = set()
        for id_path in id_paths:
            segments = id_path.replace(id_path_scheme, "").split("/")
            if any(segment not in name_by_scope_id for segment in segments):
                continue
            resolved_paths.add("/".join(name_by_scope_id[segment] for segment in segments))

        visible_tree = []
        for path in resolved_paths.union(explicit_paths):
            visible_tree.append(path if path.startswith("/") else f"/{path}")
        return visible_tree

    def resolve_visible_file_tree(
        self, *, metadata_filter: dict[str, Any] | None = None
    ) -> list[str]:
        """
        Resolves the visible file tree for the knowledge base for the current user.

        Args:
            metadata_filter (dict[str, Any] | None): The metadata filter to use. Defaults to None.

        Returns:
            list[str]: The visible file tree.



        """
        info = self.get_paginated_content_infos(
            metadata_filter=metadata_filter,
        )

        return self._resolve_visible_file_tree(content_infos=info.content_infos)

    def _pop_forbidden_metadata_keys(self, metadata: dict[str, Any]) -> dict[str, Any]:
        """
        Strip the forbidden keys from a metadata dict.

        The dict is modified in place and the same object is returned.

        Args:
            metadata (dict[str, Any]): The metadata to clean.

        Returns:
            dict[str, Any]: The given dict, without the forbidden keys.
        """
        for forbidden_key in (
            "key",
            "url",
            "title",
            "folderId",
            "mimeType",
            "companyId",
            "contentId",
            "folderIdPath",
            "externalFileOwner",
        ):
            if forbidden_key in metadata:
                del metadata[forbidden_key]
        return metadata

    def create_folders(self, *, paths: list[PurePath]) -> list[BaseFolderInfo]:
        """
        Create the given folder paths in the knowledge base.

        Each path is sent in POSIX form to ``unique_sdk.Folder.create_paths``; the
        entries of the response's ``createdFolders`` list are validated into
        ``BaseFolderInfo`` objects.

        Args:
            paths (list[PurePath]): The paths to create the folders at

        Returns:
            list[BaseFolderInfo]: The information about the created folders or existing folders
        """
        posix_paths = [path.as_posix() for path in paths]
        response = unique_sdk.Folder.create_paths(
            user_id=self._user_id,
            company_id=self._company_id,
            paths=posix_paths,
        )

        folder_infos = []
        for raw_folder in response["createdFolders"]:
            folder_infos.append(
                BaseFolderInfo.model_validate(raw_folder, by_alias=True, by_name=True)
            )
        return folder_infos

        # Metadata

    # Metadata Management
    # ------------------------------------------------------------------------------------------------

    def replace_content_metadata(
        self,
        *,
        content_id: str,
        metadata: dict[str, Any],
    ) -> ContentInfo:
        """
        Send the given metadata for a content to ``update_content`` as-is.

        Unlike update_content_metadata, the metadata is neither camelized,
        filtered nor merged with the existing metadata here.

        Args:
            content_id (str): The id of the content to update.
            metadata (dict[str, Any]): The metadata to send.

        Returns:
            ContentInfo: The result of ``update_content``.
        """
        updated = update_content(
            content_id=content_id,
            metadata=metadata,
            user_id=self._user_id,
            company_id=self._company_id,
        )
        return updated

    def update_content_metadata(
        self,
        *,
        content_info: ContentInfo,
        additional_metadata: dict[str, Any],
    ) -> ContentInfo:
        """
        Merge additional metadata into a content's metadata and persist it.

        The additional keys are camelized and forbidden keys are dropped before
        merging. ``content_info.metadata`` is modified in place.

        Args:
            content_info (ContentInfo): The content whose metadata is updated.
            additional_metadata (dict[str, Any]): The metadata to add or overwrite.

        Returns:
            ContentInfo: The result of ``update_content``.
        """
        extra_metadata = self._pop_forbidden_metadata_keys(
            humps.camelize(additional_metadata)
        )

        if content_info.metadata is None:
            content_info.metadata = extra_metadata
        else:
            content_info.metadata.update(extra_metadata)

        updated = update_content(
            content_id=content_info.id,
            metadata=content_info.metadata,
            user_id=self._user_id,
            company_id=self._company_id,
        )
        return updated

    def remove_content_metadata(
        self,
        *,
        content_info: ContentInfo,
        keys_to_remove: list[str],
    ) -> ContentInfo:
        """
        Removes the specified keys irreversibly from the content metadata.

        Each key is camelized and set to None in ``content_info.metadata`` (in
        place), then the metadata is sent to ``update_content``.
        NOTE(review): presumably the backend treats a None value as removal - confirm.

        If the content has no metadata, a warning is logged and ``content_info`` is
        returned unchanged.

        Args:
            content_info (ContentInfo): The content whose metadata is changed.
            keys_to_remove (list[str]): The keys to remove.

        Returns:
            ContentInfo: The result of ``update_content``, or the given
            ``content_info`` when it has no metadata.
        """
        current_metadata = content_info.metadata
        if current_metadata is None:
            _LOGGER.warning(f"Content metadata is None for content {content_info.id}")
            return content_info

        for raw_key in keys_to_remove:
            current_metadata[humps.camelize(raw_key)] = None

        updated = update_content(
            content_id=content_info.id,
            metadata=content_info.metadata or {},
            user_id=self._user_id,
            company_id=self._company_id,
        )
        return updated

    @overload
    def update_contents_metadata(
        self,
        *,
        additional_metadata: dict[str, Any],
        content_infos: list[ContentInfo],
    ) -> list[ContentInfo]: ...

    @overload
    def update_contents_metadata(
        self, *, additional_metadata: dict[str, Any], metadata_filter: dict[str, Any]
    ) -> list[ContentInfo]: ...

    def update_contents_metadata(
        self,
        *,
        additional_metadata: dict[str, Any],
        metadata_filter: dict[str, Any] | None = None,
        content_infos: list[ContentInfo] | None = None,
    ) -> list[ContentInfo]:
        """Update the metadata of several contents.

        The contents are either passed directly as ``content_infos`` or, when that
        is None, looked up with ``metadata_filter``.

        Note: Keys are camelized before being updated as metadata keys are stored in camelCase.

        Returns:
            list[ContentInfo]: The content infos that were processed (the objects
            that were passed in or looked up, not the update responses).
        """
        sanitized_metadata = self._pop_forbidden_metadata_keys(
            humps.camelize(additional_metadata)
        )

        targets = content_infos
        if targets is None:
            targets = self.get_paginated_content_infos(
                metadata_filter=metadata_filter
            ).content_infos

        for target in targets:
            self.update_content_metadata(
                content_info=target, additional_metadata=sanitized_metadata
            )

        return targets

    @overload
    def remove_contents_metadata(
        self,
        *,
        keys_to_remove: list[str],
        content_infos: list[ContentInfo],
    ) -> list[ContentInfo]: ...

    @overload
    def remove_contents_metadata(
        self, *, keys_to_remove: list[str], metadata_filter: dict[str, Any]
    ) -> list[ContentInfo]: ...

    def remove_contents_metadata(
        self,
        *,
        keys_to_remove: list[str],
        metadata_filter: dict[str, Any] | None = None,
        content_infos: list[ContentInfo] | None = None,
    ) -> list[ContentInfo]:
        """Remove the specified keys irreversibly from the metadata of several contents.

        The contents are either passed directly as ``content_infos`` or, when that
        is None, looked up with ``metadata_filter``.

        Note: Keys are camelized before being removed as metadata keys are stored in camelCase.

        Returns:
            list[ContentInfo]: The content infos that were processed (the objects
            that were passed in or looked up, not the update responses).
        """
        targets = content_infos
        if targets is None:
            targets = self.get_paginated_content_infos(
                metadata_filter=metadata_filter
            ).content_infos

        for target in targets:
            self.remove_content_metadata(
                content_info=target, keys_to_remove=keys_to_remove
            )

        return targets

    # Delete
    # ------------------------------------------------------------------------------------------------

    @overload
    def delete_content(
        self,
        *,
        content_id: str,
    ) -> DeleteContentResponse:
        """Delete content by id"""
        ...

    @overload
    def delete_content(
        self,
        *,
        file_path: str,
    ) -> DeleteContentResponse:
        """Delete all content matching the file path"""
        ...

    def delete_content(
        self,
        *,
        content_id: str | None = None,
        file_path: str | None = None,
    ) -> DeleteContentResponse:
        """Delete content by id or by file path.

        Both arguments are forwarded unchanged to the module-level
        ``delete_content`` helper. Use delete_contents to delete by metadata filter.

        Args:
            content_id (str | None): The id of the content to delete. Defaults to None.
            file_path (str | None): The file path whose content is deleted. Defaults to None.

        Returns:
            DeleteContentResponse: The result of the delete helper.
        """

        return delete_content(
            user_id=self._user_id,
            company_id=self._company_id,
            content_id=content_id,
            file_path=file_path,
        )

    def delete_contents(
        self,
        *,
        metadata_filter: dict[str, Any],
    ) -> list[DeleteContentResponse]:
        """Delete all content matching the metadata filter

        An empty filter deletes nothing and returns an empty list. Otherwise the
        matching content infos are looked up and deleted one by one by id.

        Returns:
            list[DeleteContentResponse]: One response per deleted content.
        """
        # Guard: an empty filter must never select (and delete) content.
        if not metadata_filter:
            return []

        matching = self.get_paginated_content_infos(metadata_filter=metadata_filter)
        return [
            delete_content(
                user_id=self._user_id,
                company_id=self._company_id,
                content_id=content_info.id,
            )
            for content_info in matching.content_infos
        ]

    @overload
    async def delete_content_async(
        self,
        *,
        content_id: str,
    ) -> DeleteContentResponse: ...

    @overload
    async def delete_content_async(
        self,
        *,
        file_path: str,
    ) -> DeleteContentResponse: ...

    async def delete_content_async(
        self,
        *,
        content_id: str | None = None,
        file_path: str | None = None,
    ) -> DeleteContentResponse:
        """Asynchronously delete content by id or by file path.

        Both arguments are forwarded unchanged to the module-level
        ``delete_content_async`` helper.
        """
        response = await delete_content_async(
            content_id=content_id,
            file_path=file_path,
            user_id=self._user_id,
            company_id=self._company_id,
        )
        return response

    async def delete_contents_async(
        self,
        *,
        metadata_filter: dict[str, Any],
    ) -> list[DeleteContentResponse]:
        """Delete all content matching the metadata filter

        An empty filter deletes nothing and returns an empty list. The matching
        content infos are looked up with the synchronous
        ``get_paginated_content_infos``; the deletions then run concurrently.

        Returns:
            list[DeleteContentResponse]: One response per deleted content.
        """
        responses: list[DeleteContentResponse] = []

        if metadata_filter:
            matching = self.get_paginated_content_infos(
                metadata_filter=metadata_filter
            )

            # All coroutines are created first, then awaited together.
            pending_deletes = [
                delete_content_async(
                    user_id=self._user_id,
                    company_id=self._company_id,
                    content_id=content_info.id,
                )
                for content_info in matching.content_infos
            ]
            responses = list(await asyncio.gather(*pending_deletes))

        return responses

    def _get_knowledge_base_location(
        self, *, scope_id: str
    ) -> tuple[PurePath, list[str]]:
        """
        Get the path and the scope id chain of a folder from a scope id.

        Starting at the given folder, the parents are followed via ``parent_id``
        until a folder without parent is reached. Name and id are recorded at
        every level.

        Args:
            scope_id (str): The scope id of the folder.

        Returns:
            PurePath: The path of the folder.
            list[str]: The list of scope ids from root to the folder.
        """

        folder_info = self.get_folder_info(scope_id=scope_id)
        # Collected leaf-first; reversed below to get root-first order.
        folder_names: list[str] = [folder_info.name]
        scope_ids: list[str] = [folder_info.id]

        while folder_info.parent_id is not None:
            folder_info = self.get_folder_info(scope_id=folder_info.parent_id)
            folder_names.append(folder_info.name)
            # Ancestor ids must be recorded too, or only the leaf id is returned.
            scope_ids.append(folder_info.id)

        folder_names.reverse()
        scope_ids.reverse()
        return PurePath("/" + "/".join(folder_names)), scope_ids

    # Utility Functions
    # ------------------------------------------------------------------------------------------------

    def get_folder_path(self, *, scope_id: str) -> PurePath:
        """
        Return the path of the folder identified by a scope id.

        Args:
            scope_id (str): The scope id of the folder.

        Returns:
            PurePath: The path of the folder.
        """
        location = self._get_knowledge_base_location(scope_id=scope_id)
        path_of_folder, _scope_ids = location
        return path_of_folder

    def get_scope_id_path(self, *, scope_id: str) -> list[str]:
        """
        Return the scope id list computed for the folder identified by a scope id.

        Args:
            scope_id (str): The scope id of the folder.

        Returns:
            list[str]: The list of scope ids from root to the folder.
        """
        location = self._get_knowledge_base_location(scope_id=scope_id)
        _path_of_folder, scope_id_chain = location
        return scope_id_chain

`__init__(company_id, user_id, metadata_filter=None)` ¶

Initialize the KnowledgeBaseService with a company_id, a user_id and an optional metadata filter.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def __init__(
    self,
    company_id: str,
    user_id: str,
    metadata_filter: dict | None = None,
):
    """
    Initialize the KnowledgeBaseService with a company_id, a user_id and an optional metadata filter.

    Args:
        company_id (str): The id of the company. Checked by validate_required_values.
        user_id (str): The id of the user. Checked by validate_required_values.
        metadata_filter (dict | None): Metadata filter stored on the instance. Defaults to None.
    """

    # Both ids are passed through validate_required_values before being stored.
    self._company_id, self._user_id = validate_required_values(
        [company_id, user_id]
    )
    self._metadata_filter = metadata_filter

`_get_knowledge_base_location(*, scope_id)` ¶

Get the path of a folder from a scope id.

Parameters:

Name	Type	Description	Default
`scope_id`	`str`	The scope id of the folder.	required

Returns:

Name	Type	Description
`PurePath`	`PurePath`	The path of the folder.
	`list[str]`	list[str]: The list of scope ids from root to the folder.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def _get_knowledge_base_location(
    self, *, scope_id: str
) -> tuple[PurePath, list[str]]:
    """
    Get the path of a folder and the chain of scope ids leading to it.

    The folder hierarchy is walked upwards from the given folder through each
    parent_id until a folder without a parent is reached. One call to
    get_folder_info is made per level.

    Args:
        scope_id (str): The scope id of the folder.

    Returns:
        PurePath: The path of the folder.
        list[str]: The list of scope ids from root to the folder.
    """

    folder_info = self.get_folder_info(scope_id=scope_id)
    # Both lists are collected leaf-first and reversed once at the end.
    list_of_folder_names: list[str] = [folder_info.name]
    list_of_scope_ids: list[str] = [folder_info.id]

    while folder_info.parent_id is not None:
        folder_info = self.get_folder_info(scope_id=folder_info.parent_id)
        list_of_folder_names.append(folder_info.name)
        # Record every ancestor id as well, otherwise only the leaf id is returned.
        list_of_scope_ids.append(folder_info.id)

    list_of_folder_names.reverse()
    list_of_scope_ids.reverse()
    return PurePath("/" + "/".join(list_of_folder_names)), list_of_scope_ids

`batch_file_upload(*, local_files, remote_folders, overwrite=False, metadata_generator=None)` ¶

Upload files to the knowledge base into corresponding folders

Parameters:

Name	Type	Description	Default
`local_files`	`list[Path]`	The local files to upload	required
`remote_folders`	`list[PurePath]`	The remote folders to upload the files to	required
`overwrite`	`bool`	Whether to overwrite existing files	`False`
`metadata_generator`	`Callable[[Path, PurePath], dict[str, Any]] \| None`	The metadata generator function	`None`

Returns:

Type	Description
`None`	None

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def batch_file_upload(
    self,
    *,
    local_files: list[Path],
    remote_folders: list[PurePath],
    overwrite: bool = False,
    metadata_generator: Callable[[Path, PurePath], dict[str, Any]] | None = None,
) -> None:
    """
    Upload files to the knowledge base into corresponding folders

    Args:
        local_files (list[Path]): The local files to upload
        remote_folders (list[PurePath]): The remote folders to upload the files to
        overwrite (bool): Whether to overwrite existing files
        metadata_generator (Callable[[Path, PurePath], dict[str, Any]] | None): The metadata generator function

    Returns:
        None
    """

    if len(local_files) != len(remote_folders):
        raise ValueError(
            "The number of local files and remote folders must be the same"
        )

    creation_result = self.create_folders(paths=remote_folders)

    folders_path_to_scope_id = {
        folder_path: result.id
        for folder_path, result in zip(remote_folders, creation_result)
    }

    _old_scope_id = None
    _existing_file_names: list[str] = []

    for remote_folder_path, local_file_path in zip(remote_folders, local_files):
        scope_id = folders_path_to_scope_id[remote_folder_path]
        mime_type = mimetypes.guess_type(local_file_path.name)[0]

        if mime_type is None:
            _LOGGER.warning(
                f"No mime type found for file {local_file_path.name}, skipping"
            )
            continue

        if not overwrite:
            if _old_scope_id is None or _old_scope_id != scope_id:
                _LOGGER.debug(f"Switching to new folder {scope_id}")
                _old_scope_id = scope_id
                _existing_file_names = self.get_file_names_in_folder(
                    scope_id=scope_id
                )

            if local_file_path.name in _existing_file_names:
                _LOGGER.warning(
                    f"File {local_file_path.name} already exists in folder {scope_id}, skipping"
                )
                continue

        metadata = None
        if metadata_generator is not None:
            metadata = metadata_generator(local_file_path, remote_folder_path)

        self.upload_content(
            path_to_content=str(local_file_path),
            content_name=local_file_path.name,
            mime_type=mime_type,
            scope_id=scope_id,
            metadata=metadata,
        )

`create_folders(*, paths)` ¶

Create folders in the knowledge base if the path does not exist.

Parameters:

Name	Type	Description	Default
`paths`	`list[PurePath]`	The paths to create the folders at	required

Returns:

Type	Description
`list[BaseFolderInfo]`	list[BaseFolderInfo]: The information about the created folders or existing folders

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def create_folders(self, *, paths: list[PurePath]) -> list[BaseFolderInfo]:
    """
    Create folders in the knowledge base if the path does not exist.

    Args:
        paths (list[PurePath]): The paths to create the folders at

    Returns:
        list[BaseFolderInfo]: The information about the created folders or existing folders
    """
    # The SDK call takes plain posix-style path strings.
    posix_paths = [folder_path.as_posix() for folder_path in paths]
    response = unique_sdk.Folder.create_paths(
        user_id=self._user_id,
        company_id=self._company_id,
        paths=posix_paths,
    )

    folder_infos: list[BaseFolderInfo] = []
    for raw_folder in response["createdFolders"]:
        folder_infos.append(
            BaseFolderInfo.model_validate(raw_folder, by_alias=True, by_name=True)
        )
    return folder_infos

`delete_content(*, content_id=None, file_path=None)` ¶

delete_content(*, content_id: str) -> DeleteContentResponse

delete_content(*, file_path: str) -> DeleteContentResponse

Delete content by id or file path.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def delete_content(
    self,
    *,
    content_id: str | None = None,
    file_path: str | None = None,
) -> DeleteContentResponse:
    """
    Delete a single content item, addressed by its id or by its file path.

    Args:
        content_id (str | None): The id of the content to delete. Defaults to None.
        file_path (str | None): The file path of the content to delete. Defaults to None.

    Returns:
        DeleteContentResponse: The response of the delete call.
    """
    # Delegates to the module-level delete_content with this service's identity.
    response = delete_content(
        company_id=self._company_id,
        user_id=self._user_id,
        file_path=file_path,
        content_id=content_id,
    )
    return response

`delete_contents(*, metadata_filter)` ¶

Delete all content matching the metadata filter

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def delete_contents(
    self,
    *,
    metadata_filter: dict[str, Any],
) -> list[DeleteContentResponse]:
    """
    Delete all content matching the metadata filter.

    Args:
        metadata_filter (dict[str, Any]): The filter selecting the content to delete.
            An empty filter deletes nothing.

    Returns:
        list[DeleteContentResponse]: One response per deleted content item.
    """
    # Guard: a falsy filter is never forwarded, so nothing is looked up or deleted.
    if not metadata_filter:
        return []

    matches = self.get_paginated_content_infos(
        metadata_filter=metadata_filter,
    )
    return [
        delete_content(
            user_id=self._user_id,
            company_id=self._company_id,
            content_id=match.id,
        )
        for match in matches.content_infos
    ]

`download_content_to_bytes(*, content_id)` ¶

Downloads content to memory

Parameters:

Name	Type	Description	Default
`content_id`	`str`	The id of the uploaded content.	required

Returns:

Name	Type	Description
`bytes`	`bytes`	The downloaded content.

Raises:

Type	Description
`Exception`	If the download fails.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def download_content_to_bytes(
    self,
    *,
    content_id: str,
) -> bytes:
    """
    Download a content item into memory.

    Args:
        content_id (str): The id of the uploaded content.

    Returns:
        bytes: The downloaded content.

    Raises:
        Exception: If the download fails.
    """
    # No chat is involved here, so chat_id is passed as None.
    payload = download_content_to_bytes(
        content_id=content_id,
        chat_id=None,
        company_id=self._company_id,
        user_id=self._user_id,
    )
    return payload

`download_content_to_file(*, content_id, output_dir_path=None, output_filename=None)` ¶

Downloads content from the knowledge base and saves it to a file.

Parameters:

Name	Type	Description	Default
`content_id`	`str`	The ID of the content to download.	required
`output_filename`	`str \| None`	The name of the file to save the content as. If not provided, the original filename will be used. Defaults to None.	`None`
`output_dir_path`	`str \| Path \| None`	The path to the temporary directory where the content will be saved. Defaults to "/tmp".	`None`

Returns:

Name	Type	Description
`Path`	`Path`	The path to the downloaded file.

Raises:

Type	Description
`Exception`	If the download fails or the filename cannot be determined.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def download_content_to_file(
    self,
    *,
    content_id: str,
    output_dir_path: Path | None = None,
    output_filename: str | None = None,
) -> Path:
    """
    Download a content item and save it to a file.

    Args:
        content_id (str): The ID of the content to download.
        output_dir_path (Path | None): The directory where the content will be saved;
            forwarded as tmp_dir_path. Defaults to None.
        output_filename (str | None): The name of the file to save the content as;
            forwarded as filename. Defaults to None.

    Returns:
        Path: The path to the downloaded file.

    Raises:
        Exception: If the download fails or the filename cannot be determined.
    """
    # No chat is involved here, so an empty chat_id is passed.
    downloaded_path = download_content_to_file_by_id(
        content_id=content_id,
        chat_id="",
        filename=output_filename,
        tmp_dir_path=output_dir_path,
        company_id=self._company_id,
        user_id=self._user_id,
    )
    return downloaded_path

`from_settings(settings=None, metadata_filter=None)` `classmethod` ¶

Initialize the KnowledgeBaseService with a settings object and metadata filter.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

@classmethod
def from_settings(
    cls,
    settings: UniqueSettings | str | None = None,
    metadata_filter: dict | None = None,
):
    """
    Build the service from a settings object, a settings file name, or the environment.

    Args:
        settings (UniqueSettings | str | None): A ready settings object, a file name
            passed to UniqueSettings.from_env_auto_with_sdk_init, or None to call
            that loader without a file name. Defaults to None.
        metadata_filter (dict | None): Metadata filter stored on the new instance.
            Defaults to None.

    Returns:
        A new instance of this class using the company id and user id from the settings.
    """

    if isinstance(settings, str):
        resolved = UniqueSettings.from_env_auto_with_sdk_init(filename=settings)
    elif settings is None:
        resolved = UniqueSettings.from_env_auto_with_sdk_init()
    else:
        resolved = settings

    # Both ids are stored as secrets on the settings object and unwrapped here.
    company_id = resolved.auth.company_id.get_secret_value()
    user_id = resolved.auth.user_id.get_secret_value()
    return cls(
        company_id=company_id,
        user_id=user_id,
        metadata_filter=metadata_filter,
    )

`get_file_names_in_folder(*, scope_id)` ¶

Get the list of file names in a knowledge base folder

Parameters:

Name	Type	Description	Default
`scope_id`	`str`	The scope id of the folder	required

Returns:

Type	Description
`list[str]`	list[str]: The list of file names in the folder

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def get_file_names_in_folder(self, *, scope_id: str) -> list[str]:
    """
    List the file names found in a knowledge base folder.

    Args:
        scope_id (str): The scope id of the folder

    Returns:
        list[str]: The list of file names in the folder
    """
    # Select content whose folderId equals the given scope id.
    folder_rule = Statement(
        path=["folderId"], operator=Operator.EQUALS, value=scope_id
    )
    folder_filter = folder_rule.model_dump(mode="json")
    listing = self.get_paginated_content_infos(metadata_filter=folder_filter)

    # The key of each content info is returned as its file name.
    file_names = []
    for content_info in listing.content_infos:
        file_names.append(content_info.key)
    return file_names

`get_folder_path(*, scope_id)` ¶

Get the path of a folder from a scope id.

Parameters:

Name	Type	Description	Default
`scope_id`	`str`	The scope id of the folder.	required

Returns:

Name	Type	Description
`PurePath`	`PurePath`	The path of the folder.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def get_folder_path(self, *, scope_id: str) -> PurePath:
    """
    Resolve the knowledge base path of the folder identified by a scope id.

    Args:
        scope_id (str): The scope id of the folder.

    Returns:
        PurePath: The path of the folder.
    """
    # The location helper returns (path, scope ids); only the path is wanted here.
    return self._get_knowledge_base_location(scope_id=scope_id)[0]

`remove_content_metadata(*, content_info, keys_to_remove)` ¶

Removes the specified keys irreversibly from the content metadata.

Note: Keys are camelized before being removed as metadata keys are stored in camelCase.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def remove_content_metadata(
    self,
    *,
    content_info: ContentInfo,
    keys_to_remove: list[str],
) -> ContentInfo:
    """
    Removes the specified keys irreversibly from the content metadata.

    Each key is set to None in the metadata, which is then sent through
    update_content. Content without metadata is returned unchanged after a warning.

    Note: Keys are camelized before being removed as metadata keys are stored in camelCase.

    Args:
        content_info (ContentInfo): The content whose metadata is modified.
        keys_to_remove (list[str]): The metadata keys to remove.

    Returns:
        ContentInfo: The result of update_content, or the given content_info when it
            has no metadata.
    """

    if content_info.metadata is not None:
        for camel_key in map(humps.camelize, keys_to_remove):
            # Removal is expressed by setting the key to None.
            content_info.metadata[camel_key] = None

        return update_content(
            user_id=self._user_id,
            company_id=self._company_id,
            content_id=content_info.id,
            metadata=content_info.metadata or {},
        )

    _LOGGER.warning(f"Content metadata is None for content {content_info.id}")
    return content_info

`remove_contents_metadata(*, keys_to_remove, metadata_filter=None, content_infos=None)` ¶

remove_contents_metadata(
    *,
    keys_to_remove: list[str],
    content_infos: list[ContentInfo],
) -> list[ContentInfo]

remove_contents_metadata(
    *,
    keys_to_remove: list[str],
    metadata_filter: dict[str, Any],
) -> list[ContentInfo]

Remove the specified keys irreversibly from the content metadata.

Note: Keys are camelized before being removed as metadata keys are stored in camelCase.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def remove_contents_metadata(
    self,
    *,
    keys_to_remove: list[str],
    metadata_filter: dict[str, Any] | None = None,
    content_infos: list[ContentInfo] | None = None,
) -> list[ContentInfo]:
    """Remove the specified keys irreversibly from the metadata of several contents.

    The contents are either given directly through content_infos or, when that
    is None, looked up with metadata_filter.

    Note: Keys are camelized before being removed as metadata keys are stored in camelCase.

    Args:
        keys_to_remove (list[str]): The metadata keys to remove.
        metadata_filter (dict[str, Any] | None): Filter used to look up the contents
            when content_infos is None. Defaults to None.
        content_infos (list[ContentInfo] | None): The contents to modify. Defaults to None.

    Returns:
        list[ContentInfo]: The contents that were processed.
    """

    targets = content_infos
    if targets is None:
        lookup = self.get_paginated_content_infos(metadata_filter=metadata_filter)
        targets = lookup.content_infos

    for target in targets:
        self.remove_content_metadata(
            content_info=target,
            keys_to_remove=keys_to_remove,
        )

    return targets

`resolve_visible_file_tree(*, metadata_filter=None)` ¶

Resolves the visible file tree for the knowledge base for the current user.

Parameters:

Name	Type	Description	Default
`metadata_filter`	`dict[str, Any] \| None`	The metadata filter to use. Defaults to None.	`None`

Returns:

Type	Description
`list[str]`	list[str]: The visible file tree.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def resolve_visible_file_tree(
    self, *, metadata_filter: dict[str, Any] | None = None
) -> list[str]:
    """
    Resolves the visible file tree for the knowledge base for the current user.

    Args:
        metadata_filter (dict[str, Any] | None): The metadata filter to use. Defaults to None.

    Returns:
        list[str]: The visible file tree.



    """
    info = self.get_paginated_content_infos(
        metadata_filter=metadata_filter,
    )

    return self._resolve_visible_file_tree(content_infos=info.content_infos)

`search_content_chunks(*, search_string, search_type, limit, search_language=DEFAULT_SEARCH_LANGUAGE, reranker_config=None, scope_ids=None, metadata_filter=None, content_ids=None, score_threshold=None)` ¶

search_content_chunks(
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    scope_ids: list[str],
    score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
) -> list[ContentChunk]

search_content_chunks(
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    metadata_filter: dict,
    scope_ids: list[str] | None = None,
    score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
) -> list[ContentChunk]

search_content_chunks(
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    metadata_filter: dict,
    content_ids: list[str],
    score_threshold: float = _DEFAULT_SCORE_THRESHOLD,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
) -> list[ContentChunk]

Performs a synchronous search for content chunks in the knowledge base.

Parameters:

Name	Type	Description	Default
`search_string`	`str`	The search string.	required
`search_type`	`ContentSearchType`	The type of search to perform.	required
`limit`	`int`	The maximum number of results to return.	required
`search_language`	`str`	The language for the full-text search. Defaults to "english".	`DEFAULT_SEARCH_LANGUAGE`
`reranker_config`	`ContentRerankerConfig \| None`	The reranker configuration. Defaults to None.	`None`
`scope_ids`	`list[str] \| None`	The scope IDs to filter by. Defaults to None.	`None`
`metadata_filter`	`dict \| None`	UniqueQL metadata filter. If unspecified/None, it tries to use the metadata filter from the event. Defaults to None.	`None`
`content_ids`	`list[str] \| None`	The content IDs to search within. Defaults to None.	`None`
`score_threshold`	`float \| None`	Sets the minimum similarity score for search results to be considered. Defaults to 0.	`None`

Returns:

Type	Description
`list[ContentChunk]`	list[ContentChunk]: The search results.

Raises:

Type	Description
`Exception`	If there's an error during the search operation.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def search_content_chunks(
    self,
    *,
    search_string: str,
    search_type: ContentSearchType,
    limit: int,
    search_language: str = DEFAULT_SEARCH_LANGUAGE,
    reranker_config: ContentRerankerConfig | None = None,
    scope_ids: list[str] | None = None,
    metadata_filter: dict | None = None,
    content_ids: list[str] | None = None,
    score_threshold: float | None = None,
) -> list[ContentChunk]:
    """
    Performs a synchronous search for content chunks in the knowledge base.

    Args:
        search_string (str): The search string.
        search_type (ContentSearchType): The type of search to perform.
        limit (int): The maximum number of results to return.
        search_language (str, optional): The language for the full-text search. Defaults to DEFAULT_SEARCH_LANGUAGE.
        reranker_config (ContentRerankerConfig | None, optional): The reranker configuration. Defaults to None.
        scope_ids (list[str] | None, optional): The scope IDs to filter by. Defaults to None.
        metadata_filter (dict | None, optional): UniqueQL metadata filter. If None, the metadata filter stored on the service is used. Defaults to None.
        content_ids (list[str] | None, optional): The content IDs to search within. Defaults to None.
        score_threshold (float | None, optional): Sets the minimum similarity score for search results to be considered. Defaults to None.

    Returns:
        list[ContentChunk]: The search results.

    Raises:
        Exception: If there's an error during the search operation. The error is logged and re-raised.
    """

    # Fall back to the filter stored on the service when the caller gives none.
    effective_filter = (
        self._metadata_filter if metadata_filter is None else metadata_filter
    )

    try:
        return search_content_chunks(
            user_id=self._user_id,
            company_id=self._company_id,
            chat_id="",
            search_string=search_string,
            search_type=search_type,
            limit=limit,
            search_language=search_language,
            reranker_config=reranker_config,
            scope_ids=scope_ids,
            chat_only=False,
            metadata_filter=effective_filter,
            content_ids=content_ids,
            score_threshold=score_threshold,
        )
    except Exception as error:
        _LOGGER.error(f"Error while searching content chunks: {error}")
        raise error

`search_contents(*, where, include_failed_content=False)` ¶

Performs a search in the knowledge base by filter (and not a similarity search). In contrast to search_content_chunks, this function loads the complete content of the files from the knowledge base.

Parameters:

Name	Type	Description	Default
`where`	`dict`	The search criteria.	required

Returns:

Type	Description
`list[Content]`	list[Content]: The search results.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def search_contents(
    self,
    *,
    where: dict,
    include_failed_content: bool = False,
) -> list[Content]:
    """
    Performs a search in the knowledge base by filter (and not a similarity search).

    In contrast to search_content_chunks, this function loads the complete
    content of the files from the knowledge base.

    Args:
        where (dict): The search criteria.
        include_failed_content (bool): Forwarded unchanged to the underlying search.
            Defaults to False.

    Returns:
        list[Content]: The search results.
    """
    # No chat is involved here, so an empty chat_id is passed.
    results = search_contents(
        where=where,
        include_failed_content=include_failed_content,
        chat_id="",
        company_id=self._company_id,
        user_id=self._user_id,
    )
    return results

`update_contents_metadata(*, additional_metadata, metadata_filter=None, content_infos=None)` ¶

update_contents_metadata(
    *,
    additional_metadata: dict[str, Any],
    content_infos: list[ContentInfo],
) -> list[ContentInfo]

update_contents_metadata(
    *,
    additional_metadata: dict[str, Any],
    metadata_filter: dict[str, Any],
) -> list[ContentInfo]

Update the metadata of the contents matching the metadata filter.

Note: Keys are camelized before being updated as metadata keys are stored in camelCase.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def update_contents_metadata(
    self,
    *,
    additional_metadata: dict[str, Any],
    metadata_filter: dict[str, Any] | None = None,
    content_infos: list[ContentInfo] | None = None,
) -> list[ContentInfo]:
    """Update the metadata of several contents.

    The contents are either given directly through content_infos or, when that
    is None, looked up with metadata_filter.

    Note: Keys are camelized before being updated as metadata keys are stored in camelCase.

    Args:
        additional_metadata (dict[str, Any]): The metadata to apply to each content.
        metadata_filter (dict[str, Any] | None): Filter used to look up the contents
            when content_infos is None. Defaults to None.
        content_infos (list[ContentInfo] | None): The contents to update. Defaults to None.

    Returns:
        list[ContentInfo]: The contents that were processed.
    """

    # Camelize first, then pass the result through _pop_forbidden_metadata_keys.
    prepared_metadata = self._pop_forbidden_metadata_keys(
        humps.camelize(additional_metadata)
    )

    targets = content_infos
    if targets is None:
        lookup = self.get_paginated_content_infos(metadata_filter=metadata_filter)
        targets = lookup.content_infos

    for target in targets:
        self.update_content_metadata(
            content_info=target,
            additional_metadata=prepared_metadata,
        )

    return targets

`upload_content(path_to_content, content_name, mime_type, scope_id, skip_ingestion=False, skip_excel_ingestion=False, ingestion_config=None, metadata=None)` ¶

Uploads content to the knowledge base.

Parameters:

Name	Type	Description	Default
`path_to_content`	`str`	The path to the content to upload.	required
`content_name`	`str`	The name of the content.	required
`mime_type`	`str`	The MIME type of the content.	required
`scope_id`	`str`	The scope ID.	required
`skip_ingestion`	`bool`	Whether to skip ingestion. Defaults to False.	`False`
`skip_excel_ingestion`	`bool`	Whether to skip excel ingestion. Defaults to False.	`False`
`ingestion_config`	`IngestionConfig \| None`	The ingestion configuration. Defaults to None.	`None`
`metadata`	`dict[str, Any] \| None`	The metadata to associate with the content. Defaults to None.	`None`

Returns:

Name	Type	Description
`Content`	`Content`	The uploaded content.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def upload_content(
    self,
    path_to_content: str,
    content_name: str,
    mime_type: str,
    scope_id: str,
    skip_ingestion: bool = False,
    skip_excel_ingestion: bool = False,
    ingestion_config: unique_sdk.Content.IngestionConfig | None = None,
    metadata: dict[str, Any] | None = None,
) -> Content:
    """
    Uploads a local file to the knowledge base.

    Args:
        path_to_content (str): The path to the content to upload.
        content_name (str): The name of the content.
        mime_type (str): The MIME type of the content.
        scope_id (str): The scope ID.
        skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
        skip_excel_ingestion (bool): Whether to skip excel ingestion. Defaults to False.
        ingestion_config (unique_sdk.Content.IngestionConfig | None): The ingestion configuration. Defaults to None.
        metadata (dict[str, Any] | None): The metadata to associate with the content. Defaults to None.

    Returns:
        Content: The uploaded content.
    """
    # No chat is involved here, so an empty chat_id is passed.
    uploaded = upload_content(
        user_id=self._user_id,
        company_id=self._company_id,
        chat_id="",
        scope_id=scope_id,
        path_to_content=path_to_content,
        content_name=content_name,
        mime_type=mime_type,
        metadata=metadata,
        ingestion_config=ingestion_config,
        skip_ingestion=skip_ingestion,
        skip_excel_ingestion=skip_excel_ingestion,
    )
    return uploaded

`upload_content_from_bytes(content, *, content_name, mime_type, scope_id, skip_ingestion=False, ingestion_config=None, metadata=None)` ¶

Uploads content to the knowledge base.

Parameters:

Name	Type	Description	Default
`content`	`bytes`	The content to upload.	required
`content_name`	`str`	The name of the content.	required
`mime_type`	`str`	The MIME type of the content.	required
`scope_id`	`str`	The scope ID.	required
`skip_ingestion`	`bool`	Whether to skip ingestion. Defaults to False.	`False`
`ingestion_config`	`IngestionConfig \| None`	The ingestion configuration. Defaults to None.	`None`
`metadata`	`dict \| None`	The metadata to associate with the content. Defaults to None.	`None`

Returns:

Name	Type	Description
`Content`	`Content`	The uploaded content.

Source code in unique_toolkit/unique_toolkit/services/knowledge_base.py

def upload_content_from_bytes(
    self,
    content: bytes,
    *,
    content_name: str,
    mime_type: str,
    scope_id: str,
    skip_ingestion: bool = False,
    ingestion_config: unique_sdk.Content.IngestionConfig | None = None,
    metadata: dict | None = None,
) -> Content:
    """
    Uploads in-memory content to the knowledge base.

    Args:
        content (bytes): The content to upload.
        content_name (str): The name of the content.
        mime_type (str): The MIME type of the content.
        scope_id (str): The scope ID.
        skip_ingestion (bool): Whether to skip ingestion. Defaults to False.
        ingestion_config (unique_sdk.Content.IngestionConfig | None): The ingestion configuration. Defaults to None.
        metadata (dict | None): The metadata to associate with the content. Defaults to None.

    Returns:
        Content: The uploaded content.
    """
    # No chat is involved here, so an empty chat_id is passed.
    uploaded = upload_content_from_bytes(
        user_id=self._user_id,
        company_id=self._company_id,
        chat_id="",
        scope_id=scope_id,
        content=content,
        content_name=content_name,
        mime_type=mime_type,
        metadata=metadata,
        ingestion_config=ingestion_config,
        skip_ingestion=skip_ingestion,
    )
    return uploaded

Knowledge Base Module

Service¶

unique_toolkit.services.knowledge_base ¶

KnowledgeBaseService ¶

__init__(company_id, user_id, metadata_filter=None) ¶

_get_knowledge_base_location(*, scope_id) ¶

batch_file_upload(*, local_files, remote_folders, overwrite=False, metadata_generator=None) ¶

create_folders(*, paths) ¶

delete_content(*, content_id=None, file_path=None) ¶

delete_contents(*, metadata_filter) ¶

download_content_to_bytes(*, content_id) ¶

download_content_to_file(*, content_id, output_dir_path=None, output_filename=None) ¶

from_settings(settings=None, metadata_filter=None) classmethod ¶

get_file_names_in_folder(*, scope_id) ¶

get_folder_path(*, scope_id) ¶

remove_content_metadata(*, content_info, keys_to_remove) ¶

remove_contents_metadata(*, keys_to_remove, metadata_filter=None, content_infos=None) ¶

resolve_visible_file_tree(*, metadata_filter=None) ¶

search_content_chunks(*, search_string, search_type, limit, search_language=DEFAULT_SEARCH_LANGUAGE, reranker_config=None, scope_ids=None, metadata_filter=None, content_ids=None, score_threshold=None) ¶

search_contents(*, where, include_failed_content=False) ¶

update_contents_metadata(*, additional_metadata, metadata_filter=None, content_infos=None) ¶

upload_content(path_to_content, content_name, mime_type, scope_id, skip_ingestion=False, skip_excel_ingestion=False, ingestion_config=None, metadata=None) ¶

upload_content_from_bytes(content, *, content_name, mime_type, scope_id, skip_ingestion=False, ingestion_config=None, metadata=None) ¶

`unique_toolkit.services.knowledge_base` ¶

`KnowledgeBaseService` ¶

`__init__(company_id, user_id, metadata_filter=None)` ¶

`_get_knowledge_base_location(*, scope_id)` ¶

`batch_file_upload(*, local_files, remote_folders, overwrite=False, metadata_generator=None)` ¶

`create_folders(*, paths)` ¶

`delete_content(*, content_id=None, file_path=None)` ¶

`delete_contents(*, metadata_filter)` ¶

`download_content_to_bytes(*, content_id)` ¶

`download_content_to_file(*, content_id, output_dir_path=None, output_filename=None)` ¶

`from_settings(settings=None, metadata_filter=None)` `classmethod` ¶

`get_file_names_in_folder(*, scope_id)` ¶

`get_folder_path(*, scope_id)` ¶

`remove_content_metadata(*, content_info, keys_to_remove)` ¶

`remove_contents_metadata(*, keys_to_remove, metadata_filter=None, content_infos=None)` ¶

`resolve_visible_file_tree(*, metadata_filter=None)` ¶

`search_content_chunks(*, search_string, search_type, limit, search_language=DEFAULT_SEARCH_LANGUAGE, reranker_config=None, scope_ids=None, metadata_filter=None, content_ids=None, score_threshold=None)` ¶

`search_contents(*, where, include_failed_content=False)` ¶

`update_contents_metadata(*, additional_metadata, metadata_filter=None, content_infos=None)` ¶

`upload_content(path_to_content, content_name, mime_type, scope_id, skip_ingestion=False, skip_excel_ingestion=False, ingestion_config=None, metadata=None)` ¶

`upload_content_from_bytes(content, *, content_name, mime_type, scope_id, skip_ingestion=False, ingestion_config=None, metadata=None)` ¶