Agents

`llama_cpp_agent.llm_agent`

`StreamingResponse` `dataclass`

Represents a streaming response with text and an indicator for the last response.

Source code in llama_cpp_agent/llm_agent.py

@dataclass
class StreamingResponse:
    """
    One chunk of a streamed LLM response.

    Carries the text of the chunk together with a flag telling the consumer
    whether the stream has finished.
    """

    # Text content carried by this chunk.
    text: str
    # True when this chunk marks the end of the stream.
    is_last_response: bool

    # The initializer is spelled out explicitly; @dataclass leaves a
    # user-defined __init__ in place and still supplies __repr__ and __eq__.
    def __init__(self, text: str, is_last_response: bool):
        """
        Create a StreamingResponse.

        Args:
            text (str): The text content of the streaming response.
            is_last_response (bool): Whether this is the last response in the stream.
        """
        self.text, self.is_last_response = text, is_last_response

`__init__(text, is_last_response)`

Initializes a new StreamingResponse object.

Parameters:

text (str) –

The text content of the streaming response.
is_last_response (bool) –

Indicates whether this is the last response in the stream.

Source code in llama_cpp_agent/llm_agent.py

def __init__(self, text: str, is_last_response: bool):
    """
    Create a StreamingResponse.

    Args:
        text (str): The text content of the streaming response.
        is_last_response (bool): Whether this is the last response in the stream.
    """
    self.text, self.is_last_response = text, is_last_response

`LlamaCppAgent`

A base agent that can be used for chat, structured output and function calling.

Source code in llama_cpp_agent/llm_agent.py

class LlamaCppAgent:
    """
    A base agent that can be used for chat, structured output and function calling.
    """

    def __init__(
            self,
            provider: LlmProvider,
            name: str = "llamacpp_agent",
            system_prompt: str = "You are a helpful assistant.",
            predefined_messages_formatter_type: MessagesFormatterType = MessagesFormatterType.CHATML,
            custom_messages_formatter: MessagesFormatter = None,
            chat_history: ChatHistory = None,
            add_tools_and_structures_documentation_to_system_prompt: bool = True,
            debug_output: bool = False,
    ):
        """
        Set up a LlamaCppAgent.

        A custom messages formatter, when given, takes precedence over the
        predefined formatter type. When no chat history is supplied a fresh
        BasicChatHistory is created. In both cases the system prompt is
        appended to the chat history as a system-role message.

        Args:
           provider (LlmProvider): The underlying llm provider (LlamaCppServerProvider, LlamaCppPythonProvider, TGIServerProvider or VLLMServerProvider).
           name (str): The name of the agent.
           system_prompt (str): The system prompt used in chat interactions.
           predefined_messages_formatter_type (MessagesFormatterType): The predefined messages formatter to use when no custom one is given.
           custom_messages_formatter (MessagesFormatter): Custom messages formatter; overrides the predefined type when not None.
           chat_history (ChatHistory): The object that stores the chat history.
           add_tools_and_structures_documentation_to_system_prompt (bool): Whether the system prompt is dynamically suffixed with documentation for function calling or structured output.
           debug_output (bool): Indicates whether debug output should be enabled.
        """
        self.provider = provider
        self.name = name
        self.debug_output = debug_output
        self.messages_formatter = (
            custom_messages_formatter
            if custom_messages_formatter is not None
            else get_predefined_messages_formatter(predefined_messages_formatter_type)
        )
        self.last_response = ""
        self.chat_history = BasicChatHistory() if chat_history is None else chat_history

        # The history must exist before the system prompt can be recorded in it.
        self.add_message(role=Roles.system, message=system_prompt)
        self.system_prompt = system_prompt
        self.add_tools_and_structures_documentation_to_system_prompt = (
            add_tools_and_structures_documentation_to_system_prompt
        )

    def add_message(
            self,
            message: str,
            role: Roles,
    ):
        """
        Append a single message to the agent's chat history.

        The message is stored as a dict with the keys "role" and "content".

        Args:
            message (str): The content of the message.
            role (Literal["system"] | Literal["user"] | Literal["assistant"] | Literal["tool"]): The role of the message sender.
        """
        entry = {"role": role, "content": message}
        self.chat_history.add_message(entry)

    def get_text_response(
            self,
            prompt: str | list[int] = None,
            structured_output_settings: LlmStructuredOutputSettings = None,
            llm_sampling_settings: LlmSamplingSettings = None,
            streaming_callback: Callable[[StreamingResponse], None] = None,
            returns_streaming_generator: bool = False,
            print_output: bool = False,
    ) -> Union[
        str,
        List[dict],
        BaseModel,
        Generator[Any, Any, str | BaseModel | list[BaseModel]],
    ]:
        """
        Get a text response from the LLM provider.

        The raw completion text is stored in ``self.last_response`` and then
        passed through ``structured_output_settings.handle_structured_output``.
        The chat history is not touched by this method.

        Args:
            prompt (str | list[int]): The prompt for the LLM.
            structured_output_settings (LlmStructuredOutputSettings): Settings for structured output. Defaults to no structured output.
            llm_sampling_settings (LlmSamplingSettings): Sampling settings for the LLM. Defaults to the provider's default settings; a passed-in object is deep-copied and never mutated.
            streaming_callback (Callable[[StreamingResponse], None]): Callback invoked for every streamed chunk and once more with an empty, final chunk.
            returns_streaming_generator (bool): Whether to return a generator streaming the results. Only honoured when the sampling settings enable streaming.
            print_output (bool): Whether to print the output.

        Returns:
            Union[str, List[dict], BaseModel, Generator[Any, Any, str | BaseModel | list[BaseModel]]]: The generated response. A string message, a list of function calls, an object from structured output or a generator for the response. The string "Error: No model loaded!" is returned when the agent has no provider.
        """
        if self.debug_output and isinstance(prompt, str):
            print(prompt, end="")

        # Check for a provider before it is used for the first time; otherwise
        # the default-settings lookup below would fail before the error string
        # could ever be returned.
        if not self.provider:
            return "Error: No model loaded!"

        if structured_output_settings is None:
            structured_output_settings = LlmStructuredOutputSettings(
                output_type=LlmStructuredOutputType.no_structured_output
            )
        if llm_sampling_settings is None:
            llm_sampling_settings = self.provider.get_provider_default_settings()
        else:
            # Work on a copy so the stop sequences added below do not leak
            # into the caller's settings object.
            llm_sampling_settings = deepcopy(llm_sampling_settings)

        if llm_sampling_settings.get_additional_stop_sequences() is not None:
            llm_sampling_settings.add_additional_stop_sequences(
                self.messages_formatter.default_stop_sequences
            )

        completion = self.get_text_completion(
            prompt=prompt,
            structured_output_settings=structured_output_settings,
            llm_samplings_settings=llm_sampling_settings,
        )

        # Debug mode echoes the output just like print_output does, for
        # streamed chunks as well as for the non-streaming result.
        echo_output = print_output or self.debug_output

        def stream_results():
            # Yields each text chunk; the processed full response becomes the
            # generator's return value.
            full_response_stream = ""
            for out_stream in completion:
                out_text = out_stream["choices"][0]["text"]
                full_response_stream += out_text
                yield out_text

            # Keep last_response in sync in generator mode as well.
            self.last_response = full_response_stream
            return structured_output_settings.handle_structured_output(
                full_response_stream, provider=self.provider
            )

        if llm_sampling_settings.is_streaming():
            if returns_streaming_generator:
                return stream_results()

            full_response = ""
            for out in completion:
                text = out["choices"][0]["text"]
                full_response += text
                if streaming_callback is not None:
                    streaming_callback(
                        StreamingResponse(text=text, is_last_response=False)
                    )
                if echo_output:
                    print(text, end="")
            if streaming_callback is not None:
                # Signal the end of the stream with an empty final chunk.
                streaming_callback(
                    StreamingResponse(text="", is_last_response=True)
                )
            if echo_output:
                print("")
        else:
            full_response = completion["choices"][0]["text"]
            if echo_output:
                print(full_response)

        self.last_response = full_response
        return structured_output_settings.handle_structured_output(
            full_response, provider=self.provider
        )

    def get_chat_response(
            self,
            message: str = None,
            role: Roles = Roles.user,
            prompt_suffix: str = None,
            chat_history: ChatHistory = None,
            system_prompt: str = None,
            system_prompt_modules: list[SystemPromptModule] = None,
            add_message_to_chat_history: bool = True,
            add_response_to_chat_history: bool = True,
            structured_output_settings: LlmStructuredOutputSettings = None,
            llm_sampling_settings: LlmSamplingSettings = None,
            streaming_callback: Callable[[StreamingResponse], None] = None,
            returns_streaming_generator: bool = False,
            print_output: bool = False,
    ) -> Union[
        str,
        List[dict],
        BaseModel,
        Generator[Any, Any, str | BaseModel | list[BaseModel]],
    ]:
        """
        Produce a chat response for the given message and conversation context.

        The response text (prefixed with ``prompt_suffix`` when one is given)
        is stored in ``self.last_response``, optionally appended to the chat
        history under the role returned by ``get_response_role_and_completion``,
        and finally passed through
        ``structured_output_settings.handle_structured_output``.

        Args:
            message (str): The input message.
            role (Literal["system", "user", "assistant", "tool"]): The role of the message sender.
            prompt_suffix (str): Suffix to append after the prompt.
            chat_history (ChatHistory): Chat history to use instead of the agent's internal one.
            system_prompt (str): Overrides the system prompt set on agent initialization.
            system_prompt_modules (list[SystemPromptModule]): Additional sections added to the system prompt.
            add_message_to_chat_history (bool): Whether to add the input message to the chat history.
            add_response_to_chat_history (bool): Whether to add the generated response to the chat history.
            structured_output_settings (LlmStructuredOutputSettings): Settings for structured output. Defaults to no structured output.
            llm_sampling_settings (LlmSamplingSettings): Sampling settings for the LLM. Defaults to the provider's default settings; a passed-in object is deep-copied.
            streaming_callback (Callable[[StreamingResponse], None]): Callback for streaming responses.
            returns_streaming_generator (bool): Whether to return a generator streaming the results.
            print_output (bool): Whether to print the generated response.

        Returns:
            Union[str, List[dict], BaseModel, Generator[Any, Any, str | BaseModel | list[BaseModel]]]: The generated chat response. A string message, a list of function calls, an object from structured output or a generator for the response.
        """
        history = self.chat_history if chat_history is None else chat_history

        if structured_output_settings is None:
            structured_output_settings = LlmStructuredOutputSettings(
                output_type=LlmStructuredOutputType.no_structured_output
            )
        llm_sampling_settings = (
            self.provider.get_provider_default_settings()
            if llm_sampling_settings is None
            else deepcopy(llm_sampling_settings)
        )

        if llm_sampling_settings.get_additional_stop_sequences() is not None:
            llm_sampling_settings.add_additional_stop_sequences(
                self.messages_formatter.default_stop_sequences
            )

        completion, response_role = self.get_response_role_and_completion(
            message=message,
            chat_history=history,
            system_prompt=system_prompt,
            system_prompt_modules=system_prompt_modules,
            add_message_to_chat_history=add_message_to_chat_history,
            role=role,
            prompt_suffix=prompt_suffix,
            structured_output_settings=structured_output_settings,
            llm_sampling_settings=llm_sampling_settings,
        )

        def finish(response_text):
            # Shared tail of every response path: prefix the suffix, remember
            # the response, record it in the history and post-process it.
            if prompt_suffix:
                response_text = prompt_suffix + response_text
            self.last_response = response_text
            if add_response_to_chat_history:
                history.add_message(
                    {
                        "role": response_role,
                        "content": response_text,
                    }
                )
            return structured_output_settings.handle_structured_output(
                response_text, prompt_suffix=prompt_suffix, provider=self.provider
            )

        def stream_results():
            # Yields every chunk except the EOS token; the processed full
            # response becomes the generator's return value.
            streamed = ""
            for out_stream in completion:
                out_text = out_stream["choices"][0]["text"]
                if out_text != self.messages_formatter.eos_token:
                    streamed += out_text
                    yield out_text
            return finish(streamed)

        if not self.provider:
            return "Error: No model loaded!"

        # NOTE(review): the generator is returned without checking
        # is_streaming(); it iterates the completion chunk by chunk, so it
        # presumably requires streaming sampling settings - confirm.
        if returns_streaming_generator:
            return stream_results()

        if not llm_sampling_settings.is_streaming():
            text = completion["choices"][0]["text"]
            if text.strip().endswith(self.messages_formatter.eos_token):
                text = text.replace(self.messages_formatter.eos_token, "")
            if print_output or self.debug_output:
                print(text)
            return finish(text)

        collected = ""
        for chunk in completion:
            piece = chunk["choices"][0]["text"]
            if piece == self.messages_formatter.eos_token:
                # The EOS token is neither collected, reported nor printed.
                continue
            collected += piece
            if streaming_callback is not None:
                streaming_callback(
                    StreamingResponse(text=piece, is_last_response=False)
                )
            if print_output or self.debug_output:
                print(piece, end="")
        if streaming_callback is not None:
            # Signal the end of the stream with an empty final chunk.
            streaming_callback(
                StreamingResponse(text="", is_last_response=True)
            )
        if print_output or self.debug_output:
            print("")
        return finish(collected)

    def get_text_completion(
            self,
            prompt: str | list[int] = None,
            structured_output_settings: LlmStructuredOutputSettings = None,
            llm_samplings_settings: LlmSamplingSettings = None,
    ):
        """
        Request a completion for the prompt from the provider.

        Forwards the arguments, together with the messages formatter's BOS
        token, to ``self.provider.create_completion`` and returns its result
        unchanged.

        Args:
            prompt (str | list[int]): The prompt for the LLM.
            structured_output_settings (LlmStructuredOutputSettings): Settings for structured output.
            llm_samplings_settings (LlmSamplingSettings): Sampling settings for the LLM.

        Returns:
            Whatever the provider's ``create_completion`` returns.
        """
        create_completion = self.provider.create_completion
        bos_token = self.messages_formatter.bos_token
        return create_completion(
            prompt,
            structured_output_settings,
            llm_samplings_settings,
            bos_token,
        )

    def get_response_role_and_completion(
            self,
            message: str = None,
            chat_history: ChatHistory = None,
            system_prompt: str = None,
            system_prompt_modules: list[SystemPromptModule] = None,
            add_message_to_chat_history: bool = True,
            role: Roles = Roles.user,
            prompt_suffix: str = None,
            llm_sampling_settings: LlmSamplingSettings = None,
            structured_output_settings: LlmStructuredOutputSettings = None,
    ):
        if len(chat_history.get_chat_messages()) == 0:
            if system_prompt:
                chat_history.add_message({"role": Roles.system, "content": system_prompt})
            else:
                chat_history.add_message({"role": Roles.system, "content": self.system_prompt})

        if message is not None and add_message_to_chat_history:
            chat_history.add_message(
                {
                    "role": role,
                    "content": message,
                }
            )

        messages = chat_history.get_chat_messages()
        if message is not None and not add_message_to_chat_history:
            messages.append(
                {
                    "role": role,
                    "content": message,
                },
            )

        if system_prompt:
            if messages[0]["role"] != Roles.system and (messages[0]["role"] != Roles.system.value):
                messages.insert(0, {"role": Roles.system, "content": system_prompt})
            else:
                messages[0]["content"] = system_prompt
        else:
            if messages[0]["role"] != Roles.system and (messages[0]["role"] != Roles.system.value):
                messages.insert(0, {"role": Roles.system, "content": self.system_prompt})
            else:
                messages[0]["content"] = self.system_prompt

        additional_suffix = ""
        if self.add_tools_and_structures_documentation_to_system_prompt:
            after_system_instructions_list = []
            after_system_instructions = ""
            if system_prompt_modules is not None:
                for module in system_prompt_modules:
                    if module.position == SystemPromptModulePosition.after_system_instructions:
                        after_system_instructions_list.append(module.get_formatted_content())
                if len(after_system_instructions_list) > 0:
                    after_system_instructions = "\n\n".join(after_system_instructions_list)
                else:
                    after_system_instructions = ""
            if structured_output_settings.output_type != LlmStructuredOutputType.no_structured_output:
                # additional_suffix = "\n"
                thoughts_and_reasoning = ""

                if structured_output_settings.output_type == LlmStructuredOutputType.function_calling or structured_output_settings.output_type == LlmStructuredOutputType.parallel_function_calling:
                    if structured_output_settings.add_thoughts_and_reasoning_field and self.provider.is_using_json_schema_constraints():

                        thoughts_and_reasoning = function_calling_thoughts_and_reasoning_templater
                        thoughts_and_reasoning = thoughts_and_reasoning.generate_prompt({
                            "thoughts_and_reasoning_field_name": "001_" + structured_output_settings.thoughts_and_reasoning_field_name})
                        function_field_name = "002_" + structured_output_settings.function_calling_name_field_name
                        arguments_field_name = "003_" + structured_output_settings.function_calling_content
                        heartbeat_beats = ""
                        if structured_output_settings.add_heartbeat_field:
                            heartbeat_field_name = "004_" + structured_output_settings.heartbeat_field_name
                            heartbeat_beats = function_calling_heart_beats_templater
                            heartbeat_beats = heartbeat_beats.generate_prompt(
                                {"heartbeat_field_name": heartbeat_field_name})
                        function_list = structured_output_settings.get_llm_documentation(
                            provider=self.provider)
                        system_prompt = function_calling_system_prompt_templater
                        system_prompt = system_prompt.generate_prompt({"system_instructions": messages[0]["content"],
                                                                       "after_system_instructions": after_system_instructions,
                                                                       "thoughts_and_reasoning": thoughts_and_reasoning,
                                                                       "function_field_name": function_field_name,
                                                                       "arguments_field_name": arguments_field_name,
                                                                       "heart_beats": heartbeat_beats,
                                                                       "function_list": function_calling_function_list_templater.generate_prompt(
                                                                           {"function_list": function_list})})
                        messages[0]["content"] = system_prompt
                    elif not structured_output_settings.add_thoughts_and_reasoning_field and self.provider.is_using_json_schema_constraints():

                        function_field_name = "001_" + structured_output_settings.function_calling_name_field_name
                        arguments_field_name = "002_" + structured_output_settings.function_calling_content
                        heartbeat_beats = ""
                        if structured_output_settings.add_heartbeat_field:
                            heartbeat_field_name = "003_" + structured_output_settings.heartbeat_field_name
                            heartbeat_beats = function_calling_heart_beats_templater
                            heartbeat_beats = heartbeat_beats.generate_prompt(
                                {"heartbeat_field_name": heartbeat_field_name})
                        function_list = structured_output_settings.get_llm_documentation(
                            provider=self.provider)
                        system_prompt = function_calling_system_prompt_templater
                        system_prompt = system_prompt.generate_prompt({"system_instructions": messages[0]["content"],
                                                                       "after_system_instructions": after_system_instructions,
                                                                       "thoughts_and_reasoning": thoughts_and_reasoning,
                                                                       "function_field_name": function_field_name,
                                                                       "arguments_field_name": arguments_field_name,
                                                                       "heart_beats": heartbeat_beats,
                                                                       "function_list": function_calling_function_list_templater.generate_prompt(
                                                                           {"function_list": function_list})})
                        messages[0]["content"] = system_prompt
                    elif structured_output_settings.add_thoughts_and_reasoning_field and not self.provider.is_using_json_schema_constraints():

                        thoughts_and_reasoning = function_calling_thoughts_and_reasoning_templater
                        thoughts_and_reasoning = thoughts_and_reasoning.generate_prompt({
                            "thoughts_and_reasoning_field_name": structured_output_settings.thoughts_and_reasoning_field_name})
                        function_field_name = structured_output_settings.function_calling_name_field_name
                        arguments_field_name = structured_output_settings.function_calling_content
                        heartbeat_beats = ""
                        if structured_output_settings.add_heartbeat_field:
                            heartbeat_field_name = structured_output_settings.heartbeat_field_name
                            heartbeat_beats = function_calling_heart_beats_templater
                            heartbeat_beats = heartbeat_beats.generate_prompt(
                                {"heartbeat_field_name": heartbeat_field_name})
                        function_list = structured_output_settings.get_llm_documentation(
                            provider=self.provider)
                        system_prompt = function_calling_system_prompt_templater
                        system_prompt = system_prompt.generate_prompt({"system_instructions": messages[0]["content"],
                                                                       "after_system_instructions": after_system_instructions,
                                                                       "thoughts_and_reasoning": thoughts_and_reasoning,
                                                                       "function_field_name": function_field_name,
                                                                       "arguments_field_name": arguments_field_name,
                                                                       "heart_beats": heartbeat_beats,
                                                                       "function_list": function_calling_function_list_templater.generate_prompt(
                                                                           {"function_list": function_list})})
                        messages[0]["content"] = system_prompt
                    elif not structured_output_settings.add_thoughts_and_reasoning_field and not self.provider.is_using_json_schema_constraints():

                        thoughts_and_reasoning = ""
                        function_field_name = structured_output_settings.function_calling_name_field_name
                        arguments_field_name = structured_output_settings.function_calling_content
                        heartbeat_beats = ""
                        if structured_output_settings.add_heartbeat_field:
                            heartbeat_field_name = structured_output_settings.heartbeat_field_name
                            heartbeat_beats = function_calling_heart_beats_templater
                            heartbeat_beats = heartbeat_beats.generate_prompt(
                                {"heartbeat_field_name": heartbeat_field_name})
                        function_list = structured_output_settings.get_llm_documentation(
                            provider=self.provider)
                        system_prompt = function_calling_system_prompt_templater
                        system_prompt = system_prompt.generate_prompt({"system_instructions": messages[0]["content"],
                                                                       "after_system_instructions": after_system_instructions,
                                                                       "thoughts_and_reasoning": thoughts_and_reasoning,
                                                                       "function_field_name": function_field_name,
                                                                       "arguments_field_name": arguments_field_name,
                                                                       "heart_beats": heartbeat_beats,
                                                                       "function_list": function_calling_function_list_templater.generate_prompt(
                                                                           {"function_list": function_list})})
                        messages[0]["content"] = system_prompt
                elif structured_output_settings.output_type == LlmStructuredOutputType.object_instance or structured_output_settings.output_type == LlmStructuredOutputType.list_of_objects:
                    if structured_output_settings.add_thoughts_and_reasoning_field and self.provider.is_using_json_schema_constraints():

                        thoughts_and_reasoning = structured_output_thoughts_and_reasoning_templater
                        thoughts_and_reasoning = thoughts_and_reasoning.generate_prompt({
                            "thoughts_and_reasoning_field_name": "001_" + structured_output_settings.thoughts_and_reasoning_field_name})
                        model_field_name = "002_" + structured_output_settings.output_model_name_field_name
                        fields_field_name = "003_" + structured_output_settings.output_model_attributes_field_name

                        output_models = structured_output_settings.get_llm_documentation(
                            provider=self.provider)
                        system_prompt = structured_output_templater
                        system_prompt = system_prompt.generate_prompt({"system_instructions": messages[0]["content"],
                                                                       "after_system_instructions": after_system_instructions,
                                                                       "thoughts_and_reasoning": thoughts_and_reasoning,
                                                                       "model_field_name": model_field_name,
                                                                       "fields_field_name": fields_field_name,
                                                                       "output_models": output_models})
                        messages[0]["content"] = system_prompt
                    elif not structured_output_settings.add_thoughts_and_reasoning_field and self.provider.is_using_json_schema_constraints():

                        thoughts_and_reasoning = ""
                        model_field_name = "001_" + structured_output_settings.output_model_name_field_name
                        fields_field_name = "002_" + structured_output_settings.output_model_attributes_field_name

                        output_models = structured_output_settings.get_llm_documentation(
                            provider=self.provider)
                        system_prompt = structured_output_templater
                        system_prompt = system_prompt.generate_prompt({"system_instructions": messages[0]["content"],
                                                                       "after_system_instructions": after_system_instructions,
                                                                       "thoughts_and_reasoning": thoughts_and_reasoning,
                                                                       "model_field_name": model_field_name,
                                                                       "fields_field_name": fields_field_name,
                                                                       "output_models": output_models})
                        messages[0]["content"] = system_prompt
                    elif structured_output_settings.add_thoughts_and_reasoning_field and not self.provider.is_using_json_schema_constraints():

                        thoughts_and_reasoning = structured_output_thoughts_and_reasoning_templater
                        thoughts_and_reasoning = thoughts_and_reasoning.generate_prompt({
                            "thoughts_and_reasoning_field_name": structured_output_settings.thoughts_and_reasoning_field_name})
                        model_field_name = structured_output_settings.output_model_name_field_name
                        fields_field_name = structured_output_settings.output_model_attributes_field_name

                        output_models = structured_output_settings.get_llm_documentation(
                            provider=self.provider)
                        system_prompt = structured_output_templater
                        system_prompt = system_prompt.generate_prompt({"system_instructions": messages[0]["content"],
                                                                       "after_system_instructions": after_system_instructions,
                                                                       "thoughts_and_reasoning": thoughts_and_reasoning,
                                                                       "model_field_name": model_field_name,
                                                                       "fields_field_name": fields_field_name,
                                                                       "output_models": output_models})
                        messages[0]["content"] = system_prompt
                    elif not structured_output_settings.add_thoughts_and_reasoning_field and not self.provider.is_using_json_schema_constraints():

                        model_field_name = structured_output_settings.output_model_name_field_name
                        fields_field_name = structured_output_settings.output_model_attributes_field_name

                        output_models = structured_output_settings.get_llm_documentation(
                            provider=self.provider)
                        system_prompt = structured_output_templater
                        system_prompt = system_prompt.generate_prompt({"system_instructions": messages[0]["content"],
                                                                       "after_system_instructions": after_system_instructions,
                                                                       "thoughts_and_reasoning": thoughts_and_reasoning,
                                                                       "model_field_name": model_field_name,
                                                                       "fields_field_name": fields_field_name,
                                                                       "output_models": output_models})
                        messages[0]["content"] = system_prompt

            if structured_output_settings.output_type == LlmStructuredOutputType.no_structured_output or structured_output_settings is None:
                messages[0]["content"] += "\n" + after_system_instructions
        at_end_list = []
        if system_prompt_modules is not None:
            for module in system_prompt_modules:
                if module.position == SystemPromptModulePosition.at_end:
                    at_end_list.append(module.get_formatted_content())
            if len(at_end_list) > 0:
                at_end_list = "\n\n".join(at_end_list)
            else:
                at_end_list = ""

            messages[0]["content"] += at_end_list
        prompt, response_role = self.messages_formatter.format_conversation(
            messages, Roles.assistant
        )

        if prompt_suffix:
            prompt += prompt_suffix

        if self.debug_output:
            print(prompt, end="")

        return (
            self.provider.create_completion(
                prompt if self.provider.get_provider_identifier() is not LlmProviderId.groq else messages,
                structured_output_settings,
                llm_sampling_settings,
                self.messages_formatter.bos_token,
            ),
            response_role,
        )

    @staticmethod
    def remove_any(text, list_of_strings):
        for item in list_of_strings:
            text = text.replace(item, "")
        return text

`__init__(provider, name='llamacpp_agent', system_prompt='You are a helpful assistant.', predefined_messages_formatter_type=MessagesFormatterType.CHATML, custom_messages_formatter=None, chat_history=None, add_tools_and_structures_documentation_to_system_prompt=True, debug_output=False)`

Initializes a new LlamaCppAgent object.

Parameters:

provider (LlmProvider) –

The underlying LLM provider (LlamaCppServerProvider, LlamaCppPythonProvider, TGIServerProvider or VLLMServerProvider).
name (str, default: 'llamacpp_agent' ) –

The name of the agent.
system_prompt (str, default: 'You are a helpful assistant.' ) –

The system prompt used in chat interactions.
predefined_messages_formatter_type (MessagesFormatterType, default: CHATML ) –

The type of predefined messages formatter.
custom_messages_formatter (MessagesFormatter, default: None ) –

A custom messages formatter. When given, it is used instead of the predefined formatter type.
chat_history (ChatHistory, default: None ) –

This will handle the chat history.
add_tools_and_structures_documentation_to_system_prompt (bool, default: True ) –

Whether to dynamically append documentation for function calling or structured output to the system prompt.
debug_output (bool, default: False ) –

Indicates whether debug output should be enabled.

Source code in llama_cpp_agent/llm_agent.py

def __init__(
        self,
        provider: LlmProvider,
        name: str = "llamacpp_agent",
        system_prompt: str = "You are a helpful assistant.",
        predefined_messages_formatter_type: MessagesFormatterType = MessagesFormatterType.CHATML,
        custom_messages_formatter: MessagesFormatter = None,
        chat_history: ChatHistory = None,
        add_tools_and_structures_documentation_to_system_prompt: bool = True,
        debug_output: bool = False,
):
    """
    Creates an agent bound to an LLM provider and registers the system prompt in its chat history.

    Args:
       provider (LlmProvider): The underlying LLM provider (LlamaCppServerProvider, LlamaCppPythonProvider, TGIServerProvider or VLLMServerProvider).
       name (str): The name of the agent.
       system_prompt (str): The system prompt used in chat interactions.
       predefined_messages_formatter_type (MessagesFormatterType): The predefined messages formatter to use when no custom formatter is given.
       custom_messages_formatter (MessagesFormatter): A custom messages formatter; takes precedence over the predefined type.
       chat_history (ChatHistory): The chat history to use. A new BasicChatHistory is created when omitted.
       add_tools_and_structures_documentation_to_system_prompt (bool): Whether to dynamically append documentation for function calling or structured output to the system prompt.
       debug_output (bool): Indicates whether debug output should be enabled.
    """
    self.provider = provider
    self.name = name
    self.debug_output = debug_output
    self.last_response = ""

    # An explicitly supplied formatter wins over the predefined formatter type.
    self.messages_formatter = (
        custom_messages_formatter
        if custom_messages_formatter is not None
        else get_predefined_messages_formatter(predefined_messages_formatter_type)
    )

    # Fall back to a fresh BasicChatHistory when the caller supplies none.
    self.chat_history = BasicChatHistory() if chat_history is None else chat_history

    # The system prompt is added to the chat history as a system message.
    self.add_message(role=Roles.system, message=system_prompt)
    self.system_prompt = system_prompt
    self.add_tools_and_structures_documentation_to_system_prompt = (
        add_tools_and_structures_documentation_to_system_prompt
    )

`add_message(message, role)`

Adds a message to the chat history.

Parameters:

message (str) –

The content of the message.
role (Literal['system'] | Literal['user'] | Literal['assistant'] | Literal['tool']) –

The role of the message sender.

Source code in llama_cpp_agent/llm_agent.py

def add_message(
        self,
        message: str,
        role: Roles,
):
    """
    Records a single message in the agent's chat history.

    Args:
        message (str): The content of the message.
        role (Literal["system"] | Literal["user"] | Literal["assistant"] | Literal["tool"]): The role of the message sender.
    """
    # The history stores each message as a dict with "role" and "content" keys.
    entry = {"role": role, "content": message}
    self.chat_history.add_message(entry)

`get_text_response(prompt=None, structured_output_settings=None, llm_sampling_settings=None, streaming_callback=None, returns_streaming_generator=False, print_output=False)`

Get a text response from the LLM provider.

Parameters:

prompt (str | list[int], default: None ) –

The prompt for the LLM.
structured_output_settings (LlmStructuredOutputSettings, default: None ) –

Settings for structured output.
llm_sampling_settings (LlmSamplingSettings, default: None ) –

Sampling settings for the LLM.
streaming_callback (Callable[[StreamingResponse], None], default: None ) –

Callback for streaming responses.
returns_streaming_generator (bool, default: False ) –

Whether to return a generator streaming the results.
print_output (bool, default: False ) –

Whether to print the output.

Returns:

Union[str, List[dict], BaseModel, Generator[Any, Any, str | BaseModel | list[BaseModel]]] –

Union[str, List[dict], BaseModel, Generator[Any, Any, str | BaseModel | list[BaseModel]]]: The generated response. A string message, a list of function calls, an object from structured output, or a generator for the response.

Source code in llama_cpp_agent/llm_agent.py

def get_text_response(
        self,
        prompt: str = None,
        structured_output_settings: LlmStructuredOutputSettings = None,
        llm_sampling_settings: LlmSamplingSettings = None,
        streaming_callback: Callable[[StreamingResponse], None] = None,
        returns_streaming_generator: bool = False,
        print_output: bool = False,
) -> Union[
    str,
    List[dict],
    BaseModel,
    Generator[Any, Any, str | BaseModel | list[BaseModel]],
]:
    """
    Get a text response from the LLM provider.

    The raw completion text is stored in ``self.last_response`` and then passed through
    ``structured_output_settings.handle_structured_output`` to build the return value.

    Args:
        prompt (str | list[int]): The prompt for the LLM.
        structured_output_settings (LlmStructuredOutputSettings): Settings for structured output. Defaults to no structured output.
        llm_sampling_settings (LlmSamplingSettings): Sampling settings for the LLM. Defaults to the provider's default settings; a passed object is deep-copied before it is modified.
        streaming_callback (Callable[[StreamingResponse], None]): Callback invoked for every streamed chunk and once more with ``is_last_response=True``.
        returns_streaming_generator (bool): Whether to return a generator streaming the results. Only honoured when the sampling settings are streaming.
        print_output (bool): Whether to print the output.

    Returns:
        Union[str, List[dict], BaseModel, Generator[Any, Any, str | BaseModel | list[BaseModel]]]: The generated response. A string message, a list of function calls, an object from structured output or a generator for the response. Returns the string "Error: No model loaded!" when the agent has no provider.
    """
    # Check the provider before anything dereferences it; otherwise the
    # "no model" result below could never be reached for a missing provider.
    if not self.provider:
        return "Error: No model loaded!"

    if self.debug_output and isinstance(prompt, str):
        print(prompt, end="")

    if structured_output_settings is None:
        structured_output_settings = LlmStructuredOutputSettings(
            output_type=LlmStructuredOutputType.no_structured_output
        )
    if llm_sampling_settings is None:
        llm_sampling_settings = self.provider.get_provider_default_settings()
    else:
        # Work on a copy: the stop sequences are extended below and the
        # caller's settings object must not be modified.
        llm_sampling_settings = deepcopy(llm_sampling_settings)

    if llm_sampling_settings.get_additional_stop_sequences() is not None:
        llm_sampling_settings.add_additional_stop_sequences(
            self.messages_formatter.default_stop_sequences
        )

    completion = self.get_text_completion(
        prompt=prompt,
        structured_output_settings=structured_output_settings,
        llm_samplings_settings=llm_sampling_settings,
    )

    def stream_results():
        # Yields each text chunk as it arrives; the generator's return value
        # is the structured output built from the complete response.
        full_response_stream = ""
        for out_stream in completion:
            out_text = out_stream["choices"][0]["text"]
            full_response_stream += out_text
            yield out_text

        # Keep last_response in sync with the non-generator paths.
        self.last_response = full_response_stream
        return structured_output_settings.handle_structured_output(
            full_response_stream, provider=self.provider
        )

    if llm_sampling_settings.is_streaming():
        if returns_streaming_generator:
            return stream_results()

        full_response = ""
        for out in completion:
            text = out["choices"][0]["text"]
            full_response += text
            if streaming_callback is not None:
                streaming_callback(
                    StreamingResponse(text=text, is_last_response=False)
                )
            if print_output:
                print(text, end="")
        # Signal the end of the stream with an empty final chunk.
        if streaming_callback is not None:
            streaming_callback(
                StreamingResponse(text="", is_last_response=True)
            )
        if print_output or self.debug_output:
            print("")
        self.last_response = full_response
        return structured_output_settings.handle_structured_output(
            full_response, provider=self.provider
        )

    # Non-streaming: the completion is a single result object.
    full_response = "" + completion["choices"][0]["text"]
    if print_output or self.debug_output:
        print(full_response)
    self.last_response = full_response
    return structured_output_settings.handle_structured_output(
        full_response, provider=self.provider
    )

`get_chat_response(message=None, role=Roles.user, prompt_suffix=None, chat_history=None, system_prompt=None, system_prompt_modules=None, add_message_to_chat_history=True, add_response_to_chat_history=True, structured_output_settings=None, llm_sampling_settings=None, streaming_callback=None, returns_streaming_generator=False, print_output=False)`

Get a chat response based on the input message and context.

Parameters:

message (str, default: None ) –

The input message.
role (Literal['system', 'user', 'assistant', 'tool'], default: user ) –

The role of the message sender.
prompt_suffix (str, default: None ) –

Suffix to append after the prompt.
chat_history (ChatHistory, default: None ) –

Overrides the agent's internal ChatHistory for this call.
system_prompt (str, default: None ) –

Overrides the system prompt set at agent initialization.
system_prompt_modules (SystemPromptModules, default: None ) –

Additional sections added to the system prompt.
add_message_to_chat_history (bool, default: True ) –

Whether to add the input message to the chat history.
add_response_to_chat_history (bool, default: True ) –

Whether to add the generated response to the chat history.
structured_output_settings (LlmStructuredOutputSettings, default: None ) –

Settings for structured output.
llm_sampling_settings (LlmSamplingSettings, default: None ) –

Sampling settings for the LLM.
streaming_callback (Callable[[StreamingResponse], None], default: None ) –

Callback for streaming responses.
returns_streaming_generator (bool, default: False ) –

Whether to return a generator streaming the results.
print_output (bool, default: False ) –

Whether to print the generated response.

Returns:

Union[str, List[dict], BaseModel, Generator[Any, Any, str | BaseModel | list[BaseModel]]] –

Union[str, List[dict], BaseModel, Generator[Any, Any, str | BaseModel | list[BaseModel]]]: The generated chat response. A string message, a list of function calls, an object from structured output, or a generator for the response.

Source code in llama_cpp_agent/llm_agent.py

def get_chat_response(
        self,
        message: str = None,
        role: Roles = Roles.user,
        prompt_suffix: str = None,
        chat_history: ChatHistory = None,
        system_prompt: str = None,
        system_prompt_modules: list[SystemPromptModule] = None,
        add_message_to_chat_history: bool = True,
        add_response_to_chat_history: bool = True,
        structured_output_settings: LlmStructuredOutputSettings = None,
        llm_sampling_settings: LlmSamplingSettings = None,
        streaming_callback: Callable[[StreamingResponse], None] = None,
        returns_streaming_generator: bool = False,
        print_output: bool = False,
) -> Union[
    str,
    List[dict],
    BaseModel,
    Generator[Any, Any, str | BaseModel | list[BaseModel]],
]:
    """
    Get a chat response based on the input message and context.

    The response text (prefixed with ``prompt_suffix`` when one is given) is stored in
    ``self.last_response``, optionally added to the chat history, and then passed through
    ``structured_output_settings.handle_structured_output`` to build the return value.

    Args:
        message (str): The input message.
        role (Literal["system", "user", "assistant", "tool"]): The role of the message sender.
        prompt_suffix (str): Suffix to append after the prompt.
        chat_history (ChatHistory): Used instead of the agent's internal ChatHistory for this call.
        system_prompt (str): Overrides the system prompt set at agent initialization.
        system_prompt_modules (SystemPromptModules): Additional sections added to the system prompt.
        add_message_to_chat_history (bool): Whether to add the input message to the chat history.
        add_response_to_chat_history (bool): Whether to add the generated response to the chat history.
        structured_output_settings (LlmStructuredOutputSettings): Settings for structured output. Defaults to no structured output.
        llm_sampling_settings (LlmSamplingSettings): Sampling settings for the LLM. Defaults to the provider's default settings; a passed object is deep-copied before it is modified.
        streaming_callback (Callable[[StreamingResponse], None]): Callback invoked for every streamed chunk and once more with ``is_last_response=True``.
        returns_streaming_generator (bool): Whether to return a generator streaming the results.
        print_output (bool): Whether to print the generated response.

    Returns:
        Union[str, List[dict], BaseModel, Generator[Any, Any, str | BaseModel | list[BaseModel]]]: The generated chat response. A string message, a list of function calls, an object from structured output or a generator for the response. Returns the string "Error: No model loaded!" when the agent has no provider.
    """
    # Check the provider before anything dereferences it; otherwise the
    # "no model" result could never be reached for a missing provider.
    if not self.provider:
        return "Error: No model loaded!"

    if chat_history is None:
        chat_history = self.chat_history

    if structured_output_settings is None:
        structured_output_settings = LlmStructuredOutputSettings(
            output_type=LlmStructuredOutputType.no_structured_output
        )
    if llm_sampling_settings is None:
        llm_sampling_settings = self.provider.get_provider_default_settings()
    else:
        # Work on a copy: the stop sequences are extended below and the
        # caller's settings object must not be modified.
        llm_sampling_settings = deepcopy(llm_sampling_settings)

    if llm_sampling_settings.get_additional_stop_sequences() is not None:
        llm_sampling_settings.add_additional_stop_sequences(
            self.messages_formatter.default_stop_sequences
        )

    completion, response_role = self.get_response_role_and_completion(
        message=message,
        chat_history=chat_history,
        system_prompt=system_prompt,
        system_prompt_modules=system_prompt_modules,
        add_message_to_chat_history=add_message_to_chat_history,
        role=role,
        prompt_suffix=prompt_suffix,
        structured_output_settings=structured_output_settings,
        llm_sampling_settings=llm_sampling_settings,
    )

    def finish(response_text):
        # Shared tail of all three paths: prefix the suffix, remember the
        # response, optionally record it in the history, then parse it.
        if prompt_suffix:
            response_text = prompt_suffix + response_text
        self.last_response = response_text
        if add_response_to_chat_history:
            chat_history.add_message(
                {
                    "role": response_role,
                    "content": response_text,
                }
            )
        return structured_output_settings.handle_structured_output(
            response_text, prompt_suffix=prompt_suffix, provider=self.provider
        )

    def stream_results():
        # Yields each text chunk except the EOS token; the generator's return
        # value is the structured output built from the complete response.
        full_response_stream = ""
        for out_stream in completion:
            out_text = out_stream["choices"][0]["text"]
            if out_text != self.messages_formatter.eos_token:
                full_response_stream += out_text
                yield out_text
        return finish(full_response_stream)

    # NOTE(review): the generator is returned without checking is_streaming();
    # presumably callers must enable streaming in the sampling settings — confirm.
    if returns_streaming_generator:
        return stream_results()

    if llm_sampling_settings.is_streaming():
        full_response = ""
        for out in completion:
            text = out["choices"][0]["text"]
            # Chunks equal to the EOS token are neither collected nor reported.
            if text != self.messages_formatter.eos_token:
                full_response += text
                if streaming_callback is not None:
                    streaming_callback(
                        StreamingResponse(text=text, is_last_response=False)
                    )
                if print_output or self.debug_output:
                    print(text, end="")
        # Signal the end of the stream with an empty final chunk.
        if streaming_callback is not None:
            streaming_callback(
                StreamingResponse(text="", is_last_response=True)
            )
        if print_output or self.debug_output:
            print("")
        return finish(full_response)

    # Non-streaming: the completion is a single result object.
    text = completion["choices"][0]["text"]
    if text.strip().endswith(self.messages_formatter.eos_token):
        text = text.replace(self.messages_formatter.eos_token, "")
    if print_output or self.debug_output:
        print(text)
    return finish(text)

Structured Output Settings

`llama_cpp_agent.llm_output_settings.settings`

`LlmStructuredOutputType`

Bases: Enum

Enum for defining different types of structured outputs that can be generated by a Language Model.

Source code in llama_cpp_agent/llm_output_settings/settings.py

class LlmStructuredOutputType(Enum):
    """
    Enum for defining different types of structured outputs that can be generated by a Language Model.

    Each member's value is its own name as a string.
    """

    # No structured output is requested.
    no_structured_output = "no_structured_output"
    # presumably a single instance of one of the configured pydantic models -- TODO confirm
    object_instance = "object_instance"
    # presumably a list of pydantic model instances -- TODO confirm
    list_of_objects = "list_of_objects"
    # presumably a single function/tool call -- TODO confirm
    function_calling = "function_calling"
    # presumably several function/tool calls in one response -- TODO confirm
    parallel_function_calling = "parallel_function_calling"

`LlmStructuredOutputSettings`

Bases: BaseModel

Settings for structured output of large language models for using tools like function calling and creating instances of pydantic models.

Attributes:

output_type (LlmStructuredOutputType) –

Defines the type of structured output.
function_tools (Optional[List[LlamaCppFunctionTool]]) –

Tools to enable function calling.
pydantic_models (Optional[List[type[BaseModel]]]) –

List of pydantic models for structured data output.
add_thoughts_and_reasoning_field (Optional[bool]) –

Add thoughts and reasoning field to function calling. Defaults to False.
thoughts_and_reasoning_field_name (Optional[str]) –

Field name for the thoughts and reasoning field. Defaults to "thoughts_and_reasoning".
function_calling_name_field_name (Optional[str]) –

Name of the JSON field for the name of the used function. Defaults to "function".
function_calling_content (Optional[str]) –

Name of the JSON field for the arguments of the used function. Defaults to "arguments".
output_model_name_field_name (Optional[str]) –

Name of the JSON field for the name of the used pydantic model. Defaults to "model".
output_model_attributes_field_name (Optional[str]) –

Name of the JSON field for the fields of the pydantic model. Defaults to "fields".

Methods:

from_llama_cpp_function_tools –

Create settings from a list of LlamaCppFunctionTools with a specific output type.
from_pydantic_models –

Create settings from a list of Pydantic models with a specific output type.
from_open_ai_tools –

Create settings from OpenAI tools for structured outputs.
from_functions –

Create settings from a list of callable functions with a specific output type.
from_llama_index_tools –

Create settings from a list of llama-index tools with a specific output type.
to_openai_tools –

Return a list of OpenAI tools.
add_llama_cpp_function_tool –

Add a LlamaCppFunctionTool to the settings.
add_pydantic_model –

Add a Pydantic model to the settings, ensuring it matches the specified output type.
add_open_ai_tool –

Add an OpenAI tool to the settings, ensuring it matches the specified output type.
add_function_tool –

Add a callable function to the settings, ensuring it matches the specified output type.
add_llama_index_tool –

Add a llama-index tool, like QueryEngineTool, to the settings, ensuring it matches the specified output type.
get_llm_documentation –

Generate documentation for the models and tools configured within the settings, based on the output type.
get_gbnf_grammar –

Generate a GBNF grammar for tools configured within the settings, based on the output type.
get_json_schema –

Generate a JSON schema for the tools configured within the settings, based on the output type.

Source code in llama_cpp_agent/llm_output_settings/settings.py

class LlmStructuredOutputSettings(BaseModel):
    """
    Settings for structured output of large language models for using tools like function calling and creating instances of pydantic models.

    Attributes:
        output_type (LlmStructuredOutputType): Defines the type of structured output.
        function_tools (Optional[List[LlamaCppFunctionTool]]): Tools to enable function calling. Defaults to None.
        pydantic_models (Optional[List[type[BaseModel]]]): List of pydantic models for structured data output. Defaults to None.
        add_thoughts_and_reasoning_field (Optional[bool]): Add thoughts and reasoning field to function calling. Defaults to False.
        add_heartbeat_field (Optional[bool]): Add heartbeat field to function calling output to let the LLM indicate if it wants control back after this function call. Defaults to False.
        thoughts_and_reasoning_field_name (Optional[str]): Field name for the thoughts and reasoning field. Defaults to "thoughts_and_reasoning".
        heartbeat_field_name (Optional[str]): Field name for the heartbeat field. Defaults to "heartbeat".
        heartbeat_function_names_list (Optional[List[str]]): List of function names that get a heartbeat field added to function calling. Defaults to an empty list.
        function_calling_name_field_name (Optional[str]): Name of the JSON field for the name of the used function. Defaults to "function".
        function_calling_content (Optional[str]): Name of the JSON field for the arguments of the used function. Defaults to "arguments".
        output_model_name_field_name (Optional[str]): Name of the JSON field for the name of the used pydantic model. Defaults to "model".
        output_model_attributes_field_name (Optional[str]): Name of the JSON field for the fields of the pydantic model. Defaults to "fields".
        output_raw_json_string (Optional[bool]): If the output should be just the generated JSON string by the LLM. Defaults to False.
        output_structured_output_and_raw_json_string (Optional[bool]): If the output should be a tuple of the output and the generated JSON string by the LLM. Defaults to False.

    Methods:
        from_llama_cpp_function_tools: Create settings from a list of LlamaCppFunctionTools with a specific output type.
        from_pydantic_models: Create settings from a list of Pydantic models with a specific output type.
        from_open_ai_tools: Create settings from OpenAI tools for structured outputs.
        from_functions: Create settings from a list of callable functions with a specific output type.
        from_llama_index_tools: Create settings from a list of llama-index tools with a specific output type.
        to_openai_tools: Return a list of OpenAI tools.
        add_llama_cpp_function_tool: Add a LlamaCppFunctionTool to the settings.
        add_pydantic_model: Add a Pydantic model to the settings, ensuring it matches the specified output type.
        add_open_ai_tool: Add an OpenAI tool to the settings, ensuring it matches the specified output type.
        add_function_tool: Add a callable function to the settings, ensuring it matches the specified output type.
        add_llama_index_tool: Add a llama-index tool, like QueryEngineTool, to the settings, ensuring it matches the specified output type.
        set_name: Rename the function tool or pydantic model stored at a given index.
        get_llm_documentation: Generate documentation for the models and tools configured within the settings, based on the output type.
        get_gbnf_grammar: Generate a GBNF grammar for tools configured within the settings, based on the output type.
        get_json_schema: Generate a JSON schema for the tools configured within the settings, based on the output type.
        add_function_name_to_heartbeat_list: Add a function name to the heartbeat list.
        add_all_current_functions_to_heartbeat_list: Add the names of all current function tools to the heartbeat list.
        handle_structured_output: Turn the raw LLM output into the configured structured result.
        handle_function_call: Dispatch a parsed function call response and return its output.
    """

    # No default value is given (`...`), so this field has to be supplied on construction.
    output_type: Optional[LlmStructuredOutputType] = Field(
        ..., description="The output type of the llm"
    )
    # Both collections default to None rather than to an empty list.
    function_tools: Optional[List[LlamaCppFunctionTool]] = Field(
        None, description="List of functions tools for function calling"
    )
    pydantic_models: Optional[List[type[BaseModel]]] = Field(
        None, description="List of pydantic models for structured output"
    )
    add_thoughts_and_reasoning_field: Optional[bool] = Field(
        False, description="Add thoughts and reasoning field to function calling output"
    )
    add_heartbeat_field: Optional[bool] = Field(
        False,
        description="Add heartbeat field to function calling output to let the LLM indicate if it wants control back after this function call."
    )

    thoughts_and_reasoning_field_name: Optional[str] = Field(
        "thoughts_and_reasoning",
        description="Field name for the thoughts and reasoning field",
    )

    heartbeat_field_name: Optional[str] = Field(
        "heartbeat",
        description="Field name for the heartbeat field",
    )

    heartbeat_function_names_list: Optional[List[str]] = Field(
        [],
        description="List of function names that get added a heartbeat field to function calling",
    )
    # JSON key names used in function calling output.
    function_calling_name_field_name: Optional[str] = Field(
        "function",
        description="Name of the JSON field for the name of the used function.",
    )
    function_calling_content: Optional[str] = Field(
        "arguments",
        description="Name of the JSON field for the arguments of the used function.",
    )

    # JSON key names used in pydantic model (object) output.
    output_model_name_field_name: Optional[str] = Field(
        "model",
        description="Name of the JSON field for the name of the used pydantic model.",
    )
    output_model_attributes_field_name: Optional[str] = Field(
        "fields",
        description="Name of the JSON field for the fields of the pydantic model.",
    )

    # Flags controlling what handle_structured_output returns.
    output_raw_json_string: Optional[bool] = Field(
        False,
        description="If the output should be just the generated JSON string by the LLM",
    )
    output_structured_output_and_raw_json_string: Optional[bool] = Field(
        False,
        description="If the output should be a tuple of the output and the generated JSON string by the LLM",
    )

    class Config:
        arbitrary_types_allowed = True

    @staticmethod
    def from_llama_cpp_function_tools(
            llama_cpp_function_tools: List[LlamaCppFunctionTool],
            allow_parallel_function_calling: bool = False,
            add_thoughts_and_reasoning_field: bool = False,
            add_heartbeat_field: bool = False,
    ):
        """
        Build function calling settings from already constructed LlamaCppFunctionTools.

        Args:
            llama_cpp_function_tools (List[LlamaCppFunctionTool]): List of function tools.
            allow_parallel_function_calling (bool): Whether to enable parallel function calling. Defaults to False.
            add_thoughts_and_reasoning_field (bool): Whether to add thoughts and reasoning field to function calling. Defaults to False.
            add_heartbeat_field (bool): Whether to add heartbeat field to function calling. Defaults to False.

        Returns:
            LlmStructuredOutputSettings: Configured settings object.
        """
        # The parallel flag alone decides between the two function calling output types.
        if allow_parallel_function_calling:
            selected_type = LlmStructuredOutputType.parallel_function_calling
        else:
            selected_type = LlmStructuredOutputType.function_calling

        return LlmStructuredOutputSettings(
            output_type=selected_type,
            function_tools=llama_cpp_function_tools,
            add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
            add_heartbeat_field=add_heartbeat_field,
        )

    @staticmethod
    def from_pydantic_models(
            models: List[type[BaseModel]], output_type: LlmStructuredOutputType,
            add_thoughts_and_reasoning_field: bool = False
    ):
        """
        Build settings from Pydantic model classes for the requested output type.

        For the object output types the models are stored as pydantic models; for the
        function calling output types each model is wrapped in a LlamaCppFunctionTool.

        Args:
            models (List[type[BaseModel]]): List of Pydantic model classes.
            output_type (LlmStructuredOutputType): Desired output type.
            add_thoughts_and_reasoning_field (bool): Whether to add a thoughts and reasoning field to output. Defaults to False.

        Returns:
            LlmStructuredOutputSettings: Configured settings object, or None if the
            output type is none of the handled types.

        Raises:
            NotImplementedError: If output_type is no_structured_output.
        """
        if output_type is LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )

        # Object outputs keep the model classes as they are.
        if (
                output_type is LlmStructuredOutputType.object_instance
                or output_type is LlmStructuredOutputType.list_of_objects
        ):
            return LlmStructuredOutputSettings(
                output_type=output_type,
                pydantic_models=models,
                add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
            )

        # Function calling outputs expose every model as a function tool.
        if (
                output_type is LlmStructuredOutputType.function_calling
                or output_type is LlmStructuredOutputType.parallel_function_calling
        ):
            wrapped_tools = [LlamaCppFunctionTool(model) for model in models]
            return LlmStructuredOutputSettings(
                output_type=output_type,
                function_tools=wrapped_tools,
                add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
            )

        return None

    @staticmethod
    def from_open_ai_tools(
            tools: List[Tuple[Dict[str, Any], Callable]],
            allow_parallel_function_calling: bool = False,
    ):
        """
        Build function calling settings from OpenAI style tool definitions.

        Args:
            tools (List[Tuple[Dict[str, Any], Callable]]): List of OpenAI tools defined by a schema and associated function.
            allow_parallel_function_calling (bool): Whether to enable parallel function calling. Defaults to False.

        Returns:
            LlmStructuredOutputSettings: Configured settings object.
        """
        if allow_parallel_function_calling:
            selected_type = LlmStructuredOutputType.parallel_function_calling
        else:
            selected_type = LlmStructuredOutputType.function_calling

        # Every (schema, function) tuple is wrapped in its own function tool.
        wrapped_tools = [LlamaCppFunctionTool(schema_and_function) for schema_and_function in tools]
        return LlmStructuredOutputSettings(
            output_type=selected_type,
            function_tools=wrapped_tools,
        )

    @staticmethod
    def from_functions(
            tools: List[Callable], allow_parallel_function_calling: bool = False,
            add_thoughts_and_reasoning_field: bool = False,
            add_heartbeat_field: bool = False,
    ):
        """
        Build function calling settings from a list of callable functions.

        Args:
            tools (List[Callable]): List of callables, each wrapped in a LlamaCppFunctionTool.
            allow_parallel_function_calling (bool): Whether to enable parallel function calling. Defaults to False.
            add_thoughts_and_reasoning_field (bool): Whether to add a thoughts and reasoning field to output.
            add_heartbeat_field (bool): Whether to add a heartbeat field to output.

        Returns:
            LlmStructuredOutputSettings: Configured settings object.
        """
        if allow_parallel_function_calling:
            selected_type = LlmStructuredOutputType.parallel_function_calling
        else:
            selected_type = LlmStructuredOutputType.function_calling

        wrapped_tools = [LlamaCppFunctionTool(function) for function in tools]
        return LlmStructuredOutputSettings(
            output_type=selected_type,
            function_tools=wrapped_tools,
            add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
            add_heartbeat_field=add_heartbeat_field,
        )

    @staticmethod
    def from_llama_index_tools(
            tools: list, allow_parallel_function_calling: bool = False, add_thoughts_and_reasoning_field: bool = False,
            add_heartbeat_field: bool = False,
    ):
        """
        Build function calling settings from a list of llama-index tools.

        The resulting output type is either function_calling or
        parallel_function_calling, depending on allow_parallel_function_calling.

        Args:
            tools (list): List of llama-index tools.
            allow_parallel_function_calling (bool): Whether to enable parallel function calling. Defaults to False.
            add_thoughts_and_reasoning_field (bool): Whether to add a thoughts and reasoning field to output.
            add_heartbeat_field (bool): Whether to add a heartbeat field to output.

        Returns:
            LlmStructuredOutputSettings: Configured settings object.
        """
        if allow_parallel_function_calling:
            selected_type = LlmStructuredOutputType.parallel_function_calling
        else:
            selected_type = LlmStructuredOutputType.function_calling

        # Conversion of each llama-index tool is delegated to LlamaCppFunctionTool.
        converted_tools = [
            LlamaCppFunctionTool.from_llama_index_tool(index_tool) for index_tool in tools
        ]
        return LlmStructuredOutputSettings(
            output_type=selected_type,
            function_tools=converted_tools,
            add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
            add_heartbeat_field=add_heartbeat_field,
        )

    def to_openai_tools(self):
        """
        Convert the configured function tools to OpenAI tools.

        Returns:
            The list produced by calling to_openai_tool() on every configured
            function tool, or None when no function tools are set.
        """
        configured_tools = self.function_tools
        if configured_tools is None:
            return None
        return [function_tool.to_openai_tool() for function_tool in configured_tools]

    def add_llama_cpp_function_tool(self, tool: LlamaCppFunctionTool):
        """
        Add a LlamaCppFunctionTool to the settings.

        Args:
            tool (LlamaCppFunctionTool): The function tool to add.
        """
        # function_tools defaults to None, so create the list on first use
        # instead of failing with an AttributeError on None.
        if self.function_tools is None:
            self.function_tools = []
        self.function_tools.append(tool)

    def add_pydantic_model(self, model: type[BaseModel], name: Optional[str] = None):
        """
        Add a Pydantic model to the settings, ensuring it matches the specified output type.

        For the object output types the model is appended to the pydantic models;
        for the function calling output types it is wrapped in a LlamaCppFunctionTool
        and appended to the function tools.

        Args:
            model (type[BaseModel]): The Pydantic model class to add.
            name (Optional[str]): If given, the model class is renamed to this name
                (its __name__ is overwritten) before it is added.

        Raises:
            NotImplementedError: If no structured output is specified.
        """
        # Validate first so that a rejected call does not rename the caller's class.
        if self.output_type is LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )
        if name is not None:
            model.__name__ = name

        if (
                self.output_type is LlmStructuredOutputType.object_instance
                or self.output_type is LlmStructuredOutputType.list_of_objects
        ):
            # pydantic_models defaults to None, so create the list on first use.
            if self.pydantic_models is None:
                self.pydantic_models = []
            self.pydantic_models.append(model)
        elif (
                self.output_type is LlmStructuredOutputType.function_calling
                or self.output_type is LlmStructuredOutputType.parallel_function_calling
        ):
            function_tool = LlamaCppFunctionTool(model)
            # function_tools defaults to None, so create the list on first use.
            if self.function_tools is None:
                self.function_tools = []
            self.function_tools.append(function_tool)

    def add_open_ai_tool(
            self, open_ai_schema_and_function: Tuple[Dict[str, Any], Callable], name: Optional[str] = None
    ):
        """
        Add an OpenAI tool to the settings, ensuring it matches the specified output type.

        Args:
            open_ai_schema_and_function (Tuple[Dict[str, Any], Callable]): The OpenAI schema and associated function to add.
            name (Optional[str]): If given, the created function tool is renamed via set_name.

        Raises:
            NotImplementedError: If the output type does not support adding tools.
        """
        if self.output_type is LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )
        elif (
                self.output_type is LlmStructuredOutputType.function_calling
                or self.output_type is LlmStructuredOutputType.parallel_function_calling
        ):
            tool = LlamaCppFunctionTool(open_ai_schema_and_function)
            if name is not None:
                tool.set_name(name)
            # function_tools defaults to None, so create the list on first use
            # instead of failing with an AttributeError on None.
            if self.function_tools is None:
                self.function_tools = []
            self.function_tools.append(tool)
        else:
            raise NotImplementedError(
                f"LlmOutputType: {self.output_type.value} not supported for tools!"
            )

    def add_function_tool(self, function: Callable, name: Optional[str] = None):
        """
        Add a callable function to the settings, ensuring it matches the specified output type.

        Args:
            function (Callable): The function to add.
            name (Optional[str]): If given, the created function tool is renamed via set_name.

        Raises:
            NotImplementedError: If the output type does not support adding tools.
        """
        if self.output_type is LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )
        elif (
                self.output_type is LlmStructuredOutputType.function_calling
                or self.output_type is LlmStructuredOutputType.parallel_function_calling
        ):
            tool = LlamaCppFunctionTool(function)
            if name is not None:
                tool.set_name(name)
            # function_tools defaults to None, so create the list on first use
            # instead of failing with an AttributeError on None.
            if self.function_tools is None:
                self.function_tools = []
            self.function_tools.append(tool)
        else:
            raise NotImplementedError(
                f"LlmOutputType: {self.output_type.value} not supported for tools!"
            )

    def add_llama_index_tool(self, tool, name: Optional[str] = None):
        """
        Add a llama-index tool, like QueryEngineTool, to the settings, ensuring it matches the specified output type.

        Args:
            tool: The llama-index tool to add.
            name (Optional[str]): If given, the created function tool is renamed via set_name.

        Raises:
            NotImplementedError: If the output type does not support adding tools.
        """
        if self.output_type is LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )
        elif (
                self.output_type is LlmStructuredOutputType.function_calling
                or self.output_type is LlmStructuredOutputType.parallel_function_calling
        ):
            # Use a separate name so the caller's argument is not shadowed by the wrapper.
            function_tool = LlamaCppFunctionTool.from_llama_index_tool(tool)
            if name is not None:
                function_tool.set_name(name)
            # function_tools defaults to None, so create the list on first use
            # instead of failing with an AttributeError on None.
            if self.function_tools is None:
                self.function_tools = []
            self.function_tools.append(function_tool)
        else:
            raise NotImplementedError(
                f"LlmOutputType: {self.output_type.value} not supported for tools!"
            )

    def set_name(self, index: int, name: str):
        """
        Rename the function tool or pydantic model stored at the given index.

        For the function calling output types the tool at `index` in the function
        tools is renamed via its set_name method; for every other structured output
        type the `__name__` of the pydantic model at `index` is overwritten.

        Args:
            index (int): Position of the tool or model to rename.
            name (str): The new name.

        Raises:
            NotImplementedError: If no structured output is specified.
        """
        current_type = self.output_type
        if current_type is LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )

        uses_function_tools = (
                current_type is LlmStructuredOutputType.function_calling
                or current_type is LlmStructuredOutputType.parallel_function_calling
        )
        if uses_function_tools:
            self.function_tools[index].set_name(name)
            return

        self.pydantic_models[index].__name__ = name

    def get_llm_documentation(self, provider):
        """
        Generate documentation for the models and tools configured within the settings, based on the output type.

        Args:
            provider: The LLM provider in use. For a TGIServerProvider or a
                VLLMServerProvider the documentation is generated with
                ordered_json_mode enabled.

        Returns:
            str: Generated documentation for the configured models or tools, with
            surrounding whitespace stripped. None if the output type is none of
            the handled types.

        Raises:
            NotImplementedError: If no structured output is specified.
        """
        # Imported inside the method, as in the rest of this class's provider handling.
        from llama_cpp_agent.providers.tgi_server import TGIServerProvider
        from llama_cpp_agent.providers.vllm_server import VLLMServerProvider

        json_schema_mode = isinstance(provider, (TGIServerProvider, VLLMServerProvider))

        current_type = self.output_type
        if current_type == LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )

        # Both object output types document the pydantic models directly.
        if (
                current_type == LlmStructuredOutputType.object_instance
                or current_type == LlmStructuredOutputType.list_of_objects
        ):
            documentation = generate_text_documentation(
                self.pydantic_models, ordered_json_mode=json_schema_mode
            )
            return documentation.strip()

        # Both function calling output types document the models behind the tools.
        if (
                current_type == LlmStructuredOutputType.function_calling
                or current_type == LlmStructuredOutputType.parallel_function_calling
        ):
            documentation = generate_text_documentation(
                [function_tool.model for function_tool in self.function_tools],
                model_prefix="Function",
                fields_prefix="Parameters",
                ordered_json_mode=json_schema_mode,
            )
            return documentation.strip()

        return None

    def get_gbnf_grammar(self):
        """
        Generate a GBNF grammar for tools configured within the settings, based on the output type.

        The object output types build the grammar from the pydantic models and never
        request a heartbeat field; the function calling output types build it from
        the models behind the function tools and forward the heartbeat settings.

        Returns:
            str: Generated GBNF grammar for the configured models or tools. None if
            the output type is none of the handled types.

        Raises:
            NotImplementedError: If no structured output is specified.
        """
        current_type = self.output_type
        if current_type == LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )

        is_single_object = current_type == LlmStructuredOutputType.object_instance
        is_object_list = current_type == LlmStructuredOutputType.list_of_objects
        if is_single_object or is_object_list:
            return generate_gbnf_grammar_from_pydantic_models(
                self.pydantic_models,
                list_of_outputs=not is_single_object,
                add_inner_thoughts=self.add_thoughts_and_reasoning_field,
                # The outer field names do not depend on the thoughts setting.
                outer_object_name=self.output_model_name_field_name,
                outer_object_content=self.output_model_attributes_field_name,
                inner_thought_field_name=self.thoughts_and_reasoning_field_name,
                allow_only_inner_thoughts=False,
                add_request_heartbeat=False,
            )

        is_single_call = current_type == LlmStructuredOutputType.function_calling
        is_parallel_call = current_type == LlmStructuredOutputType.parallel_function_calling
        if is_single_call or is_parallel_call:
            return generate_gbnf_grammar_from_pydantic_models(
                [function_tool.model for function_tool in self.function_tools],
                list_of_outputs=not is_single_call,
                add_inner_thoughts=self.add_thoughts_and_reasoning_field,
                # The outer field names do not depend on the thoughts setting.
                outer_object_name=self.function_calling_name_field_name,
                outer_object_content=self.function_calling_content,
                inner_thought_field_name=self.thoughts_and_reasoning_field_name,
                allow_only_inner_thoughts=False,
                add_request_heartbeat=self.add_heartbeat_field,
                request_heartbeat_field_name=self.heartbeat_field_name,
                request_heartbeat_models=self.heartbeat_function_names_list,
            )

        return None

    def get_json_schema(self):
        """
        Generate a JSON schema for the tools configured within the settings, based on the output type.

        The field names handed to the schema generator carry a numeric prefix
        ("001_", "002_", ...). When the thoughts and reasoning field is enabled it
        takes "001_" and every other field moves up by one.

        Returns:
            Dict: Generated JSON schema for the configured models or tools. None if
            the output type is none of the handled types.

        Raises:
            NotImplementedError: If no structured output is specified.
        """
        current_type = self.output_type
        if current_type == LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )

        # Prefixes for the name, content and heartbeat fields, in that order.
        if self.add_thoughts_and_reasoning_field:
            name_prefix, content_prefix, heartbeat_prefix = "002_", "003_", "004_"
        else:
            name_prefix, content_prefix, heartbeat_prefix = "001_", "002_", "003_"

        is_single_object = current_type == LlmStructuredOutputType.object_instance
        is_object_list = current_type == LlmStructuredOutputType.list_of_objects
        if is_single_object or is_object_list:
            return generate_json_schemas(
                self.pydantic_models,
                allow_list=not is_single_object,
                outer_object_name=name_prefix + self.output_model_name_field_name,
                outer_object_properties_name=content_prefix + self.output_model_attributes_field_name,
                inner_thoughts_name="001_" + self.thoughts_and_reasoning_field_name,
                add_inner_thoughts=self.add_thoughts_and_reasoning_field,
            )

        is_single_call = current_type is LlmStructuredOutputType.function_calling
        is_parallel_call = current_type is LlmStructuredOutputType.parallel_function_calling
        if is_single_call or is_parallel_call:
            return generate_json_schemas(
                [function_tool.model for function_tool in self.function_tools],
                allow_list=not is_single_call,
                outer_object_name=name_prefix + self.function_calling_name_field_name,
                outer_object_properties_name=content_prefix + self.function_calling_content,
                inner_thoughts_name="001_" + self.thoughts_and_reasoning_field_name,
                add_inner_thoughts=self.add_thoughts_and_reasoning_field,
                add_heartbeat=self.add_heartbeat_field,
                heartbeat_name=heartbeat_prefix + self.heartbeat_field_name,
                heartbeat_list=self.heartbeat_function_names_list,
            )

        return None

    def add_function_name_to_heartbeat_list(self, function_name: str):
        """
        Register a function name in the heartbeat list, so that a heartbeat field gets added to its function calling output.

        Args:
            function_name (str): Name of the function to register.
        """
        heartbeat_names = self.heartbeat_function_names_list
        heartbeat_names.append(function_name)

    def add_all_current_functions_to_heartbeat_list(self, excluded: list[str] = None):
        """
        Register the names of all currently configured function tools in the heartbeat list.

        Args:
            excluded (list[str]): Function names to leave out. Defaults to None,
                which excludes nothing.
        """
        skipped_names = [] if excluded is None else excluded

        # Collect first, then extend once, so the heartbeat list is only touched
        # after every tool name has been read.
        selected_names = []
        for function_tool in self.function_tools:
            if function_tool.model.__name__ not in skipped_names:
                selected_names.append(function_tool.model.__name__)

        self.heartbeat_function_names_list.extend(selected_names)

    def handle_structured_output(self, llm_output: str, prompt_suffix: str = None, provider=None):
        """
        Turn the raw LLM output into the result configured by these settings.

        Args:
            llm_output (str): The text generated by the LLM.
            prompt_suffix (str): If given, its first occurrence is removed from the
                output before parsing.
            provider: Not used by this method.

        Returns:
            - The unmodified llm_output if output_raw_json_string is set.
            - For the function calling output types: the result of handle_function_call.
            - For object_instance: an instance of the pydantic model whose name matches
              the model name field of the parsed output.
            - For list_of_objects: a list of instances of the matching pydantic models.
            - Otherwise (including an object_instance output that matches no model):
              llm_output with the prompt suffix removed.
            When output_structured_output_and_raw_json_string is set, the structured
            results above are returned as a tuple together with the llm_output string.
        """
        if self.output_raw_json_string:
            return llm_output

        if prompt_suffix:
            llm_output = llm_output.replace(prompt_suffix, "", 1)

        is_function_calling = (
                self.output_type is LlmStructuredOutputType.function_calling
                or self.output_type is LlmStructuredOutputType.parallel_function_calling
        )
        if is_function_calling:
            parsed = self.clean_keys(parse_json_response(llm_output))
            if self.output_structured_output_and_raw_json_string:
                return self.handle_function_call(parsed), llm_output
            return self.handle_function_call(parsed)

        if self.output_type == LlmStructuredOutputType.object_instance:
            parsed = self.clean_keys(parse_json_response(llm_output))
            requested_name = parsed[self.output_model_name_field_name]
            requested_attributes = parsed[self.output_model_attributes_field_name]
            for candidate in self.pydantic_models:
                if requested_name != candidate.__name__:
                    continue
                if self.output_structured_output_and_raw_json_string:
                    return candidate(**requested_attributes), llm_output
                return candidate(**requested_attributes)
            # No model matched: fall through to returning the text itself.
        elif self.output_type == LlmStructuredOutputType.list_of_objects:
            parsed = self.clean_keys(parse_json_response(llm_output))
            instances = []
            for entry in parsed:
                for candidate in self.pydantic_models:
                    requested_name = entry[self.output_model_name_field_name]
                    requested_attributes = entry[self.output_model_attributes_field_name]
                    if requested_name == candidate.__name__:
                        instances.append(candidate(**requested_attributes))
            if self.output_structured_output_and_raw_json_string:
                return instances, llm_output
            return instances

        return llm_output

    def handle_function_call(self, function_call_response: Union[dict, List[dict]]):
        """
        Dispatch a parsed function call response to the matching executor.

        Args:
            function_call_response (Union[dict, List[dict]]): The parsed function call, or the
                list of parsed calls when the output type is parallel function calling.

        Returns:
            Whatever `intern_parallel_function_call` returns for the parallel output type,
            otherwise whatever `intern_function_call` returns. An "Error: ..." string is returned
            when the response is None or an AttributeError is raised while dispatching.
        """
        if function_call_response is None:
            return "Error: Invalid function call response."

        try:
            if self.output_type == LlmStructuredOutputType.parallel_function_calling:
                return self.intern_parallel_function_call(function_call_response)
            return self.intern_function_call(function_call_response)
        except AttributeError as error:
            return f"Error: {error}"

    def intern_function_call(self, function_call: dict):
        """
        Run the single function tool named in a parsed function call.

        Args:
            function_call (dict): The parsed function call, holding the tool name and its parameters.

        Returns:
            list: A one-element list with a dict of the tool name, the call parameters and the
            tool's result under "return_value". None when the call has no parameters entry or
            names no registered tool.
        """
        if self.function_calling_content not in function_call:
            return None

        # The name is looked up lazily per candidate, so it is never read when no tools are registered.
        matching_tool = next(
            (
                candidate
                for candidate in self.function_tools
                if candidate.model.__name__ == function_call[self.function_calling_name_field_name]
            ),
            None,
        )
        if matching_tool is None:
            return None

        arguments = function_call[self.function_calling_content]
        invocation = matching_tool.model(**arguments)
        result = invocation.run(**matching_tool.additional_parameters)
        return [
            {
                self.function_calling_name_field_name: matching_tool.model.__name__,
                self.function_calling_content: arguments,
                "return_value": result,
            }
        ]

    def intern_parallel_function_call(self, function_calls: List[dict]):
        """
        Run every function tool named in a list of parsed function calls.

        Calls without a parameters entry, or naming no registered tool, are skipped.

        Args:
            function_calls (List[dict]): The parsed function calls, each holding a tool name and
                its parameters.

        Returns:
            list: One dict per executed call with the tool name, the call parameters and the
            tool's result under "return_value". If executing a call raises AttributeError, an
            "Error: ..." string is returned immediately instead of the collected results.
        """
        outcomes = []
        for request in function_calls:
            if self.function_calling_content not in request:
                continue

            matching_tool = next(
                (
                    candidate
                    for candidate in self.function_tools
                    if candidate.model.__name__ == request[self.function_calling_name_field_name]
                ),
                None,
            )
            if matching_tool is None:
                continue

            try:
                arguments = request[self.function_calling_content]
                invocation = matching_tool.model(**arguments)
                result = invocation.run(**matching_tool.additional_parameters)
                outcomes.append(
                    {
                        self.function_calling_name_field_name: matching_tool.model.__name__,
                        self.function_calling_content: arguments,
                        "return_value": result,
                    }
                )
            except AttributeError as error:
                return f"Error: {error}"

        return outcomes

    def clean_keys(self, data) -> Dict[str, Any] | List[Dict[str, Any]]:
        if isinstance(data, dict):
            # Create a new dictionary with modified keys
            new_dict = {}
            for key, value in data.items():
                # Remove the leading 'XXX_' from keys
                new_key = re.sub(r"^\d{3}_", "", key)
                # Recursively clean nested dictionaries and lists
                new_dict[new_key] = self.clean_keys(value)
            return new_dict
        elif isinstance(data, list):
            # Process each item in the list
            return [self.clean_keys(item) for item in data]
        else:
            # Return the item as is if it's not a dict or list
            return data

`from_llama_cpp_function_tools(llama_cpp_function_tools, allow_parallel_function_calling=False, add_thoughts_and_reasoning_field=False, add_heartbeat_field=False)` `staticmethod`

Create settings from a list of LlamaCppFunctionTools with a specific output type.

Parameters:

llama_cpp_function_tools (List[LlamaCppFunctionTool]) –

List of function tools.
allow_parallel_function_calling (bool, default: False ) –

Whether to enable parallel function calling. Defaults to False.
add_thoughts_and_reasoning_field (bool, default: False ) –

Whether to add a thoughts and reasoning field to the function calling output. Defaults to False.
add_heartbeat_field (bool, default: False ) –

Whether to add a heartbeat field to the function calling output. Defaults to False.

Returns: LlmStructuredOutputSettings: Configured settings object.

Source code in llama_cpp_agent/llm_output_settings/settings.py

@staticmethod
def from_llama_cpp_function_tools(
        llama_cpp_function_tools: List[LlamaCppFunctionTool],
        allow_parallel_function_calling: bool = False,
        add_thoughts_and_reasoning_field: bool = False,
        add_heartbeat_field: bool = False,
):
    """
    Build function calling settings around already constructed LlamaCppFunctionTool objects.

    Args:
        llama_cpp_function_tools (List[LlamaCppFunctionTool]): The function tools to register.
        allow_parallel_function_calling (bool): Selects the parallel function calling output type
            instead of the single function calling one. Defaults to False.
        add_thoughts_and_reasoning_field (bool): Whether to add a thoughts and reasoning field to
            the function calling output. Defaults to False.
        add_heartbeat_field (bool): Whether to add a heartbeat field to the function calling
            output. Defaults to False.

    Returns:
        LlmStructuredOutputSettings: Configured settings object.
    """
    if allow_parallel_function_calling:
        selected_output_type = LlmStructuredOutputType.parallel_function_calling
    else:
        selected_output_type = LlmStructuredOutputType.function_calling

    return LlmStructuredOutputSettings(
        output_type=selected_output_type,
        function_tools=llama_cpp_function_tools,
        add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
        add_heartbeat_field=add_heartbeat_field,
    )

`from_pydantic_models(models, output_type, add_thoughts_and_reasoning_field=False)` `staticmethod`

Create settings from a list of Pydantic models with a specific output type.

Parameters:

models (List[BaseModel]) –

List of Pydantic models.
output_type (LlmStructuredOutputType) –

Desired output type.

Returns:

LlmStructuredOutputSettings –

Configured settings object.

Raises:

NotImplementedError –

If no structured output is specified for the output type.

Source code in llama_cpp_agent/llm_output_settings/settings.py

@staticmethod
def from_pydantic_models(
        models: List[type[BaseModel]], output_type: LlmStructuredOutputType,
        add_thoughts_and_reasoning_field: bool = False
):
    """
    Build settings for the given output type from a list of Pydantic model classes.

    For `object_instance` and `list_of_objects` the models are registered as Pydantic models; for
    the two function calling types each model is wrapped in a LlamaCppFunctionTool.

    Args:
        models (List[type[BaseModel]]): The Pydantic model classes.
        output_type (LlmStructuredOutputType): Desired output type.
        add_thoughts_and_reasoning_field (bool): Forwarded to the settings unchanged.
            Defaults to False.

    Returns:
        LlmStructuredOutputSettings: Configured settings object, or None when the output type
        matches none of the handled types.

    Raises:
        NotImplementedError: If the output type is `no_structured_output`.
    """
    if output_type is LlmStructuredOutputType.no_structured_output:
        raise NotImplementedError(
            "LlmOutputType: no_structured_output not supported for structured output and function calling!"
        )

    if (
            output_type is LlmStructuredOutputType.object_instance
            or output_type is LlmStructuredOutputType.list_of_objects
    ):
        return LlmStructuredOutputSettings(
            output_type=output_type,
            pydantic_models=models,
            add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
        )

    if (
            output_type is LlmStructuredOutputType.function_calling
            or output_type is LlmStructuredOutputType.parallel_function_calling
    ):
        return LlmStructuredOutputSettings(
            output_type=output_type,
            function_tools=[LlamaCppFunctionTool(model_class) for model_class in models],
            add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
        )

    return None

`from_open_ai_tools(tools, allow_parallel_function_calling=False)` `staticmethod`

Create settings from OpenAI tools for structured outputs.

Parameters:

tools (List[Tuple[Dict[str, Any], Callable]]) –

List of OpenAI tools defined by a schema and associated function.
allow_parallel_function_calling (bool, default: False ) –

Whether to enable parallel function calling. Defaults to False.

Returns:

LlmStructuredOutputSettings –

Configured settings object.

Source code in llama_cpp_agent/llm_output_settings/settings.py

@staticmethod
def from_open_ai_tools(
        tools: List[Tuple[Dict[str, Any], Callable]],
        allow_parallel_function_calling: bool = False,
):
    """
    Build function calling settings from OpenAI-style tools.

    Args:
        tools (List[Tuple[Dict[str, Any], Callable]]): OpenAI tools, each given as a schema
            together with its associated function. Each entry is wrapped in a LlamaCppFunctionTool.
        allow_parallel_function_calling (bool): Selects the parallel function calling output type
            instead of the single function calling one. Defaults to False.

    Returns:
        LlmStructuredOutputSettings: Configured settings object.
    """
    if allow_parallel_function_calling:
        selected_output_type = LlmStructuredOutputType.parallel_function_calling
    else:
        selected_output_type = LlmStructuredOutputType.function_calling

    wrapped_tools = [LlamaCppFunctionTool(schema_and_function) for schema_and_function in tools]
    return LlmStructuredOutputSettings(
        output_type=selected_output_type,
        function_tools=wrapped_tools,
    )

`from_functions(tools, allow_parallel_function_calling=False, add_thoughts_and_reasoning_field=False, add_heartbeat_field=False)` `staticmethod`

Create function calling settings from a list of callable functions.

Parameters:

tools (list) –

List of callable functions.
allow_parallel_function_calling (bool, default: False ) –

Whether to enable parallel function calling. Defaults to False.
add_thoughts_and_reasoning_field (bool, default: False ) –

Whether to add a thoughts and reasoning field to output.
add_heartbeat_field (bool, default: False ) –

Whether to add a heartbeat field to output.

Returns: LlmStructuredOutputSettings: Configured settings object.

Raises:

NotImplementedError –

If the specified output type is not supported for tools.

Source code in llama_cpp_agent/llm_output_settings/settings.py

@staticmethod
def from_functions(
        tools: List[Callable], allow_parallel_function_calling: bool = False,
        add_thoughts_and_reasoning_field: bool = False,
        add_heartbeat_field: bool = False,
):
    """
    Build function calling settings from a list of callables.

    Args:
        tools (List[Callable]): The callables; each one is wrapped in a LlamaCppFunctionTool.
        allow_parallel_function_calling (bool): Selects the parallel function calling output type
            instead of the single function calling one. Defaults to False.
        add_thoughts_and_reasoning_field (bool): Whether to add a thoughts and reasoning field to output.
        add_heartbeat_field (bool): Whether to add a heartbeat field to output.

    Returns:
        LlmStructuredOutputSettings: Configured settings object.
    """
    if allow_parallel_function_calling:
        selected_output_type = LlmStructuredOutputType.parallel_function_calling
    else:
        selected_output_type = LlmStructuredOutputType.function_calling

    wrapped_tools = [LlamaCppFunctionTool(function) for function in tools]
    return LlmStructuredOutputSettings(
        add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
        output_type=selected_output_type,
        function_tools=wrapped_tools,
        add_heartbeat_field=add_heartbeat_field,
    )

`from_llama_index_tools(tools, allow_parallel_function_calling=False, add_thoughts_and_reasoning_field=False, add_heartbeat_field=False)` `staticmethod`

Create settings from a list of llama-index tools with a specific output type. Has to be either LlmOutputType.function_call or LlmOutputType.parallel_function_call.

Parameters:

tools (list) –

List of llama-index tools.
allow_parallel_function_calling (bool, default: False ) –

Whether to enable parallel function calling. Defaults to False.
add_thoughts_and_reasoning_field (bool, default: False ) –

Whether to add a thoughts and reasoning field to output.
add_heartbeat_field (bool, default: False ) –

Whether to add a heartbeat field to output.

Returns: LlmStructuredOutputSettings: Configured settings object.

Raises:

NotImplementedError –

If the specified output type is not supported for tools.

Source code in llama_cpp_agent/llm_output_settings/settings.py

@staticmethod
def from_llama_index_tools(
        tools: list, allow_parallel_function_calling: bool = False, add_thoughts_and_reasoning_field: bool = False,
        add_heartbeat_field: bool = False,
):
    """
    Build function calling settings from a list of llama-index tools.

    The resulting output type is always one of the two function calling types.

    Args:
        tools (list): The llama-index tools; each one is converted with
            `LlamaCppFunctionTool.from_llama_index_tool`.
        allow_parallel_function_calling (bool): Selects the parallel function calling output type
            instead of the single function calling one. Defaults to False.
        add_thoughts_and_reasoning_field (bool): Whether to add a thoughts and reasoning field to output.
        add_heartbeat_field (bool): Whether to add a heartbeat field to output.

    Returns:
        LlmStructuredOutputSettings: Configured settings object.
    """
    if allow_parallel_function_calling:
        selected_output_type = LlmStructuredOutputType.parallel_function_calling
    else:
        selected_output_type = LlmStructuredOutputType.function_calling

    converted_tools = [
        LlamaCppFunctionTool.from_llama_index_tool(index_tool) for index_tool in tools
    ]
    return LlmStructuredOutputSettings(
        output_type=selected_output_type,
        function_tools=converted_tools,
        add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
        add_heartbeat_field=add_heartbeat_field,
    )

`to_openai_tools()`

Return a list of OpenAI tools. Returns: List[Dict[str, Any]]: List of OpenAI tools.

Raises:

NotImplementedError –

If the specified output type is not supported for tools.

Source code in llama_cpp_agent/llm_output_settings/settings.py

def to_openai_tools(self):
    """
    Convert the registered function tools to their OpenAI tool representation.

    Returns:
        A list holding the result of `to_openai_tool()` for every registered function tool, in
        registration order, or None when `function_tools` is None.
    """
    registered_tools = self.function_tools
    if registered_tools is None:
        return None
    return [function_tool.to_openai_tool() for function_tool in registered_tools]

`add_llama_cpp_function_tool(tool)`

Add a LlamaCppFunctionTool to the settings.

Parameters:

tool (LlamaCppFunctionTool) –

The function tool to add.

Source code in llama_cpp_agent/llm_output_settings/settings.py

def add_llama_cpp_function_tool(self, tool: LlamaCppFunctionTool):
    """
    Register an already constructed LlamaCppFunctionTool with these settings.

    The tool is appended to `function_tools` as is; the output type is not checked.

    Args:
        tool (LlamaCppFunctionTool): The function tool to add.
    """
    registered_tools = self.function_tools
    registered_tools.append(tool)

`add_pydantic_model(model, name=None)`

Add a Pydantic model to the settings, ensuring it matches the specified output type.

Parameters:

model (BaseModel) –

The Pydantic model to add.

Raises:

NotImplementedError –

If no structured output is specified.

Source code in llama_cpp_agent/llm_output_settings/settings.py

def add_pydantic_model(self, model: BaseModel, name: str = None):
    """
    Register a Pydantic model in the way the configured output type requires.

    For `object_instance` and `list_of_objects` the model is appended to `pydantic_models`; for
    the two function calling types it is wrapped in a LlamaCppFunctionTool and appended to
    `function_tools`.

    Args:
        model (BaseModel): The Pydantic model to add.
        name (str): If given, assigned to `model.__name__` before the model is registered.

    Raises:
        NotImplementedError: If the output type is `no_structured_output`.
    """
    if name is not None:
        # The rename is applied before the output type is validated, so it sticks even if we raise below.
        model.__name__ = name

    kind = self.output_type
    if kind is LlmStructuredOutputType.no_structured_output:
        raise NotImplementedError(
            "LlmOutputType: no_structured_output not supported for structured output and function calling!"
        )

    if kind is LlmStructuredOutputType.object_instance or kind is LlmStructuredOutputType.list_of_objects:
        self.pydantic_models.append(model)
    elif (
            kind is LlmStructuredOutputType.function_calling
            or kind is LlmStructuredOutputType.parallel_function_calling
    ):
        self.function_tools.append(LlamaCppFunctionTool(model))

`add_open_ai_tool(open_ai_schema_and_function, name=None)`

Add an OpenAI tool to the settings, ensuring it matches the specified output type.

Parameters:

open_ai_schema_and_function (Tuple[Dict[str, Any], Callable]) –

The OpenAI schema and associated function to add.

Raises:

NotImplementedError –

If the output type does not support adding tools.

Source code in llama_cpp_agent/llm_output_settings/settings.py

def add_open_ai_tool(
        self, open_ai_schema_and_function: Tuple[Dict[str, Any], Callable], name: str = None
):
    """
    Register an OpenAI-style tool as a function tool.

    Only allowed when the output type is one of the two function calling types.

    Args:
        open_ai_schema_and_function (Tuple[Dict[str, Any], Callable]): The OpenAI schema and
            associated function to add.
        name (str): If given, passed to the wrapped tool's `set_name` before it is registered.

    Raises:
        NotImplementedError: If the output type is `no_structured_output` or any other type
            that is not function calling.
    """
    kind = self.output_type
    if kind is LlmStructuredOutputType.no_structured_output:
        raise NotImplementedError(
            "LlmOutputType: no_structured_output not supported for structured output and function calling!"
        )
    if (
            kind is not LlmStructuredOutputType.function_calling
            and kind is not LlmStructuredOutputType.parallel_function_calling
    ):
        raise NotImplementedError(
            f"LlmOutputType: {self.output_type.value} not supported for tools!"
        )

    wrapped = LlamaCppFunctionTool(open_ai_schema_and_function)
    if name is not None:
        wrapped.set_name(name)
    self.function_tools.append(wrapped)

`add_function_tool(function, name=None)`

Add a callable function to the settings, ensuring it matches the specified output type.

Parameters:

function (Callable) –

The function to add.

Raises:

NotImplementedError –

If the output type does not support adding tools.

Source code in llama_cpp_agent/llm_output_settings/settings.py

def add_function_tool(self, function: Callable, name: str = None):
    """
    Register a callable as a function tool.

    Only allowed when the output type is one of the two function calling types.

    Args:
        function (Callable): The function to add; it is wrapped in a LlamaCppFunctionTool.
        name (str): If given, passed to the wrapped tool's `set_name` before it is registered.

    Raises:
        NotImplementedError: If the output type is `no_structured_output` or any other type
            that is not function calling.
    """
    kind = self.output_type
    if kind is LlmStructuredOutputType.no_structured_output:
        raise NotImplementedError(
            "LlmOutputType: no_structured_output not supported for structured output and function calling!"
        )
    if (
            kind is not LlmStructuredOutputType.function_calling
            and kind is not LlmStructuredOutputType.parallel_function_calling
    ):
        raise NotImplementedError(
            f"LlmOutputType: {self.output_type.value} not supported for tools!"
        )

    wrapped = LlamaCppFunctionTool(function)
    if name is not None:
        wrapped.set_name(name)
    self.function_tools.append(wrapped)

`add_llama_index_tool(tool, name=None)`

Add a llama-index tool, like QueryEngineTool, to the settings, ensuring it matches the specified output type.

Parameters:

tool –

The llama-index tool to add.

Raises:

NotImplementedError –

If the output type does not support adding tools.

Source code in llama_cpp_agent/llm_output_settings/settings.py

def add_llama_index_tool(self, tool, name: str = None):
    """
    Register a llama-index tool, like QueryEngineTool, as a function tool.

    Only allowed when the output type is one of the two function calling types.

    Args:
        tool: The llama-index tool to add; it is converted with
            `LlamaCppFunctionTool.from_llama_index_tool`.
        name (str): If given, passed to the converted tool's `set_name` before it is registered.

    Raises:
        NotImplementedError: If the output type is `no_structured_output` or any other type
            that is not function calling.
    """
    kind = self.output_type
    if kind is LlmStructuredOutputType.no_structured_output:
        raise NotImplementedError(
            "LlmOutputType: no_structured_output not supported for structured output and function calling!"
        )
    if (
            kind is not LlmStructuredOutputType.function_calling
            and kind is not LlmStructuredOutputType.parallel_function_calling
    ):
        raise NotImplementedError(
            f"LlmOutputType: {self.output_type.value} not supported for tools!"
        )

    converted = LlamaCppFunctionTool.from_llama_index_tool(tool)
    if name is not None:
        converted.set_name(name)
    self.function_tools.append(converted)

`get_llm_documentation(provider)`

Generate documentation for the models and tools configured within the settings, based on the output type.

Returns:

str –

Generated documentation for the configured models or tools.

Raises:

NotImplementedError –

If no structured output is specified.

Source code in llama_cpp_agent/llm_output_settings/settings.py

def get_llm_documentation(self, provider):
    """
    Generate text documentation for the configured models or function tools.

    Args:
        provider: The provider in use. When it is a TGIServerProvider or a VLLMServerProvider,
            the documentation is generated with `ordered_json_mode` enabled.

    Returns:
        str: The stripped documentation text, or None when the output type matches none of the
        handled types.

    Raises:
        NotImplementedError: If the output type is `no_structured_output`.
    """
    # Imported here rather than at module level, as in the original code.
    from llama_cpp_agent.providers.tgi_server import TGIServerProvider
    from llama_cpp_agent.providers.vllm_server import VLLMServerProvider

    json_schema_mode = isinstance(provider, (TGIServerProvider, VLLMServerProvider))

    kind = self.output_type
    if kind == LlmStructuredOutputType.no_structured_output:
        raise NotImplementedError(
            "LlmOutputType: no_structured_output not supported for structured output and function calling!"
        )

    if kind == LlmStructuredOutputType.object_instance or kind == LlmStructuredOutputType.list_of_objects:
        documentation = generate_text_documentation(
            self.pydantic_models, ordered_json_mode=json_schema_mode
        )
        return documentation.strip()

    if (
            kind == LlmStructuredOutputType.function_calling
            or kind == LlmStructuredOutputType.parallel_function_calling
    ):
        documentation = generate_text_documentation(
            [function_tool.model for function_tool in self.function_tools],
            model_prefix="Function",
            fields_prefix="Parameters",
            ordered_json_mode=json_schema_mode,
        )
        return documentation.strip()

    return None

`get_gbnf_grammar()`

Generate a GBNF grammar for tools configured within the settings, based on the output type.

Returns:

str –

Generated GBNF grammar for the configured models or tools.

Raises:

NotImplementedError –

If no structured output is specified.

Source code in llama_cpp_agent/llm_output_settings/settings.py

def get_gbnf_grammar(self):
    """
    Generate a GBNF grammar for the configured models or function tools.

    The grammar is built from `pydantic_models` for the object output types and from the models
    of `function_tools` for the function calling types. List output is enabled for
    `list_of_objects` and `parallel_function_calling`. The heartbeat options are only passed
    for the function calling types.

    Returns:
        str: Generated GBNF grammar, or None when the output type matches none of the handled types.

    Raises:
        NotImplementedError: If the output type is `no_structured_output`.
    """
    kind = self.output_type
    if kind == LlmStructuredOutputType.no_structured_output:
        raise NotImplementedError(
            "LlmOutputType: no_structured_output not supported for structured output and function calling!"
        )

    if kind == LlmStructuredOutputType.object_instance or kind == LlmStructuredOutputType.list_of_objects:
        return generate_gbnf_grammar_from_pydantic_models(
            self.pydantic_models,
            list_of_outputs=kind == LlmStructuredOutputType.list_of_objects,
            add_inner_thoughts=self.add_thoughts_and_reasoning_field,
            outer_object_name=self.output_model_name_field_name,
            outer_object_content=self.output_model_attributes_field_name,
            inner_thought_field_name=self.thoughts_and_reasoning_field_name,
            allow_only_inner_thoughts=False,
            add_request_heartbeat=False,
        )

    if (
            kind == LlmStructuredOutputType.function_calling
            or kind == LlmStructuredOutputType.parallel_function_calling
    ):
        return generate_gbnf_grammar_from_pydantic_models(
            [function_tool.model for function_tool in self.function_tools],
            list_of_outputs=kind == LlmStructuredOutputType.parallel_function_calling,
            add_inner_thoughts=self.add_thoughts_and_reasoning_field,
            outer_object_name=self.function_calling_name_field_name,
            outer_object_content=self.function_calling_content,
            inner_thought_field_name=self.thoughts_and_reasoning_field_name,
            allow_only_inner_thoughts=False,
            add_request_heartbeat=self.add_heartbeat_field,
            request_heartbeat_field_name=self.heartbeat_field_name,
            request_heartbeat_models=self.heartbeat_function_names_list,
        )

    return None

`get_json_schema()`

Generate a JSON schema for the tools configured within the settings, based on the output type.

Returns:

Dict –

Generated JSON schema for the configured models or tools.

Raises:

NotImplementedError –

If no structured output is specified.

Source code in llama_cpp_agent/llm_output_settings/settings.py

def get_json_schema(self):
    """
    Generate a JSON schema for the configured models or function tools.

    Field names are passed with a three-digit prefix ("001_", "002_", ...). The thoughts and
    reasoning field always gets "001_"; when it is enabled, the remaining fields move one slot
    further back.

    Returns:
        Dict: Generated JSON schema, or None when the output type matches none of the handled types.

    Raises:
        NotImplementedError: If the output type is `no_structured_output`.
    """
    kind = self.output_type
    if kind == LlmStructuredOutputType.no_structured_output:
        raise NotImplementedError(
            "LlmOutputType: no_structured_output not supported for structured output and function calling!"
        )

    # NOTE(review): the numeric prefixes presumably keep the generated fields in a fixed order - confirm.
    if self.add_thoughts_and_reasoning_field:
        name_slot, content_slot, heartbeat_slot = "002_", "003_", "004_"
    else:
        name_slot, content_slot, heartbeat_slot = "001_", "002_", "003_"

    if kind == LlmStructuredOutputType.object_instance or kind == LlmStructuredOutputType.list_of_objects:
        return generate_json_schemas(
            self.pydantic_models,
            allow_list=kind == LlmStructuredOutputType.list_of_objects,
            outer_object_name=name_slot + self.output_model_name_field_name,
            outer_object_properties_name=content_slot + self.output_model_attributes_field_name,
            inner_thoughts_name=("001_" + self.thoughts_and_reasoning_field_name),
            add_inner_thoughts=self.add_thoughts_and_reasoning_field,
        )

    if (
            kind is LlmStructuredOutputType.function_calling
            or kind is LlmStructuredOutputType.parallel_function_calling
    ):
        return generate_json_schemas(
            [function_tool.model for function_tool in self.function_tools],
            allow_list=kind is LlmStructuredOutputType.parallel_function_calling,
            outer_object_name=name_slot + self.function_calling_name_field_name,
            outer_object_properties_name=content_slot + self.function_calling_content,
            inner_thoughts_name=("001_" + self.thoughts_and_reasoning_field_name),
            add_inner_thoughts=self.add_thoughts_and_reasoning_field,
            add_heartbeat=self.add_heartbeat_field,
            heartbeat_name=heartbeat_slot + self.heartbeat_field_name,
            heartbeat_list=self.heartbeat_function_names_list,
        )

    return None

`add_function_name_to_heartbeat_list(function_name)`

Add a function name to the heartbeat list. This way a heartbeat field gets added to the function calling output.

Source code in llama_cpp_agent/llm_output_settings/settings.py

def add_function_name_to_heartbeat_list(self, function_name: str):
    """
    Register one function name in the heartbeat list, so that a heartbeat field
    gets added to the function calling output.

    Args:
        function_name (str): The name appended to ``self.heartbeat_function_names_list``.
            The name is appended as-is; duplicates are not filtered out.
    """
    heartbeat_names = self.heartbeat_function_names_list
    heartbeat_names.append(function_name)

`add_all_current_functions_to_heartbeat_list(excluded=None)`

Add the names of all currently registered function tools to the heartbeat list, skipping any name listed in `excluded`. This way a heartbeat field gets added to the function calling output of those functions.

Source code in llama_cpp_agent/llm_output_settings/settings.py

def add_all_current_functions_to_heartbeat_list(self, excluded: list[str] = None):
    """
    Register the names of all current function tools in the heartbeat list.

    Args:
        excluded (list[str]): Model names that must not be added. ``None`` (the
            default) excludes nothing.
    """
    skipped = [] if excluded is None else excluded

    # Collect first and extend once, so the heartbeat list is only touched
    # after every tool name has been read.
    selected = []
    for tool in self.function_tools:
        tool_name = tool.model.__name__
        if tool_name in skipped:
            continue
        selected.append(tool_name)

    self.heartbeat_function_names_list.extend(selected)

`handle_function_call(function_call_response)`

Handle a function call response and return the output.

Parameters:

function_call_response (dict) –

The function call response.

Returns:

str –

The output of the function call or an error message.

Source code in llama_cpp_agent/llm_output_settings/settings.py

def handle_function_call(self, function_call_response: Union[dict, List[dict]]):
    """
    Dispatch a function call response to the matching internal handler.

    Args:
        function_call_response (Union[dict, List[dict]]): The function call
            response; it is passed unchanged to the selected handler.

    Returns:
        The value returned by ``intern_parallel_function_call`` when the output
        type is parallel function calling, otherwise the value returned by
        ``intern_function_call``. An ``"Error: ..."`` string is returned when the
        response is ``None`` or an ``AttributeError`` is raised while handling it.
    """
    if function_call_response is None:
        return "Error: Invalid function call response."

    try:
        if self.output_type == LlmStructuredOutputType.parallel_function_calling:
            return self.intern_parallel_function_call(function_call_response)
        return self.intern_function_call(function_call_response)
    except AttributeError as error:
        # Only AttributeError is converted to a message; other exceptions propagate.
        return f"Error: {error}"

`intern_function_call(function_call)`

Internal method to handle a function call and return the output.

Parameters:

function_call (dict) –

The function call dictionary.

Returns: str: The output of the function call or an error message.

Source code in llama_cpp_agent/llm_output_settings/settings.py

def intern_function_call(self, function_call: dict):
    """
    Run a single function call against the registered function tools.

    Args:
        function_call (dict): The function call dictionary. The arguments are read
            from the ``self.function_calling_content`` key and the tool name from
            the ``self.function_calling_name_field_name`` key.
    Returns:
        A one-element list holding a dictionary with the tool name, the call
        arguments and the ``"return_value"`` of the tool's ``run`` method.
        ``None`` when the call carries no arguments entry or no tool has a
        matching model name.
    """
    if self.function_calling_content not in function_call:
        return None

    # The requested name is read per candidate, so it is never looked up when
    # no tools are registered.
    matching_tool = next(
        (
            candidate
            for candidate in self.function_tools
            if candidate.model.__name__ == function_call[self.function_calling_name_field_name]
        ),
        None,
    )
    if matching_tool is None:
        return None

    model_cls = matching_tool.model
    arguments = function_call[self.function_calling_content]
    instance = model_cls(**arguments)
    return_value = instance.run(**matching_tool.additional_parameters)
    return [
        {
            self.function_calling_name_field_name: matching_tool.model.__name__,
            self.function_calling_content: arguments,
            "return_value": return_value,
        }
    ]

`intern_parallel_function_call(function_calls)`

Internal method to handle a list of function calls and return their outputs.

Parameters:

function_calls (List[dict]) –

The list of function call dictionaries.

Returns:

str –

The output of the function call or an error message.

Source code in llama_cpp_agent/llm_output_settings/settings.py

def intern_parallel_function_call(self, function_calls: List[dict]):
    """
    Run several function calls in order and collect their outputs.

    Args:
        function_calls (List[dict]): The function call dictionaries. Entries
            without the ``self.function_calling_content`` key, or naming a tool
            that is not registered, are skipped.

    Returns:
        A list with one dictionary per executed call (tool name, call arguments
        and ``"return_value"``). If executing a call raises ``AttributeError``,
        an ``"Error: ..."`` string is returned instead and the results gathered
        so far are dropped.
    """
    outputs = []
    for single_call in function_calls:
        if self.function_calling_content not in single_call:
            continue

        matching_tool = next(
            (
                candidate
                for candidate in self.function_tools
                if candidate.model.__name__ == single_call[self.function_calling_name_field_name]
            ),
            None,
        )
        if matching_tool is None:
            continue

        try:
            model_cls = matching_tool.model
            arguments = single_call[self.function_calling_content]
            instance = model_cls(**arguments)
            return_value = instance.run(**matching_tool.additional_parameters)
            outputs.append(
                {
                    self.function_calling_name_field_name: matching_tool.model.__name__,
                    self.function_calling_content: arguments,
                    "return_value": return_value,
                }
            )
        except AttributeError as error:
            return f"Error: {error}"

    return outputs

Function Calling Agent

`llama_cpp_agent.function_calling_agent`

`activate_message_mode`

Bases: BaseModel

Activates message mode.

Source code in llama_cpp_agent/function_calling_agent.py

class activate_message_mode(BaseModel):
    """
    Activates message mode.
    """

    def run(self, agent: "FunctionCallingAgent"):
        # Flag the agent so that its next response is requested in
        # "without grammar" mode, with a suffix asking for plain text.
        agent.without_grammar_mode = True
        agent.prompt_suffix = "\nWrite message in plain text format:"

        # Queue the agent's own user-facing sender as a callback for that response.
        pending_callbacks = agent.without_grammar_mode_function
        pending_callbacks.append(agent.send_message_to_user)
        return True

`send_message`

Bases: BaseModel

Sends a message to the user.

Source code in llama_cpp_agent/function_calling_agent.py

class send_message(BaseModel):
    """
    Sends a message to the user.
    """

    content: str = Field(..., description="Content of the message to be sent.")

    def run(self, agent: "FunctionCallingAgent"):
        # Delivery is delegated to the agent; this tool only reports a fixed
        # confirmation string as its return value.
        deliver = agent.send_message_to_user
        deliver(self.content)
        return "Message sent."

`write_text_file`

Bases: BaseModel

Writes content to a file.

Source code in llama_cpp_agent/function_calling_agent.py

class write_text_file(BaseModel):
    """
    Writes content to a file.
    """

    file_path: str = Field(..., description="The path to the file.")
    content: str = Field(..., description="The content to write to the file.")

    def run(self, agent: "FunctionCallingAgent"):
        # `agent` is part of the call signature but is not used by this tool.
        self.write_file(self.content)
        return True

    def write_file(self, content: str):
        """
        Write content to the file at ``self.file_path``, replacing what is there.

        Args:
            content (str): The content to write to the file.
        """
        # Mode "w" truncates an existing file; the text is encoded as UTF-8.
        with open(self.file_path, mode="w", encoding="utf-8") as handle:
            handle.write(content)

`write_file(content)`

Write content to a file.

Parameters:

content (str) –

The content to write to the file.

Source code in llama_cpp_agent/function_calling_agent.py

def write_file(self, content: str):
    """
    Write content to the file at ``self.file_path``, replacing what is there.

    Args:
        content (str): The content to write to the file.
    """
    # Mode "w" truncates an existing file; the text is encoded as UTF-8.
    with open(self.file_path, mode="w", encoding="utf-8") as handle:
        handle.write(content)

`read_text_file`

Bases: BaseModel

Reads the content of a file.

Source code in llama_cpp_agent/function_calling_agent.py

class read_text_file(BaseModel):
    """
    Reads the content of a file.
    """

    file_path: str = Field(..., description="The path to the file.")

    def run(self):
        # The tool result is simply whatever read_file() produces.
        contents = self.read_file()
        return contents

    def read_file(self):
        """
        Return the UTF-8 decoded text of the file at ``self.file_path``.

        A missing path is reported with the string "File not found." rather
        than an exception.
        """
        if not os.path.exists(self.file_path):
            return "File not found."

        with open(self.file_path, "r", encoding="utf-8") as handle:
            return handle.read()

`read_file()`

Reads the content of a file.

Source code in llama_cpp_agent/function_calling_agent.py

def read_file(self):
    """
    Return the UTF-8 decoded text of the file at ``self.file_path``.

    A missing path is reported with the string "File not found." rather
    than an exception.
    """
    if not os.path.exists(self.file_path):
        return "File not found."

    with open(self.file_path, "r", encoding="utf-8") as handle:
        return handle.read()

`FunctionCallingAgent`

An agent that uses function calling to interact with its environment and the user.

Parameters:

llama_llm (Llama | LlamaLLMSettings | LlamaCppEndpointSettings | OpenAIEndpointSettings) –

An instance of Llama, LlamaLLMSettings, LlamaCppEndpointSettings or LlamaCppServerLLMSettings as LLM.
llama_generation_settings (LlamaLLMGenerationSettings | LlamaCppGenerationSettings | OpenAIGenerationSettings) –

Generation settings for Llama.
messages_formatter_type (MessagesFormatterType, default: CHATML ) –

Type of messages formatter.
custom_messages_formatter (MessagesFormatter, default: None ) –

Optional Custom messages formatter.
streaming_callback (Callable[[StreamingResponse], None], default: None ) –

Callback function for streaming responses.
k_last_messages_from_chat_history (int, default: 0 ) –

Number of last messages to consider from chat history.
system_prompt (str, default: None ) –

System prompt for interaction.
llama_cpp_function_tools (List[LlamaCppFunctionTool], default: None ) –

List of LlamaCppFunctionTool instances.
allow_parallel_function_calling (bool, default: False ) –

Allow parallel function calling (Default=False)
add_send_message_to_user_function (bool, default: True ) –

Flag to add send_message_to_user function.
send_message_to_user_callback (Callable[[str], None], default: None ) –

Callback for sending a message to the user.
debug_output (bool, default: False ) –

Enable debug output.

Attributes:

send_message_to_user_callback (Callable[[str], None]) –

Callback for sending a message to the user.
llama_cpp_tools (List[LlamaCppFunctionTool]) –

List of LlamaCppFunctionTool instances.
tool_registry (LlamaCppFunctionToolRegistry) –

Function tool registry.
llama_generation_settings (LlamaLLMGenerationSettings) –

Generation settings for Llama.
system_prompt (str) –

System prompt for interaction.
llama_cpp_agent (LlamaCppAgent) –

LlamaCppAgent instance for interaction.
k_last_messages_from_chat_history (int) –

Number of last messages to consider from chat history.
streaming_callback (Callable[[StreamingResponse], None]) –

Callback function for streaming responses.

Methods:

save(file_path: str) –

Save the agent's state to a file.
load_from_file(file_path: str, llama_llm, python_functions, pydantic_functions, send_message_to_user_callback, streaming_callback) -> FunctionCallingAgent –

Load the agent's state from a file.
load_from_dict(agent_dict: dict) -> FunctionCallingAgent –

Load the agent's state from a dictionary.
as_dict() -> dict –

Convert the agent's state to a dictionary.
generate_response(message: str) –

Generate a response based on the input message.
send_message_to_user(message: str) –

Send a message to the user.

Source code in llama_cpp_agent/function_calling_agent.py

class FunctionCallingAgent:
    """
    An agent that uses function calling to interact with its environment and the user.

    Args:
        llama_llm (LlmProvider): The LLM provider handed to the wrapped LlamaCppAgent.
        messages_formatter_type (MessagesFormatterType): Type of messages formatter.
        custom_messages_formatter (MessagesFormatter): Optional Custom messages formatter.
        streaming_callback (Callable[[StreamingResponse], None]): Callback function for streaming responses.
        k_last_messages_from_chat_history (int): Number of last messages to consider from chat history.
        system_prompt (str): System prompt for interaction. A built-in default prompt is used when None.
        llama_cpp_function_tools (List[LlamaCppFunctionTool]): List of LlamaCppFunctionTool instances.
        basic_file_tools (bool): Flag to add the read_text_file and write_text_file tools.
        allow_parallel_function_calling (bool): Allow parallel function calling (Default=False)
        add_send_message_to_user_function (bool): Flag to add send_message_to_user function.
        send_message_to_user_callback (Callable[[str], None]): Callback for sending a message to the user.
        debug_output (bool): Enable debug output.

    Attributes:
        send_message_to_user_callback (Callable[[str], None]): Callback for sending a message to the user.
        llama_cpp_tools (List[LlamaCppFunctionTool]): The agent's own list of LlamaCppFunctionTool instances.
        allow_parallel_function_calling (bool): Whether parallel function calling is allowed.
        structured_output_settings (LlmStructuredOutputSettings): Default output settings built from the tools.
        without_grammar_mode (bool): One-shot flag that is cleared again by intern_get_response.
        without_grammar_mode_function (list): Callables invoked with the text of a plain string response.
        prompt_suffix (str): Suffix text; reset to "" after a without-grammar response.
        system_prompt (str): System prompt for interaction.
        llama_cpp_agent (LlamaCppAgent): LlamaCppAgent instance for interaction.
        k_last_messages_from_chat_history (int): Number of last messages to consider from chat history.
        streaming_callback (Callable[[StreamingResponse], None]): Callback function for streaming responses.

    Methods:
        load_from_dict(agent_dict: dict) -> FunctionCallingAgent: Create an agent from constructor keyword arguments.
        as_dict() -> dict: Convert the agent's state to a dictionary.
        generate_response(message: str): Generate a response based on the input message.
        intern_get_response(): Request one chat response from the wrapped LlamaCppAgent.
        send_message_to_user(message: str): Send a message to the user.

    """

    def __init__(
        self,
        llama_llm: LlmProvider,
        messages_formatter_type: MessagesFormatterType = MessagesFormatterType.CHATML,
        custom_messages_formatter: MessagesFormatter = None,
        streaming_callback: Callable[[StreamingResponse], None] = None,
        k_last_messages_from_chat_history: int = 0,
        system_prompt: str = None,
        llama_cpp_function_tools: List[LlamaCppFunctionTool] = None,
        basic_file_tools: bool = False,
        allow_parallel_function_calling=False,
        add_send_message_to_user_function: bool = True,
        send_message_to_user_callback: Callable[[str], None] = None,
        debug_output: bool = False,
    ):
        """
        Initialize the FunctionCallingAgent.

        Args:
            llama_llm (LlmProvider): The LLM Provider.
            messages_formatter_type (MessagesFormatterType): Type of messages formatter.
            custom_messages_formatter (MessagesFormatter): Optional Custom messages formatter.
            streaming_callback (Callable[[StreamingResponse], None]): Callback function for streaming responses.
            k_last_messages_from_chat_history (int): Number of last messages to consider from chat history.
            system_prompt (str): System prompt for interaction. A built-in default prompt is used when None.
            llama_cpp_function_tools (List[LlamaCppFunctionTool]): List of LlamaCppFunctionTool instances.
                The list is copied; the caller's list is not modified.
            basic_file_tools (bool): Flag to add the read_text_file and write_text_file tools.
            allow_parallel_function_calling (bool): Allow parallel function calling (Default=False)
            add_send_message_to_user_function (bool): Flag to add send_message_to_user function.
            send_message_to_user_callback (Callable[[str], None]): Callback for sending a message to the user.
            debug_output (bool): Enable debug output.
        """
        # Work on a copy: the built-in tools appended below are bound to this
        # agent and must not leak into the list object the caller passed in.
        self.llama_cpp_tools = (
            list(llama_cpp_function_tools) if llama_cpp_function_tools else []
        )

        self.send_message_to_user_callback = send_message_to_user_callback
        if add_send_message_to_user_function:
            self.llama_cpp_tools.append(LlamaCppFunctionTool(send_message, agent=self))

        if basic_file_tools:
            self.llama_cpp_tools.append(LlamaCppFunctionTool(read_text_file))
            self.llama_cpp_tools.append(
                LlamaCppFunctionTool(write_text_file, agent=self)
            )

        self.allow_parallel_function_calling = allow_parallel_function_calling

        self.structured_output_settings = (
            LlmStructuredOutputSettings.from_llama_cpp_function_tools(
                self.llama_cpp_tools, self.allow_parallel_function_calling
            )
        )
        self.structured_output_settings.add_thoughts_and_reasoning_field = True
        self.without_grammar_mode = False
        self.without_grammar_mode_function = []
        self.prompt_suffix = ""
        if system_prompt is not None:
            self.system_prompt = system_prompt
        else:
            self.system_prompt = """You are Funky, an AI assistant that calls functions to perform tasks. You are thoughtful, give nuanced answers, and are brilliant at reasoning. Below is a list of functions you can use to interact with the system. Each function has specific parameters and requirements. Make sure to follow the instructions for each function carefully.
Choose the appropriate function based on the task you want to perform. Provide your function calls in JSON format."""
        self.llama_cpp_agent = LlamaCppAgent(
            llama_llm,
            debug_output=debug_output,
            system_prompt=self.system_prompt,
            predefined_messages_formatter_type=messages_formatter_type,
            custom_messages_formatter=custom_messages_formatter,
        )

        self.k_last_messages_from_chat_history = k_last_messages_from_chat_history
        self.streaming_callback = streaming_callback


    @staticmethod
    def load_from_dict(agent_dict: dict) -> "FunctionCallingAgent":
        """
        Create an agent from a dictionary of constructor arguments.

        Args:
            agent_dict (dict): Keyword arguments for ``FunctionCallingAgent.__init__``.
                NOTE: the dictionary returned by ``as_dict`` is keyed by attribute
                names, which differ from the constructor's parameter names.

        Returns:
            FunctionCallingAgent: The loaded FunctionCallingAgent instance.
        """
        return FunctionCallingAgent(**agent_dict)

    def as_dict(self) -> dict:
        """
        Convert the agent's state to a dictionary.

        Returns:
           dict: The instance's attribute dictionary itself (not a copy).
        """
        return self.__dict__

    def generate_response(
        self,
        message: str,
        llm_sampling_settings: LlmSamplingSettings = None,
        structured_output_settings: LlmStructuredOutputSettings = None,
    ):
        """
        Generate a response based on the input message.

        The message is added to the chat as a user message. Responses are then
        requested in a loop: function call results are reported back to the
        chat as a tool message and the next response is requested, until the
        response is a plain string or one of the results belongs to the
        ``send_message`` function.

        Args:
            message (str): The user message.
            llm_sampling_settings (LlmSamplingSettings): Optional sampling settings passed on to each request.
            structured_output_settings (LlmStructuredOutputSettings): Optional settings used instead of the
                agent's own; their ``add_thoughts_and_reasoning_field`` is set to True.

        Returns:
            The last response: a plain string, or the function call results
            that contained the ``send_message`` call.
        """
        self.llama_cpp_agent.add_message(role=Roles.user, message=message)

        if structured_output_settings is not None:
            structured_output_settings.add_thoughts_and_reasoning_field = True

        result = self.intern_get_response(llm_sampling_settings=llm_sampling_settings, structured_output_settings=structured_output_settings)

        while True:
            if isinstance(result, str):
                # A plain text response ends the loop; hand it to each queued
                # callback once (callbacks are de-duplicated by __name__).
                called_names = []
                for func in self.without_grammar_mode_function:
                    if func.__name__ not in called_names:
                        func(result.strip())
                        called_names.append(func.__name__)
                break
            function_message = "Function Calling Results:\n\n"
            if result is not None:
                agent_sent_message = False
                for count, res in enumerate(result, start=1):
                    # String entries are reported verbatim. This check must come
                    # before any res["..."] lookup, which fails on a str.
                    if isinstance(res, str):
                        function_message += f"{count}. " + res + "\n\n"
                        continue
                    if res["function"] == "send_message":
                        agent_sent_message = True
                    if "params" in res:
                        function_message += f"""{count}. Function: "{res["function"]}"\nArguments: "{res["params"]}"\nReturn Value: {res["return_value"]}\n\n"""
                    else:
                        function_message += f"""{count}. Function: "{res["function"]}"\nReturn Value: {res["return_value"]}\n\n"""
                self.llama_cpp_agent.add_message(
                    role=Roles.tool, message=function_message.strip()
                )
                if agent_sent_message:
                    break
            result = self.intern_get_response(
                llm_sampling_settings=llm_sampling_settings, structured_output_settings=structured_output_settings
            )
        return result

    def intern_get_response(
        self,
        llm_sampling_settings: LlmSamplingSettings = None,
        structured_output_settings: LlmStructuredOutputSettings = None,
    ):
        """
        Request one chat response from the wrapped LlamaCppAgent.

        Args:
            llm_sampling_settings (LlmSamplingSettings): Optional sampling settings passed on to the request.
            structured_output_settings (LlmStructuredOutputSettings): Optional settings used instead of
                ``self.structured_output_settings``.

        Returns:
            The value returned by ``LlamaCppAgent.get_chat_response``.
        """
        # Consume the one-shot flag before the request is made.
        without_grammar_mode = False
        if self.without_grammar_mode:
            without_grammar_mode = True
            self.without_grammar_mode = False
        result = self.llama_cpp_agent.get_chat_response(
            streaming_callback=self.streaming_callback,
            structured_output_settings=self.structured_output_settings
            if structured_output_settings is None
            else structured_output_settings,
            llm_sampling_settings=llm_sampling_settings,
        )
        if without_grammar_mode:
            self.prompt_suffix = ""
        return result

    def send_message_to_user(self, message: str):
        """
        Send a message to the user.

        Args:
            message: The message to send. It goes to ``send_message_to_user_callback``
                when one is set and is printed otherwise.
        """
        if self.send_message_to_user_callback:
            self.send_message_to_user_callback(message)
        else:
            print(message)

`init(llama_llm, messages_formatter_type=MessagesFormatterType.CHATML, custom_messages_formatter=None, streaming_callback=None, k_last_messages_from_chat_history=0, system_prompt=None, llama_cpp_function_tools=None, basic_file_tools=False, allow_parallel_function_calling=False, add_send_message_to_user_function=True, send_message_to_user_callback=None, debug_output=False)`

Initialize the FunctionCallingAgent.

Parameters:

llama_llm (LlmProvider) –

The LLM Provider.
messages_formatter_type (MessagesFormatterType, default: CHATML ) –

Type of messages formatter.
custom_messages_formatter (MessagesFormatter, default: None ) –

Optional Custom messages formatter.
streaming_callback (Callable[[StreamingResponse], None], default: None ) –

Callback function for streaming responses.
k_last_messages_from_chat_history (int, default: 0 ) –

Number of last messages to consider from chat history.
system_prompt (str, default: None ) –

System prompt for interaction.
llama_cpp_function_tools (List[LlamaCppFunctionTool], default: None ) –

List of LlamaCppFunctionTool instances.
allow_parallel_function_calling (bool, default: False ) –

Allow parallel function calling (Default=False)
add_send_message_to_user_function (bool, default: True ) –

Flag to add send_message_to_user function.
send_message_to_user_callback (Callable[[str], None], default: None ) –

Callback for sending a message to the user.
debug_output (bool, default: False ) –

Enable debug output.

Source code in llama_cpp_agent/function_calling_agent.py

    def __init__(
        self,
        llama_llm: LlmProvider,
        messages_formatter_type: MessagesFormatterType = MessagesFormatterType.CHATML,
        custom_messages_formatter: MessagesFormatter = None,
        streaming_callback: Callable[[StreamingResponse], None] = None,
        k_last_messages_from_chat_history: int = 0,
        system_prompt: str = None,
        llama_cpp_function_tools: [LlamaCppFunctionTool] = None,
        basic_file_tools: bool = False,
        allow_parallel_function_calling=False,
        add_send_message_to_user_function: bool = True,
        send_message_to_user_callback: Callable[[str], None] = None,
        debug_output: bool = False,
    ):
        """
        Initialize the FunctionCallingAgent.

        Args:
            llama_llm (LlmProvider): The LLM Provider.
            messages_formatter_type (MessagesFormatterType): Type of messages formatter.
            custom_messages_formatter (MessagesFormatter): Optional Custom messages formatter.
            streaming_callback (Callable[[StreamingResponse], None]): Callback function for streaming responses.
            k_last_messages_from_chat_history (int): Number of last messages to consider from chat history.
            system_prompt (str): System prompt for interaction.
            llama_cpp_function_tools(List[LlamaCppFunctionTool]): List of LlamaCppFunctionTool instances.
            allow_parallel_function_calling (bool): Allow parallel function calling (Default=False)
            add_send_message_to_user_function (bool): Flag to add send_message_to_user function.
            send_message_to_user_callback (Callable[[str], None]): Callback for sending a message to the user.
            debug_output (bool): Enable debug output.
        """
        self.llama_cpp_tools = []
        if llama_cpp_function_tools:
            self.llama_cpp_tools = llama_cpp_function_tools

        self.send_message_to_user_callback = send_message_to_user_callback
        if add_send_message_to_user_function:
            self.llama_cpp_tools.append(LlamaCppFunctionTool(send_message, agent=self))

        if basic_file_tools:
            self.llama_cpp_tools.append(LlamaCppFunctionTool(read_text_file))
            self.llama_cpp_tools.append(
                LlamaCppFunctionTool(write_text_file, agent=self)
            )

        self.allow_parallel_function_calling = allow_parallel_function_calling

        self.structured_output_settings = (
            LlmStructuredOutputSettings.from_llama_cpp_function_tools(
                self.llama_cpp_tools, self.allow_parallel_function_calling
            )
        )
        self.structured_output_settings.add_thoughts_and_reasoning_field = True
        self.without_grammar_mode = False
        self.without_grammar_mode_function = []
        self.prompt_suffix = ""
        if system_prompt is not None:
            self.system_prompt = system_prompt
        else:
            self.system_prompt = """You are Funky, an AI assistant that calls functions to perform tasks. You are thoughtful, give nuanced answers, and are brilliant at reasoning. Below is a list of functions you can use to interact with the system. Each function has specific parameters and requirements. Make sure to follow the instructions for each function carefully.
Choose the appropriate function based on the task you want to perform. Provide your function calls in JSON format."""
        self.llama_cpp_agent = LlamaCppAgent(
            llama_llm,
            debug_output=debug_output,
            system_prompt=self.system_prompt,
            predefined_messages_formatter_type=messages_formatter_type,
            custom_messages_formatter=custom_messages_formatter,
        )

        self.k_last_messages_from_chat_history = k_last_messages_from_chat_history
        self.streaming_callback = streaming_callback

`load_from_dict(agent_dict)` `staticmethod`

Load the agent's state from a dictionary.

Parameters:

agent_dict (dict) –

The dictionary containing the agent's state.

Returns:

FunctionCallingAgent ( FunctionCallingAgent ) –

The loaded FunctionCallingAgent instance.

Source code in llama_cpp_agent/function_calling_agent.py

@staticmethod
def load_from_dict(agent_dict: dict) -> "FunctionCallingAgent":
    """
    Create an agent from a dictionary of constructor arguments.

    Args:
        agent_dict (dict): The dictionary containing the agent's state. Its
            entries are passed to ``FunctionCallingAgent`` as keyword arguments.

    Returns:
        FunctionCallingAgent: The newly constructed FunctionCallingAgent instance.
    """
    loaded_agent = FunctionCallingAgent(**agent_dict)
    return loaded_agent

`as_dict()`

Convert the agent's state to a dictionary.

Returns:

dict ( dict ) –

The dictionary representation of the agent's state.

Source code in llama_cpp_agent/function_calling_agent.py

def as_dict(self) -> dict:
    """
    Expose the agent's state as a dictionary.

    Returns:
       dict: The instance's attribute dictionary itself (not a copy), so
           changes made to it are reflected on the agent.
    """
    return vars(self)

`send_message_to_user(message)`

Send a message to the user.

Parameters:

message (str) –

The message to send to the user.

Source code in llama_cpp_agent/function_calling_agent.py

def send_message_to_user(self, message: str):
    """
    Deliver a message to the user.

    Args:
        message: The message to send. It is passed to
            ``self.send_message_to_user_callback`` when that is set (truthy)
            and printed to standard output otherwise.
    """
    deliver = self.send_message_to_user_callback or print
    deliver(message)

Structured Output Agent

`llama_cpp_agent.structured_output_agent`

`StructuredOutputAgent`

An agent that creates structured output based on pydantic models from unstructured text.

Parameters:

llama_llm (Union[Llama, LlamaLLMSettings, LlamaCppEndpointSettings, OpenAIEndpointSettings]) –

An instance of Llama, LlamaLLMSettings, LlamaCppServerLLMSettings, OpenAIEndpointSettings as LLM.
llama_generation_settings (Union[LlamaLLMGenerationSettings, LlamaCppGenerationSettings, OpenAIGenerationSettings]) –

Generation settings for Llama or LlamaCppServer.
messages_formatter_type (MessagesFormatterType, default: CHATML ) –

Type of messages formatter.
custom_messages_formatter (MessagesFormatter, default: None ) –

Custom messages formatter.
streaming_callback (Callable[[StreamingResponse], None], default: None ) –

Callback function for streaming responses.
debug_output (bool, default: False ) –

Enable debug output.

Attributes:

llama_generation_settings (Union[LlamaLLMGenerationSettings, LlamaCppServerGenerationSettings]) –

Generation settings for Llama or LlamaCppServer.
grammar_cache (dict) –

Cache for generated grammars.
system_prompt_template (PromptTemplate) –

Template for the system prompt.
creation_prompt_template (PromptTemplate) –

Template for the creation prompt.
llama_cpp_agent (LlamaCppAgent) –

LlamaCppAgent instance for interaction.
streaming_callback (Callable[[StreamingResponse], None]) –

Callback function for streaming responses.

Methods:

save(file_path: str) –

Save the agent's state to a file.
load_from_file(file_path: str, llama_llm, streaming_callback) -> StructuredOutputAgent –

Load the agent's state from a file.
load_from_dict(agent_dict: dict) -> StructuredOutputAgent –

Load the agent's state from a dictionary.
as_dict() -> dict –

Convert the agent's state to a dictionary.
create_object(model: Type[BaseModel], data: str = "") -> object –

Create an object of the given model from the given data.

Source code in llama_cpp_agent/structured_output_agent.py

class StructuredOutputAgent:
    """
    An agent that creates structured output based on pydantic models from unstructured text.

    Args:
        llama_llm (LlmProvider): The LLM provider handed to the wrapped LlamaCppAgent.
        messages_formatter_type (MessagesFormatterType): Type of messages formatter.
        custom_messages_formatter (MessagesFormatter): Custom messages formatter.
        streaming_callback (Callable[[StreamingResponse], None]): Callback function for streaming responses.
        debug_output (bool): Enable debug output.

    Attributes:
        grammar_cache (dict): Initialised empty; not used by the methods of this class.
        system_prompt_template (PromptTemplate): Template for the system prompt.
        creation_prompt_template (PromptTemplate): Template for the creation prompt.
        llama_cpp_agent (LlamaCppAgent): LlamaCppAgent instance for interaction.
        streaming_callback (Callable[[StreamingResponse], None]): Callback function for streaming responses.

    Methods:
        save(file_path: str): Save the agent's state to a file.
        as_dict() -> dict: Convert the agent's state to a dictionary.
        create_object(model: Type[BaseModel], data: str = "", llm_sampling_settings=None,
            returns_streaming_generator=False) -> object: Create an object of the given model
            from the given data.

    """

    def __init__(
        self,
        llama_llm: LlmProvider,
        messages_formatter_type: MessagesFormatterType = MessagesFormatterType.CHATML,
        custom_messages_formatter: MessagesFormatter = None,
        streaming_callback: Callable[[StreamingResponse], None] = None,
        debug_output: bool = False,
    ):
        """
        Initialize the StructuredOutputAgent.

        Args:
            llama_llm (LlmProvider): The LLM provider handed to the wrapped LlamaCppAgent.
            messages_formatter_type (MessagesFormatterType): Type of messages formatter.
            custom_messages_formatter (MessagesFormatter): Custom messages formatter.
            streaming_callback (Callable[[StreamingResponse], None]): Callback function for streaming responses.
            debug_output (bool): Enable debug output.
        """
        self.grammar_cache = {}
        self.system_prompt_template = PromptTemplate.from_string(
            "You are an advanced AI agent. You are tasked to assist the user by creating structured output in JSON format.\n\n{documentation}"
        )
        self.creation_prompt_template = PromptTemplate.from_string(
            "Create an JSON response based on the following input.\n\nInput:\n\n{user_input}"
        )

        # The wrapped agent starts with an empty system prompt; create_object
        # supplies a per-call system prompt instead.
        self.llama_cpp_agent = LlamaCppAgent(
            llama_llm,
            debug_output=debug_output,
            system_prompt="",
            predefined_messages_formatter_type=messages_formatter_type,
            custom_messages_formatter=custom_messages_formatter,
        )
        self.streaming_callback = streaming_callback

    def save(self, file_path: str):
        """
        Save the agent's state to a JSON file.

        The wrapped agent, the grammar cache, both prompt templates and the
        streaming callback are left out. The file records the wrapped agent's
        ``debug_output`` flag, its messages formatter (via ``as_dict()``) and,
        only if the instance carries a ``llama_generation_settings`` attribute,
        those settings as well.

        Args:
            file_path (str): The path to the file.
        """
        excluded_keys = (
            "llama_cpp_agent",
            "grammar_cache",
            "system_prompt_template",
            "creation_prompt_template",
            "streaming_callback",
        )
        # Build a new dict so the live instance __dict__ returned by as_dict()
        # is never mutated.
        state = {
            key: value
            for key, value in self.as_dict().items()
            if key not in excluded_keys
        }
        state["debug_output"] = self.llama_cpp_agent.debug_output

        # __init__ never assigns llama_generation_settings, so reading it
        # unconditionally raised AttributeError; include it only when present.
        generation_settings = getattr(self, "llama_generation_settings", None)
        if generation_settings is not None:
            state["llama_generation_settings"] = generation_settings.as_dict()

        state["custom_messages_formatter"] = self.llama_cpp_agent.messages_formatter.as_dict()

        # Serialise before opening the file: mode "w" truncates immediately, so
        # a failure while building or encoding the state must happen first.
        serialized = json.dumps(state, indent=4)
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(serialized)

    def as_dict(self) -> dict:
        """
        Convert the agent's state to a dictionary.

        Returns:
            dict: The instance's own ``__dict__`` (not a copy), so changes to
            the returned dictionary change the agent.
        """
        return self.__dict__

    def create_object(
        self,
        model: Type[BaseModel],
        data: str = "",
        llm_sampling_settings: LlmSamplingSettings = None,
        returns_streaming_generator: bool = False,
    ) -> object:
        """
        Creates an object of the given model from the given data.

        Args:
            model (Type[BaseModel]): The model to create the object from.
            data (str): The data to create the object from. When empty, the
                model is asked for a random response matching the model.
            llm_sampling_settings (LlmSamplingSettings): Forwarded unchanged to
                the wrapped agent's ``get_chat_response``.
            returns_streaming_generator (bool): Forwarded unchanged to the
                wrapped agent's ``get_chat_response``.

        Returns:
            object: Whatever ``get_chat_response`` returns for these settings
            (presumably an instance of ``model``; confirm against LlamaCppAgent).
        """
        output_settings = LlmStructuredOutputSettings.from_pydantic_models(
            [model], output_type=LlmStructuredOutputType.object_instance
        )

        system_prompt = self.system_prompt_template.generate_prompt(
            {
                "documentation": output_settings.get_llm_documentation(
                    self.llama_cpp_agent.provider
                ).strip()
            }
        )
        if data == "":
            prompt = "Create a random JSON response based on the response model."
        else:
            prompt = self.creation_prompt_template.generate_prompt({"user_input": data})
        # Neither the prompt nor the response is added to the chat history.
        response = self.llama_cpp_agent.get_chat_response(
            prompt,
            system_prompt=system_prompt,
            returns_streaming_generator=returns_streaming_generator,
            add_response_to_chat_history=False,
            add_message_to_chat_history=False,
            streaming_callback=self.streaming_callback,
            structured_output_settings=output_settings,
            llm_sampling_settings=llm_sampling_settings,
        )
        return response

`__init__(llama_llm, messages_formatter_type=MessagesFormatterType.CHATML, custom_messages_formatter=None, streaming_callback=None, debug_output=False)`

Initialize the StructuredOutputAgent.

Parameters:

llama_llm (Union[Llama, LlamaLLMSettings, LlamaCppEndpointSettings, OpenAIEndpointSettings]) –

An instance of Llama, LlamaLLMSettings, or LlamaCppServerLLMSettings as LLM.
llama_generation_settings (Union[LlamaLLMGenerationSettings, LlamaCppGenerationSettings, OpenAIGenerationSettings]) –

Generation settings for Llama or LlamaCppServer or OpenAIEndpoint.
messages_formatter_type (MessagesFormatterType, default: CHATML ) –

Type of messages formatter.
custom_messages_formatter (MessagesFormatter, default: None ) –

Custom messages formatter.
streaming_callback (Callable[[StreamingResponse], None], default: None ) –

Callback function for streaming responses.
debug_output (bool, default: False ) –

Enable debug output.

Source code in llama_cpp_agent/structured_output_agent.py

def __init__(
    self,
    llama_llm: LlmProvider,
    messages_formatter_type: MessagesFormatterType = MessagesFormatterType.CHATML,
    custom_messages_formatter: MessagesFormatter = None,
    streaming_callback: Callable[[StreamingResponse], None] = None,
    debug_output: bool = False,
):
    """
    Set up the prompt templates and the wrapped LlamaCppAgent.

    Args:
        llama_llm (LlmProvider): The LLM provider handed to the wrapped LlamaCppAgent.
        messages_formatter_type (MessagesFormatterType): Type of messages formatter.
        custom_messages_formatter (MessagesFormatter): Custom messages formatter.
        streaming_callback (Callable[[StreamingResponse], None]): Callback function for streaming responses.
        debug_output (bool): Enable debug output.
    """
    system_template = PromptTemplate.from_string(
        "You are an advanced AI agent. You are tasked to assist the user by creating structured output in JSON format.\n\n{documentation}"
    )
    creation_template = PromptTemplate.from_string(
        "Create an JSON response based on the following input.\n\nInput:\n\n{user_input}"
    )
    # The wrapped agent gets an empty system prompt at construction time.
    wrapped_agent = LlamaCppAgent(
        llama_llm,
        system_prompt="",
        predefined_messages_formatter_type=messages_formatter_type,
        custom_messages_formatter=custom_messages_formatter,
        debug_output=debug_output,
    )

    # Attribute order is kept so that the instance __dict__ keeps its layout.
    self.grammar_cache = {}
    self.system_prompt_template = system_template
    self.creation_prompt_template = creation_template
    self.llama_cpp_agent = wrapped_agent
    self.streaming_callback = streaming_callback

`save(file_path)`

Save the agent's state to a file.

Parameters:

file_path (str) –

The path to the file.

Source code in llama_cpp_agent/structured_output_agent.py

def save(self, file_path: str):
    """
    Save the agent's state to a JSON file.

    The wrapped agent, the grammar cache, both prompt templates and the
    streaming callback are left out. The file records the wrapped agent's
    ``debug_output`` flag, its messages formatter (via ``as_dict()``) and,
    only if the instance carries a ``llama_generation_settings`` attribute,
    those settings as well.

    Args:
        file_path (str): The path to the file.
    """
    excluded_keys = (
        "llama_cpp_agent",
        "grammar_cache",
        "system_prompt_template",
        "creation_prompt_template",
        "streaming_callback",
    )
    # Build a new dict so the dictionary returned by as_dict() is never mutated.
    state = {
        key: value
        for key, value in self.as_dict().items()
        if key not in excluded_keys
    }
    state["debug_output"] = self.llama_cpp_agent.debug_output

    # The attribute is not guaranteed to exist; reading it unconditionally
    # raised AttributeError, so include it only when present.
    generation_settings = getattr(self, "llama_generation_settings", None)
    if generation_settings is not None:
        state["llama_generation_settings"] = generation_settings.as_dict()

    state["custom_messages_formatter"] = self.llama_cpp_agent.messages_formatter.as_dict()

    # Serialise before opening the file: mode "w" truncates immediately, so a
    # failure while building or encoding the state must happen first.
    serialized = json.dumps(state, indent=4)
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(serialized)

`as_dict()`

Convert the agent's state to a dictionary.

Returns:

dict ( dict ) –

The dictionary representation of the agent's state.

Source code in llama_cpp_agent/structured_output_agent.py

def as_dict(self) -> dict:
    """
    Expose the agent's attributes as a dictionary.

    Returns:
        dict: The instance's own attribute dictionary (the live object, not a copy).
    """
    return vars(self)

`create_object(model, data='', llm_sampling_settings=None, returns_streaming_generator=False)`

Creates an object of the given model from the given data.

Parameters:

model (Type[BaseModel]) –

The model to create the object from.
data (str, default: '' ) –

The data to create the object from.

Returns:

object ( object ) –

The created object.

Source code in llama_cpp_agent/structured_output_agent.py

def create_object(
    self,
    model: Type[BaseModel],
    data: str = "",
    llm_sampling_settings: LlmSamplingSettings = None,
    returns_streaming_generator: bool = False,
) -> object:
    """
    Ask the wrapped agent for a response structured according to ``model``.

    Args:
        model (Type[BaseModel]): The model to create the object from.
        data (str): The data to create the object from. When empty, the
            model is asked for a random response matching the model.
        llm_sampling_settings (LlmSamplingSettings): Forwarded unchanged to
            the wrapped agent's ``get_chat_response``.
        returns_streaming_generator (bool): Forwarded unchanged to the
            wrapped agent's ``get_chat_response``.

    Returns:
        object: Whatever ``get_chat_response`` returns for these settings
        (presumably an instance of ``model``; confirm against LlamaCppAgent).
    """
    structured_settings = LlmStructuredOutputSettings.from_pydantic_models(
        [model], output_type=LlmStructuredOutputType.object_instance
    )

    # The system prompt embeds the documentation generated for the model.
    documentation = structured_settings.get_llm_documentation(
        self.llama_cpp_agent.provider
    ).strip()
    system_prompt = self.system_prompt_template.generate_prompt(
        {"documentation": documentation}
    )

    user_prompt = (
        "Create a random JSON response based on the response model."
        if data == ""
        else self.creation_prompt_template.generate_prompt({"user_input": data})
    )

    # Neither the prompt nor the response is added to the chat history.
    return self.llama_cpp_agent.get_chat_response(
        user_prompt,
        system_prompt=system_prompt,
        returns_streaming_generator=returns_streaming_generator,
        add_response_to_chat_history=False,
        add_message_to_chat_history=False,
        streaming_callback=self.streaming_callback,
        structured_output_settings=structured_settings,
        llm_sampling_settings=llm_sampling_settings,
    )

Misc

Messages Formatter

`llama_cpp_agent.messages_formatter`

`MessagesFormatterType`

Bases: Enum

Enum representing different types of predefined messages formatters.

Source code in llama_cpp_agent/messages_formatter.py

class MessagesFormatterType(Enum):
    """
    Enum representing different types of predefined messages formatters.

    Members are what ``get_predefined_messages_formatter`` takes as its
    ``formatter_type`` argument. Values are explicit integers numbered
    consecutively from 1 to 20.
    """

    MISTRAL = 1
    CHATML = 2
    VICUNA = 3
    LLAMA_2 = 4
    SYNTHIA = 5
    NEURAL_CHAT = 6
    SOLAR = 7
    OPEN_CHAT = 8
    ALPACA = 9
    CODE_DS = 10
    B22 = 11
    LLAMA_3 = 12
    PHI_3 = 13
    OPEN_INTERPRETER = 14
    AUTOCODER = 15
    GEMMA_2 = 16
    DEEP_SEEK_CODER_2 = 17
    PHI_4 = 18
    DEEPSEEK_R1_DISTILL_QWEN = 19
    MISTRAL_SMALL_3 = 20

`deepseek_r1_distill_qwen_chat_prompt_markers = {Roles.system: PromptMarkers('<｜begin▁of▁sentence｜>', ''), Roles.user: PromptMarkers('<｜User｜>', ''), Roles.assistant: PromptMarkers('<｜Assistant｜>', ''), Roles.tool: PromptMarkers('', '')}` `module-attribute`

Instruction:

{prompt}

Response:

`get_predefined_messages_formatter(formatter_type)`

Gets a predefined messages formatter based on the formatter type.

Parameters:

formatter_type (MessagesFormatterType) –

The type of messages formatter.

Returns:

MessagesFormatter ( MessagesFormatter ) –

The predefined messages formatter.

Source code in llama_cpp_agent/messages_formatter.py

def get_predefined_messages_formatter(
    formatter_type: MessagesFormatterType,
) -> MessagesFormatter:
    """
    Look up the predefined messages formatter registered for a formatter type.

    Args:
        formatter_type (MessagesFormatterType): The type of messages formatter.

    Returns:
        MessagesFormatter: The entry stored in ``predefined_formatter`` for that type.
    """
    selected_formatter = predefined_formatter[formatter_type]
    return selected_formatter

Prompt template

`llama_cpp_agent.llm_prompt_template`

`PromptTemplateField` `dataclass`

Data class representing a field in a prompt template.

Attributes:

name (str) –

The name of the template field.
value (str) –

The value associated with the template field.

Source code in llama_cpp_agent/llm_prompt_template.py

@dataclass
class PromptTemplateField:
    """
    Data class representing a field in a prompt template.

    A field pairs a name with the value stored for it;
    ``PromptTemplateFields.get_fields_dict`` turns a list of these into a
    ``{name: value}`` dictionary.

    Attributes:
        name (str): The name of the template field.
        value (str): The value associated with the template field.
    """

    name: str
    value: str

`PromptTemplateFields`

Class representing a collection of PromptTemplateField objects.

Methods:

add_field(name: str, value: str) –

Add a new field to the collection.
remove_field(name: str) –

Remove a field by name from the collection.
edit_field(name: str, new_value: str) –

Edit the value of an existing field.
find_field(name: str) -> PromptTemplateField –

Find and return a field by name.
list_fields() -> List[PromptTemplateField] –

Get a list of all fields in the collection.
get_fields_dict() -> Dict[str, str] –

Get a dictionary representation of the fields.
set_fields_from_dict(field_dict: Dict[str, str]) –

Set the fields using a dictionary.

Attributes:

fields (List[PromptTemplateField]) –

List of PromptTemplateField objects.

Source code in llama_cpp_agent/llm_prompt_template.py

class PromptTemplateFields:
    """
    An ordered collection of PromptTemplateField objects.

    Methods:
        add_field(name: str, value: str): Append a new field.
        remove_field(name: str): Drop every field with the given name.
        edit_field(name: str, new_value: str): Change the value of an existing field.
        find_field(name: str) -> PromptTemplateField: Return the first field with the given name.
        list_fields() -> List[PromptTemplateField]: Return the list of fields.
        get_fields_dict() -> Dict[str, str]: Return the fields as a name-to-value dictionary.
        set_fields_from_dict(field_dict: Dict[str, str]): Replace the fields with a dictionary's entries.

    Attributes:
        fields (List[PromptTemplateField]): List of PromptTemplateField objects.
    """

    def __init__(self):
        self.fields: List[PromptTemplateField] = []

    def add_field(self, name: str, value: str):
        """Append a new field; existing fields with the same name are kept."""
        entry = PromptTemplateField(name, value)
        self.fields.append(entry)

    def remove_field(self, name: str):
        """Rebind ``fields`` to a new list without the fields named ``name``."""
        self.fields = list(filter(lambda entry: entry.name != name, self.fields))

    def edit_field(self, name: str, new_value: str):
        """Set a new value on the first field named ``name``; raise ValueError if none exists."""
        target = self.find_field(name)
        if not target:
            raise ValueError(f"Field '{name}' not found.")
        target.value = new_value

    def find_field(self, name: str) -> PromptTemplateField:
        """Return the first field named ``name``, or None when there is none."""
        matches = (entry for entry in self.fields if entry.name == name)
        return next(matches, None)

    def list_fields(self):
        """Return the collection's field list (the live list, not a copy)."""
        current_fields = self.fields
        return current_fields

    def get_fields_dict(self) -> Dict[str, str]:
        """Return a name-to-value dictionary; for duplicate names the last value wins."""
        mapping = {}
        for entry in self.fields:
            mapping[entry.name] = entry.value
        return mapping

    def set_fields_from_dict(self, field_dict: Dict[str, str]):
        """Empty the field list in place, then add one field per dictionary item."""
        self.fields.clear()
        for pair in field_dict.items():
            self.add_field(*pair)

`add_field(name, value)`

Add a new field to the collection.

Source code in llama_cpp_agent/llm_prompt_template.py

def add_field(self, name: str, value: str):
    """Append a new field; existing fields with the same name are kept."""
    entry = PromptTemplateField(name, value)
    self.fields.append(entry)

`remove_field(name)`

Remove a field by name from the collection.

Source code in llama_cpp_agent/llm_prompt_template.py

def remove_field(self, name: str):
    """Rebind ``fields`` to a new list without the fields named ``name``."""
    self.fields = list(filter(lambda entry: entry.name != name, self.fields))

`edit_field(name, new_value)`

Edit the value of an existing field.

Source code in llama_cpp_agent/llm_prompt_template.py

def edit_field(self, name: str, new_value: str):
    """Set a new value on the field returned by ``find_field``; raise ValueError if none is found."""
    target = self.find_field(name)
    if not target:
        raise ValueError(f"Field '{name}' not found.")
    target.value = new_value

`find_field(name)`

Find and return a field by name.

Source code in llama_cpp_agent/llm_prompt_template.py

def find_field(self, name: str) -> PromptTemplateField:
    """Return the first field named ``name``, or None when there is none."""
    matches = (entry for entry in self.fields if entry.name == name)
    return next(matches, None)

`list_fields()`

Get a list of all fields in the collection.

Source code in llama_cpp_agent/llm_prompt_template.py

def list_fields(self):
    """Return the collection's field list (the live list, not a copy)."""
    current_fields = self.fields
    return current_fields

`get_fields_dict()`

Get a dictionary representation of the fields.

Source code in llama_cpp_agent/llm_prompt_template.py

def get_fields_dict(self) -> Dict[str, str]:
    """Return a name-to-value dictionary; for duplicate names the last value wins."""
    mapping = {}
    for entry in self.fields:
        mapping[entry.name] = entry.value
    return mapping

`set_fields_from_dict(field_dict)`

Set the fields using a dictionary.

Source code in llama_cpp_agent/llm_prompt_template.py

def set_fields_from_dict(self, field_dict: Dict[str, str]):
    """Empty the field list in place, then add one field per dictionary item."""
    self.fields.clear()
    for pair in field_dict.items():
        self.add_field(*pair)

`PromptTemplate`

Class representing a prompt template.

Methods:

generate_prompt(template_fields: Union[dict, PromptTemplateFields], remove_empty_template_field=True) -> str –

Generate a prompt by replacing placeholders in the template with values.

Class Methods:

from_string(template_string: str) -> PromptTemplate –

Create a PromptTemplate from a string.
from_file(template_file: str) -> PromptTemplate –

Create a PromptTemplate from a file.

Attributes:

template (str) –

The template string containing placeholders.

Source code in llama_cpp_agent/llm_prompt_template.py

class PromptTemplate:
    """
    Class representing a prompt template.

    Placeholders have the form ``{name}`` where ``name`` consists of word
    characters; they are replaced by the values passed to ``generate_prompt``.

    Methods:
        generate_prompt(template_fields: Union[dict, PromptTemplateFields], remove_empty_template_field=True) -> str:
        Generate a prompt by replacing placeholders in the template with values.

    Class Methods:
        from_string(template_string: str) -> PromptTemplate:
        Create a PromptTemplate from a string.
        from_file(template_file: str) -> PromptTemplate:
        Create a PromptTemplate from a file.

    Attributes:
        template (str): The template string containing placeholders.
    """

    def __init__(self, template_file=None, template_string=None):
        """
        Initialize a PromptTemplate instance.

        ``template_file`` takes precedence when both arguments are given.

        Args:
            template_file (str): The path to a file containing the template.
            template_string (str): The template string.

        Raises:
            ValueError: If neither argument is given (or both are empty/falsy).
        """
        if template_file:
            with open(template_file, "r") as file:
                self.template = file.read()
        elif template_string:
            self.template = template_string
        else:
            raise ValueError(
                "Either 'template_file' or 'template_string' must be provided"
            )

    @classmethod
    def from_string(cls, template_string):
        """
        Create a PromptTemplate instance from a string.

        Args:
            template_string (str): The template string.

        Returns:
            PromptTemplate: Created PromptTemplate instance.
        """
        return cls(template_string=template_string)

    @classmethod
    def from_file(cls, template_file):
        """
        Create a PromptTemplate instance from a file.

        The file is read here and its contents are passed on as
        ``template_string``.

        Args:
            template_file (str): The path to a file containing the template.

        Returns:
            PromptTemplate: Created PromptTemplate instance.
        """
        with open(template_file, "r") as file:
            template_string = file.read()
        return cls(template_string=template_string)

    def _remove_empty_placeholders(self, text):
        """
        Strip the empty-field marker and drop lines left blank by it.

        A line containing ``__EMPTY_TEMPLATE_FIELD__`` has the marker removed;
        if nothing but whitespace remains the whole line is dropped. Lines
        without the marker are kept unchanged.

        Args:
            text (str): The text containing placeholders.

        Returns:
            str: Text with empty placeholders removed.
        """
        processed_lines = []
        for line in text.split('\n'):
            if '__EMPTY_TEMPLATE_FIELD__' in line:
                new_line = line.replace('__EMPTY_TEMPLATE_FIELD__', '')
                if new_line.strip():
                    processed_lines.append(new_line)
            else:
                processed_lines.append(line)
        return '\n'.join(processed_lines)

    def generate_prompt(
        self,
        template_fields: Union[dict, PromptTemplateFields],
        remove_empty_template_field=True,
    ) -> str:
        """
        Generate a prompt by replacing placeholders in the template with values.

        Placeholders without a matching field are left in the text as they are.
        Non-string values are converted with ``str()``.

        Args:
            template_fields (Union[dict, PromptTemplateFields]): The template fields.
            remove_empty_template_field (bool): If True, a field whose value is
                the empty string is removed, and a line that held nothing else
                is dropped entirely. If False, the empty string is substituted.

        Returns:
            str: The generated prompt.
        """
        # Convert first: PromptTemplateFields has no .items(), so iterating
        # before this conversion raised AttributeError for that input type.
        if isinstance(template_fields, PromptTemplateFields):
            template_fields = template_fields.get_fields_dict()

        cleaned_fields = {
            key: value if isinstance(value, str) else str(value)
            for key, value in template_fields.items()
        }

        def replace_placeholder(match):
            # Unknown placeholders fall back to their own text, e.g. "{name}".
            replacement = cleaned_fields.get(match.group(1), match.group(0))
            if remove_empty_template_field and replacement == "":
                return "__EMPTY_TEMPLATE_FIELD__"
            return replacement

        prompt = re.sub(r"\{(\w+)\}", replace_placeholder, self.template)
        if not remove_empty_template_field:
            return prompt
        return self._remove_empty_placeholders(prompt)

`__init__(template_file=None, template_string=None)`

Initialize a PromptTemplate instance.

Parameters:

template_file (str, default: None ) –

The path to a file containing the template.
template_string (str, default: None ) –

The template string.

Source code in llama_cpp_agent/llm_prompt_template.py

def __init__(self, template_file=None, template_string=None):
    """
    Load the template text from a file or take it from a string.

    ``template_file`` takes precedence when both arguments are given.

    Args:
        template_file (str): The path to a file containing the template.
        template_string (str): The template string.

    Raises:
        ValueError: If neither argument is given (or both are empty/falsy).
    """
    if template_file:
        with open(template_file, "r") as handle:
            self.template = handle.read()
        return

    if template_string:
        self.template = template_string
        return

    raise ValueError("Either 'template_file' or 'template_string' must be provided")

`from_string(template_string)` `classmethod`

Create a PromptTemplate instance from a string.

Parameters:

template_string (str) –

The template string.

Returns:

PromptTemplate –

Created PromptTemplate instance.

Source code in llama_cpp_agent/llm_prompt_template.py

@classmethod
def from_string(cls, template_string):
    """
    Build an instance directly from template text.

    Args:
        template_string (str): The template string.

    Returns:
        PromptTemplate: Created PromptTemplate instance.
    """
    instance = cls(template_string=template_string)
    return instance

`from_file(template_file)` `classmethod`

Create a PromptTemplate instance from a file.

Parameters:

template_file (str) –

The path to a file containing the template.

Returns:

PromptTemplate –

Created PromptTemplate instance.

Source code in llama_cpp_agent/llm_prompt_template.py

@classmethod
def from_file(cls, template_file):
    """
    Build an instance from the contents of a template file.

    The file is read here and its contents are passed on as
    ``template_string``.

    Args:
        template_file (str): The path to a file containing the template.

    Returns:
        PromptTemplate: Created PromptTemplate instance.
    """
    with open(template_file, "r") as handle:
        contents = handle.read()
    return cls(template_string=contents)

`generate_prompt(template_fields, remove_empty_template_field=True)`

Generate a prompt by replacing placeholders in the template with values.

Parameters:

template_fields (Union[dict, PromptTemplateFields]) –

The template fields.
remove_empty_template_field (bool, default: True ) –

If True, removes lines with empty placeholders.

Returns:

str ( str ) –

The generated prompt.

Source code in llama_cpp_agent/llm_prompt_template.py

def generate_prompt(
    self,
    template_fields: Union[dict, PromptTemplateFields],
    remove_empty_template_field=True,
) -> str:
    """
    Generate a prompt by replacing placeholders in the template with values.

    Placeholders have the form ``{name}`` where ``name`` consists of word
    characters. Placeholders without a matching field are left in the text
    as they are. Non-string values are converted with ``str()``.

    Args:
        template_fields (Union[dict, PromptTemplateFields]): The template fields.
        remove_empty_template_field (bool): If True, a field whose value is
            the empty string is removed, and a line that held nothing else
            is dropped entirely. If False, the empty string is substituted.

    Returns:
        str: The generated prompt.
    """
    # Convert first: PromptTemplateFields has no .items(), so iterating
    # before this conversion raised AttributeError for that input type.
    if isinstance(template_fields, PromptTemplateFields):
        template_fields = template_fields.get_fields_dict()

    cleaned_fields = {
        key: value if isinstance(value, str) else str(value)
        for key, value in template_fields.items()
    }

    def replace_placeholder(match):
        # Unknown placeholders fall back to their own text, e.g. "{name}".
        replacement = cleaned_fields.get(match.group(1), match.group(0))
        if remove_empty_template_field and replacement == "":
            return "__EMPTY_TEMPLATE_FIELD__"
        return replacement

    prompt = re.sub(r"\{(\w+)\}", replace_placeholder, self.template)
    if not remove_empty_template_field:
        return prompt
    return self._remove_empty_placeholders(prompt)

Agents

Agents

llama_cpp_agent.llm_agent

StreamingResponse dataclass

__init__(text, is_last_response)

LlamaCppAgent

__init__(provider, name='llamacpp_agent', system_prompt='You are a helpful assistant.', predefined_messages_formatter_type=MessagesFormatterType.CHATML, custom_messages_formatter=None, chat_history=None, add_tools_and_structures_documentation_to_system_prompt=True, debug_output=False)

add_message(message, role)

get_text_response(prompt=None, structured_output_settings=None, llm_sampling_settings=None, streaming_callback=None, returns_streaming_generator=False, print_output=False)

Structured Output Settings

llama_cpp_agent.llm_output_settings.settings

LlmStructuredOutputType

LlmStructuredOutputSettings

from_llama_cpp_function_tools(llama_cpp_function_tools, allow_parallel_function_calling=False, add_thoughts_and_reasoning_field=False, add_heartbeat_field=False) staticmethod

from_pydantic_models(models, output_type, add_thoughts_and_reasoning_field=False) staticmethod

from_open_ai_tools(tools, allow_parallel_function_calling=False) staticmethod

from_functions(tools, allow_parallel_function_calling=False, add_thoughts_and_reasoning_field=False, add_heartbeat_field=False) staticmethod

from_llama_index_tools(tools, allow_parallel_function_calling=False, add_thoughts_and_reasoning_field=False, add_heartbeat_field=False) staticmethod

to_openai_tools()

add_llama_cpp_function_tool(tool)

add_pydantic_model(model, name=None)

add_open_ai_tool(open_ai_schema_and_function, name=None)

add_function_tool(function, name=None)

add_llama_index_tool(tool, name=None)

get_llm_documentation(provider)

get_gbnf_grammar()

get_json_schema()

add_function_name_to_heartbeat_list(function_name)

add_all_current_functions_to_heartbeat_list(excluded=None)

handle_function_call(function_call_response)

intern_function_call(function_call)

intern_parallel_function_call(function_calls)

Function Calling Agent

llama_cpp_agent.function_calling_agent

activate_message_mode

send_message

write_text_file

write_file(content)

read_text_file

read_file()

FunctionCallingAgent

load_from_dict(agent_dict) staticmethod

as_dict()

send_message_to_user(message)

Structured Output Agent

llama_cpp_agent.structured_output_agent

StructuredOutputAgent

__init__(llama_llm, messages_formatter_type=MessagesFormatterType.CHATML, custom_messages_formatter=None, streaming_callback=None, debug_output=False)

save(file_path)

as_dict()

create_object(model, data='', llm_sampling_settings=None, returns_streaming_generator=False)

Misc

Messages Formatter

llama_cpp_agent.messages_formatter

MessagesFormatterType

deepseek_r1_distill_qwen_chat_prompt_markers = {Roles.system: PromptMarkers('<｜begin▁of▁sentence｜>', ''), Roles.user: PromptMarkers('<｜User｜>', ''), Roles.assistant: PromptMarkers('<｜Assistant｜>', ''), Roles.tool: PromptMarkers('', '')} module-attribute

Instruction:

Response:

get_predefined_messages_formatter(formatter_type)

Prompt template

llama_cpp_agent.llm_prompt_template

PromptTemplateField dataclass

PromptTemplateFields

add_field(name, value)

remove_field(name)

edit_field(name, new_value)

find_field(name)

list_fields()

get_fields_dict()

set_fields_from_dict(field_dict)

PromptTemplate

__init__(template_file=None, template_string=None)

from_string(template_string) classmethod

from_file(template_file) classmethod

generate_prompt(template_fields, remove_empty_template_field=True)

`llama_cpp_agent.llm_agent`

`StreamingResponse` `dataclass`

`__init__(text, is_last_response)`

`LlamaCppAgent`

`__init__(provider, name='llamacpp_agent', system_prompt='You are a helpful assistant.', predefined_messages_formatter_type=MessagesFormatterType.CHATML, custom_messages_formatter=None, chat_history=None, add_tools_and_structures_documentation_to_system_prompt=True, debug_output=False)`

`add_message(message, role)`

`get_text_response(prompt=None, structured_output_settings=None, llm_sampling_settings=None, streaming_callback=None, returns_streaming_generator=False, print_output=False)`

`llama_cpp_agent.llm_output_settings.settings`

`LlmStructuredOutputType`

`LlmStructuredOutputSettings`

`from_llama_cpp_function_tools(llama_cpp_function_tools, allow_parallel_function_calling=False, add_thoughts_and_reasoning_field=False, add_heartbeat_field=False)` `staticmethod`

`from_pydantic_models(models, output_type, add_thoughts_and_reasoning_field=False)` `staticmethod`

`from_open_ai_tools(tools, allow_parallel_function_calling=False)` `staticmethod`

`from_functions(tools, allow_parallel_function_calling=False, add_thoughts_and_reasoning_field=False, add_heartbeat_field=False)` `staticmethod`

`from_llama_index_tools(tools, allow_parallel_function_calling=False, add_thoughts_and_reasoning_field=False, add_heartbeat_field=False)` `staticmethod`

`to_openai_tools()`

`add_llama_cpp_function_tool(tool)`

`add_pydantic_model(model, name=None)`

`add_open_ai_tool(open_ai_schema_and_function, name=None)`

`add_function_tool(function, name=None)`

`add_llama_index_tool(tool, name=None)`

`get_llm_documentation(provider)`

`get_gbnf_grammar()`

`get_json_schema()`

`add_function_name_to_heartbeat_list(function_name)`

`add_all_current_functions_to_heartbeat_list(excluded=None)`

`handle_function_call(function_call_response)`

`intern_function_call(function_call)`

`intern_parallel_function_call(function_calls)`

`llama_cpp_agent.function_calling_agent`

`activate_message_mode`

`send_message`

`write_text_file`

`write_file(content)`

`read_text_file`

`read_file()`

`FunctionCallingAgent`

`load_from_dict(agent_dict)` `staticmethod`

`as_dict()`

`send_message_to_user(message)`

`llama_cpp_agent.structured_output_agent`

`StructuredOutputAgent`

`__init__(llama_llm, messages_formatter_type=MessagesFormatterType.CHATML, custom_messages_formatter=None, streaming_callback=None, debug_output=False)`

`save(file_path)`

`as_dict()`

`create_object(model, data='', llm_sampling_settings=None, returns_streaming_generator=False)`

`llama_cpp_agent.messages_formatter`

`MessagesFormatterType`

`deepseek_r1_distill_qwen_chat_prompt_markers = {Roles.system: PromptMarkers('<｜begin▁of▁sentence｜>', ''), Roles.user: PromptMarkers('<｜User｜>', ''), Roles.assistant: PromptMarkers('<｜Assistant｜>', ''), Roles.tool: PromptMarkers('', '')}` `module-attribute`

`get_predefined_messages_formatter(formatter_type)`

`llama_cpp_agent.llm_prompt_template`

`PromptTemplateField` `dataclass`

`PromptTemplateFields`

`add_field(name, value)`

`remove_field(name)`

`edit_field(name, new_value)`

`find_field(name)`

`list_fields()`

`get_fields_dict()`

`set_fields_from_dict(field_dict)`

`PromptTemplate`

`__init__(template_file=None, template_string=None)`

`from_string(template_string)` `classmethod`

`from_file(template_file)` `classmethod`

`generate_prompt(template_fields, remove_empty_template_field=True)`