Skip to content

Agents

Agents

llama_cpp_agent.llm_agent

StreamingResponse dataclass

Represents a streaming response with text and an indicator for the last response.

Source code in llama_cpp_agent/llm_agent.py
@dataclass
class StreamingResponse:
    """
    One piece of a streamed LLM response.

    Attributes:
        text (str): The text carried by this piece of the stream.
        is_last_response (bool): True when this piece marks the end of the stream.
    """

    text: str
    is_last_response: bool

    def __init__(self, text: str, is_last_response: bool):
        """
        Creates a StreamingResponse from its two fields.

        Args:
            text (str): The text carried by this piece of the stream.
            is_last_response (bool): True when this piece marks the end of the stream.
        """
        # An explicitly defined __init__ is kept by @dataclass instead of being regenerated.
        self.text, self.is_last_response = text, is_last_response
__init__(text, is_last_response)

Initializes a new StreamingResponse object.

Parameters:

  • text (str) –

    The text content of the streaming response.

  • is_last_response (bool) –

    Indicates whether this is the last response in the stream.

Source code in llama_cpp_agent/llm_agent.py
def __init__(self, text: str, is_last_response: bool):
    """
    Creates a StreamingResponse from its two fields.

    Args:
        text (str): The text carried by this piece of the stream.
        is_last_response (bool): True when this piece marks the end of the stream.
    """
    self.text, self.is_last_response = text, is_last_response

LlamaCppAgent

A base agent that can be used for chat, structured output and function calling.

Source code in llama_cpp_agent/llm_agent.py
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
class LlamaCppAgent:
    """
    A base agent that can be used for chat, structured output and function calling.
    """

    def __init__(
            self,
            provider: LlmProvider,
            name: str = "llamacpp_agent",
            system_prompt: str = "You are a helpful assistant.",
            predefined_messages_formatter_type: MessagesFormatterType = MessagesFormatterType.CHATML,
            custom_messages_formatter: MessagesFormatter = None,
            chat_history: ChatHistory = None,
            add_tools_and_structures_documentation_to_system_prompt: bool = True,
            debug_output: bool = False,
    ):
        """
        Sets up a new LlamaCppAgent.

        The system prompt is always appended to the chat history as a system message,
        including when an existing `chat_history` object is passed in.

        Args:
            provider (LlmProvider): The underlying llm provider (LlamaCppServerProvider, LlamaCppPythonProvider, TGIServerProvider or VLLMServerProvider).
            name (str): The name of the agent.
            system_prompt (str): The system prompt used in chat interactions.
            predefined_messages_formatter_type (MessagesFormatterType): Selects a predefined messages formatter; only used when no custom formatter is given.
            custom_messages_formatter (MessagesFormatter): Custom messages formatter; takes precedence over the predefined type.
            chat_history (ChatHistory): Object that stores the chat history; a new BasicChatHistory is created when omitted.
            add_tools_and_structures_documentation_to_system_prompt (bool): Will suffix system prompt dynamically with documentation for function calling or structured output.
            debug_output (bool): Indicates whether debug output should be enabled.
        """
        self.provider = provider
        self.name = name
        self.debug_output = debug_output

        # A caller-supplied formatter wins; the predefined one is only looked up as a fallback.
        self.messages_formatter = (
            get_predefined_messages_formatter(predefined_messages_formatter_type)
            if custom_messages_formatter is None
            else custom_messages_formatter
        )
        self.last_response = ""
        self.chat_history = BasicChatHistory() if chat_history is None else chat_history

        self.add_message(role=Roles.system, message=system_prompt)
        self.system_prompt = system_prompt
        self.add_tools_and_structures_documentation_to_system_prompt = (
            add_tools_and_structures_documentation_to_system_prompt
        )

    def add_message(
            self,
            message: str,
            role: Roles,
    ):
        """
        Appends a single message to the agent's chat history.

        Args:
            message (str): The content of the message.
            role (Roles): The role of the message sender.
        """
        entry = {"role": role, "content": message}
        self.chat_history.add_message(entry)

    def get_text_response(
            self,
            prompt: str | list[int] = None,
            structured_output_settings: LlmStructuredOutputSettings = None,
            llm_sampling_settings: LlmSamplingSettings = None,
            streaming_callback: Callable[[StreamingResponse], None] = None,
            returns_streaming_generator: bool = False,
            print_output: bool = False,
    ) -> Union[
        str,
        List[dict],
        BaseModel,
        Generator[Any, Any, str | BaseModel | list[BaseModel]],
    ]:
        """
        Get a text response from the LLM provider for a raw prompt.

        The generated text is stored in `self.last_response` and then passed through
        `structured_output_settings.handle_structured_output`, whose result is returned.

        Args:
            prompt (str | list[int]): The prompt for the LLM.
            structured_output_settings (LlmStructuredOutputSettings): Settings for structured output. Defaults to no structured output.
            llm_sampling_settings (LlmSamplingSettings): Sampling settings for the LLM. Defaults to the provider's default settings; a passed-in object is deep-copied before use.
            streaming_callback (Callable[[StreamingResponse], None]): Called for every streamed chunk and once more with an empty, final StreamingResponse. Only used when streaming without a generator.
            returns_streaming_generator (bool): Whether to return a generator streaming the results. Only honoured when the sampling settings are streaming.
            print_output (bool): Whether to print the output.

        Returns:
            Union[str, List[dict], BaseModel, Generator[Any, Any, str | BaseModel | list[BaseModel]]]: The generated response. A string message, a list of function calls, an object from structured output or a generator for the response. The string "Error: No model loaded!" is returned when the agent has no provider.
        """
        if self.debug_output and isinstance(prompt, str):
            print(prompt, end="")

        # Check for a provider before it is dereferenced below; otherwise a missing
        # provider raises AttributeError instead of returning the error message.
        if not self.provider:
            return "Error: No model loaded!"

        if structured_output_settings is None:
            structured_output_settings = LlmStructuredOutputSettings(
                output_type=LlmStructuredOutputType.no_structured_output
            )
        if llm_sampling_settings is None:
            llm_sampling_settings = self.provider.get_provider_default_settings()
        else:
            # Work on a copy so the stop sequences added below do not end up on the caller's object.
            llm_sampling_settings = deepcopy(llm_sampling_settings)

        # The formatter's default stop sequences are only added when the settings
        # report a (non-None) collection of additional stop sequences.
        if llm_sampling_settings.get_additional_stop_sequences() is not None:
            llm_sampling_settings.add_additional_stop_sequences(
                self.messages_formatter.default_stop_sequences
            )

        completion = self.get_text_completion(
            prompt=prompt,
            structured_output_settings=structured_output_settings,
            llm_samplings_settings=llm_sampling_settings,
        )

        # Debug mode echoes the response exactly like print_output does, in every branch.
        echo = print_output or self.debug_output

        def finish(response_text):
            # Shared tail of all three paths: remember the raw text, then parse it.
            self.last_response = response_text
            return structured_output_settings.handle_structured_output(
                response_text, provider=self.provider
            )

        def stream_results():
            # Yields each chunk as it arrives; the parsed result becomes the generator's return value.
            chunks = []
            for out_stream in completion:
                out_text = out_stream["choices"][0]["text"]
                chunks.append(out_text)
                yield out_text
            return finish("".join(chunks))

        if not llm_sampling_settings.is_streaming():
            full_response = completion["choices"][0]["text"]
            if echo:
                print(full_response)
            return finish(full_response)

        if returns_streaming_generator:
            return stream_results()

        chunks = []
        for out in completion:
            text = out["choices"][0]["text"]
            chunks.append(text)
            if streaming_callback is not None:
                streaming_callback(
                    StreamingResponse(text=text, is_last_response=False)
                )
            if echo:
                print(text, end="")
        if streaming_callback is not None:
            # Signal the end of the stream with an empty, final response.
            streaming_callback(
                StreamingResponse(text="", is_last_response=True)
            )
        if echo:
            print("")
        return finish("".join(chunks))

    def get_chat_response(
            self,
            message: str = None,
            role: Roles = Roles.user,
            prompt_suffix: str = None,
            chat_history: ChatHistory = None,
            system_prompt: str = None,
            system_prompt_modules: list[SystemPromptModule] = None,
            add_message_to_chat_history: bool = True,
            add_response_to_chat_history: bool = True,
            structured_output_settings: LlmStructuredOutputSettings = None,
            llm_sampling_settings: LlmSamplingSettings = None,
            streaming_callback: Callable[[StreamingResponse], None] = None,
            returns_streaming_generator: bool = False,
            print_output: bool = False,
    ) -> Union[
        str,
        List[dict],
        BaseModel,
        Generator[Any, Any, str | BaseModel | list[BaseModel]],
    ]:
        """
        Get a chat response based on the input message and context.

        The generated text (prefixed with `prompt_suffix` when one is given) is stored in
        `self.last_response`, optionally appended to the chat history, and then passed
        through `structured_output_settings.handle_structured_output`, whose result is returned.

        Args:
            message (str): The input message.
            role (Roles): The role of the message sender.
            prompt_suffix (str): Suffix to append after the prompt. It is prepended to the generated text before the response is stored and parsed.
            chat_history (ChatHistory): Overwrite internal ChatHistory of the agent.
            system_prompt (str): Overwrites the system prompt set on the agent initialization.
            system_prompt_modules (list[SystemPromptModule]): Additional sections added to the system prompt.
            add_message_to_chat_history (bool): Whether to add the input message to the chat history.
            add_response_to_chat_history (bool): Whether to add the generated response to the chat history.
            structured_output_settings (LlmStructuredOutputSettings): Settings for structured output. Defaults to no structured output.
            llm_sampling_settings (LlmSamplingSettings): Sampling settings for the LLM. Defaults to the provider's default settings; a passed-in object is deep-copied before use.
            streaming_callback (Callable[[StreamingResponse], None]): Called for every streamed chunk and once more with an empty, final StreamingResponse. Only used when streaming without a generator.
            returns_streaming_generator (bool): Whether to return a generator streaming the results.
            print_output (bool): Whether to print the generated response.

        Returns:
            Union[str, List[dict], BaseModel, Generator[Any, Any, str | BaseModel | list[BaseModel]]]: The generated chat response. A string message, a list of function calls, an object from structured output or a generator for the response. The string "Error: No model loaded!" is returned when the agent has no provider.
        """
        # Check for a provider before it is dereferenced and before the chat history is
        # touched; otherwise a missing provider raises instead of returning the message.
        if not self.provider:
            return "Error: No model loaded!"

        if chat_history is None:
            chat_history = self.chat_history

        if structured_output_settings is None:
            structured_output_settings = LlmStructuredOutputSettings(
                output_type=LlmStructuredOutputType.no_structured_output
            )
        if llm_sampling_settings is None:
            llm_sampling_settings = self.provider.get_provider_default_settings()
        else:
            # Work on a copy so the stop sequences added below do not end up on the caller's object.
            llm_sampling_settings = deepcopy(llm_sampling_settings)

        # The formatter's default stop sequences are only added when the settings
        # report a (non-None) collection of additional stop sequences.
        if llm_sampling_settings.get_additional_stop_sequences() is not None:
            llm_sampling_settings.add_additional_stop_sequences(
                self.messages_formatter.default_stop_sequences
            )

        completion, response_role = self.get_response_role_and_completion(
            message=message,
            chat_history=chat_history,
            system_prompt=system_prompt,
            system_prompt_modules=system_prompt_modules,
            add_message_to_chat_history=add_message_to_chat_history,
            role=role,
            prompt_suffix=prompt_suffix,
            structured_output_settings=structured_output_settings,
            llm_sampling_settings=llm_sampling_settings,
        )

        eos_token = self.messages_formatter.eos_token
        echo = print_output or self.debug_output

        def finish(generated_text):
            # Shared tail of all three paths: prefix the suffix, remember the text,
            # record it in the history if requested, then parse it.
            response_text = prompt_suffix + generated_text if prompt_suffix else generated_text
            self.last_response = response_text
            if add_response_to_chat_history:
                chat_history.add_message(
                    {
                        "role": response_role,
                        "content": response_text,
                    }
                )
            return structured_output_settings.handle_structured_output(
                response_text, prompt_suffix=prompt_suffix, provider=self.provider
            )

        def stream_results():
            # Yields each chunk except the EOS token; the parsed result becomes the
            # generator's return value.
            chunks = []
            for out_stream in completion:
                out_text = out_stream["choices"][0]["text"]
                if out_text != eos_token:
                    chunks.append(out_text)
                    yield out_text
            return finish("".join(chunks))

        # NOTE(review): this is checked before is_streaming(), and the generator iterates
        # `completion` chunk by chunk - presumably this requires streaming sampling
        # settings; confirm against get_response_role_and_completion.
        if returns_streaming_generator:
            return stream_results()

        if llm_sampling_settings.is_streaming():
            chunks = []
            for out in completion:
                text = out["choices"][0]["text"]
                if text == eos_token:
                    # The EOS token is neither stored, reported nor printed.
                    continue
                chunks.append(text)
                if streaming_callback is not None:
                    streaming_callback(
                        StreamingResponse(text=text, is_last_response=False)
                    )
                if echo:
                    print(text, end="")
            if streaming_callback is not None:
                # Signal the end of the stream with an empty, final response.
                streaming_callback(
                    StreamingResponse(text="", is_last_response=True)
                )
            if echo:
                print("")
            return finish("".join(chunks))

        text = completion["choices"][0]["text"]
        # When the text ends with the EOS token (ignoring surrounding whitespace),
        # every occurrence of the token is removed from it.
        if text.strip().endswith(eos_token):
            text = text.replace(eos_token, "")
        if echo:
            print(text)
        return finish(text)

    def get_text_completion(
            self,
            prompt: str | list[int] = None,
            structured_output_settings: LlmStructuredOutputSettings = None,
            llm_samplings_settings: LlmSamplingSettings = None,
    ):
        """
        Requests a completion for the prompt from the agent's provider.

        Args:
            prompt (str | list[int]): The prompt for the LLM.
            structured_output_settings (LlmStructuredOutputSettings): Settings for structured output, forwarded to the provider.
            llm_samplings_settings (LlmSamplingSettings): Sampling settings for the LLM, forwarded to the provider.

        Returns:
            Whatever the provider's `create_completion` returns.
        """
        # The formatter's BOS token is passed along as the fourth positional argument.
        bos_token = self.messages_formatter.bos_token
        completion = self.provider.create_completion(
            prompt,
            structured_output_settings,
            llm_samplings_settings,
            bos_token,
        )
        return completion

    def get_response_role_and_completion(
            self,
            message: str = None,
            chat_history: ChatHistory = None,
            system_prompt: str = None,
            system_prompt_modules: list[SystemPromptModule] = None,
            add_message_to_chat_history: bool = True,
            role: Roles = Roles.user,
            prompt_suffix: str = None,
            llm_sampling_settings: LlmSamplingSettings = None,
            structured_output_settings: LlmStructuredOutputSettings = None,
    ):
        if len(chat_history.get_chat_messages()) == 0:
            if system_prompt:
                chat_history.add_message({"role": Roles.system, "content": system_prompt})
            else:
                chat_history.add_message({"role": Roles.system, "content": self.system_prompt})

        if message is not None and add_message_to_chat_history:
            chat_history.add_message(
                {
                    "role": role,
                    "content": message,
                }
            )

        messages = chat_history.get_chat_messages()
        if message is not None and not add_message_to_chat_history:
            messages.append(
                {
                    "role": role,
                    "content": message,
                },
            )

        if system_prompt:
            if messages[0]["role"] != Roles.system and (messages[0]["role"] != Roles.system.value):
                messages.insert(0, {"role": Roles.system, "content": system_prompt})
            else:
                messages[0]["content"] = system_prompt
        else:
            if messages[0]["role"] != Roles.system and (messages[0]["role"] != Roles.system.value):
                messages.insert(0, {"role": Roles.system, "content": self.system_prompt})
            else:
                messages[0]["content"] = self.system_prompt

        additional_suffix = ""
        if self.add_tools_and_structures_documentation_to_system_prompt:
            after_system_instructions_list = []
            after_system_instructions = ""
            if system_prompt_modules is not None:
                for module in system_prompt_modules:
                    if module.position == SystemPromptModulePosition.after_system_instructions:
                        after_system_instructions_list.append(module.get_formatted_content())
                if len(after_system_instructions_list) > 0:
                    after_system_instructions = "\n\n".join(after_system_instructions_list)
                else:
                    after_system_instructions = ""
            if structured_output_settings.output_type != LlmStructuredOutputType.no_structured_output:
                # additional_suffix = "\n"
                thoughts_and_reasoning = ""

                if structured_output_settings.output_type == LlmStructuredOutputType.function_calling or structured_output_settings.output_type == LlmStructuredOutputType.parallel_function_calling:
                    if structured_output_settings.add_thoughts_and_reasoning_field and self.provider.is_using_json_schema_constraints():

                        thoughts_and_reasoning = function_calling_thoughts_and_reasoning_templater
                        thoughts_and_reasoning = thoughts_and_reasoning.generate_prompt({
                            "thoughts_and_reasoning_field_name": "001_" + structured_output_settings.thoughts_and_reasoning_field_name})
                        function_field_name = "002_" + structured_output_settings.function_calling_name_field_name
                        arguments_field_name = "003_" + structured_output_settings.function_calling_content
                        heartbeat_beats = ""
                        if structured_output_settings.add_heartbeat_field:
                            heartbeat_field_name = "004_" + structured_output_settings.heartbeat_field_name
                            heartbeat_beats = function_calling_heart_beats_templater
                            heartbeat_beats = heartbeat_beats.generate_prompt(
                                {"heartbeat_field_name": heartbeat_field_name})
                        function_list = structured_output_settings.get_llm_documentation(
                            provider=self.provider)
                        system_prompt = function_calling_system_prompt_templater
                        system_prompt = system_prompt.generate_prompt({"system_instructions": messages[0]["content"],
                                                                       "after_system_instructions": after_system_instructions,
                                                                       "thoughts_and_reasoning": thoughts_and_reasoning,
                                                                       "function_field_name": function_field_name,
                                                                       "arguments_field_name": arguments_field_name,
                                                                       "heart_beats": heartbeat_beats,
                                                                       "function_list": function_calling_function_list_templater.generate_prompt(
                                                                           {"function_list": function_list})})
                        messages[0]["content"] = system_prompt
                    elif not structured_output_settings.add_thoughts_and_reasoning_field and self.provider.is_using_json_schema_constraints():

                        function_field_name = "001_" + structured_output_settings.function_calling_name_field_name
                        arguments_field_name = "002_" + structured_output_settings.function_calling_content
                        heartbeat_beats = ""
                        if structured_output_settings.add_heartbeat_field:
                            heartbeat_field_name = "003_" + structured_output_settings.heartbeat_field_name
                            heartbeat_beats = function_calling_heart_beats_templater
                            heartbeat_beats = heartbeat_beats.generate_prompt(
                                {"heartbeat_field_name": heartbeat_field_name})
                        function_list = structured_output_settings.get_llm_documentation(
                            provider=self.provider)
                        system_prompt = function_calling_system_prompt_templater
                        system_prompt = system_prompt.generate_prompt({"system_instructions": messages[0]["content"],
                                                                       "after_system_instructions": after_system_instructions,
                                                                       "thoughts_and_reasoning": thoughts_and_reasoning,
                                                                       "function_field_name": function_field_name,
                                                                       "arguments_field_name": arguments_field_name,
                                                                       "heart_beats": heartbeat_beats,
                                                                       "function_list": function_calling_function_list_templater.generate_prompt(
                                                                           {"function_list": function_list})})
                        messages[0]["content"] = system_prompt
                    elif structured_output_settings.add_thoughts_and_reasoning_field and not self.provider.is_using_json_schema_constraints():

                        thoughts_and_reasoning = function_calling_thoughts_and_reasoning_templater
                        thoughts_and_reasoning = thoughts_and_reasoning.generate_prompt({
                            "thoughts_and_reasoning_field_name": structured_output_settings.thoughts_and_reasoning_field_name})
                        function_field_name = structured_output_settings.function_calling_name_field_name
                        arguments_field_name = structured_output_settings.function_calling_content
                        heartbeat_beats = ""
                        if structured_output_settings.add_heartbeat_field:
                            heartbeat_field_name = structured_output_settings.heartbeat_field_name
                            heartbeat_beats = function_calling_heart_beats_templater
                            heartbeat_beats = heartbeat_beats.generate_prompt(
                                {"heartbeat_field_name": heartbeat_field_name})
                        function_list = structured_output_settings.get_llm_documentation(
                            provider=self.provider)
                        system_prompt = function_calling_system_prompt_templater
                        system_prompt = system_prompt.generate_prompt({"system_instructions": messages[0]["content"],
                                                                       "after_system_instructions": after_system_instructions,
                                                                       "thoughts_and_reasoning": thoughts_and_reasoning,
                                                                       "function_field_name": function_field_name,
                                                                       "arguments_field_name": arguments_field_name,
                                                                       "heart_beats": heartbeat_beats,
                                                                       "function_list": function_calling_function_list_templater.generate_prompt(
                                                                           {"function_list": function_list})})
                        messages[0]["content"] = system_prompt
                    elif not structured_output_settings.add_thoughts_and_reasoning_field and not self.provider.is_using_json_schema_constraints():

                        thoughts_and_reasoning = ""
                        function_field_name = structured_output_settings.function_calling_name_field_name
                        arguments_field_name = structured_output_settings.function_calling_content
                        heartbeat_beats = ""
                        if structured_output_settings.add_heartbeat_field:
                            heartbeat_field_name = structured_output_settings.heartbeat_field_name
                            heartbeat_beats = function_calling_heart_beats_templater
                            heartbeat_beats = heartbeat_beats.generate_prompt(
                                {"heartbeat_field_name": heartbeat_field_name})
                        function_list = structured_output_settings.get_llm_documentation(
                            provider=self.provider)
                        system_prompt = function_calling_system_prompt_templater
                        system_prompt = system_prompt.generate_prompt({"system_instructions": messages[0]["content"],
                                                                       "after_system_instructions": after_system_instructions,
                                                                       "thoughts_and_reasoning": thoughts_and_reasoning,
                                                                       "function_field_name": function_field_name,
                                                                       "arguments_field_name": arguments_field_name,
                                                                       "heart_beats": heartbeat_beats,
                                                                       "function_list": function_calling_function_list_templater.generate_prompt(
                                                                           {"function_list": function_list})})
                        messages[0]["content"] = system_prompt
                elif structured_output_settings.output_type == LlmStructuredOutputType.object_instance or structured_output_settings.output_type == LlmStructuredOutputType.list_of_objects:
                    if structured_output_settings.add_thoughts_and_reasoning_field and self.provider.is_using_json_schema_constraints():

                        thoughts_and_reasoning = structured_output_thoughts_and_reasoning_templater
                        thoughts_and_reasoning = thoughts_and_reasoning.generate_prompt({
                            "thoughts_and_reasoning_field_name": "001_" + structured_output_settings.thoughts_and_reasoning_field_name})
                        model_field_name = "002_" + structured_output_settings.output_model_name_field_name
                        fields_field_name = "003_" + structured_output_settings.output_model_attributes_field_name

                        output_models = structured_output_settings.get_llm_documentation(
                            provider=self.provider)
                        system_prompt = structured_output_templater
                        system_prompt = system_prompt.generate_prompt({"system_instructions": messages[0]["content"],
                                                                       "after_system_instructions": after_system_instructions,
                                                                       "thoughts_and_reasoning": thoughts_and_reasoning,
                                                                       "model_field_name": model_field_name,
                                                                       "fields_field_name": fields_field_name,
                                                                       "output_models": output_models})
                        messages[0]["content"] = system_prompt
                    elif not structured_output_settings.add_thoughts_and_reasoning_field and self.provider.is_using_json_schema_constraints():

                        thoughts_and_reasoning = ""
                        model_field_name = "001_" + structured_output_settings.output_model_name_field_name
                        fields_field_name = "002_" + structured_output_settings.output_model_attributes_field_name

                        output_models = structured_output_settings.get_llm_documentation(
                            provider=self.provider)
                        system_prompt = structured_output_templater
                        system_prompt = system_prompt.generate_prompt({"system_instructions": messages[0]["content"],
                                                                       "after_system_instructions": after_system_instructions,
                                                                       "thoughts_and_reasoning": thoughts_and_reasoning,
                                                                       "model_field_name": model_field_name,
                                                                       "fields_field_name": fields_field_name,
                                                                       "output_models": output_models})
                        messages[0]["content"] = system_prompt
                    elif structured_output_settings.add_thoughts_and_reasoning_field and not self.provider.is_using_json_schema_constraints():

                        thoughts_and_reasoning = structured_output_thoughts_and_reasoning_templater
                        thoughts_and_reasoning = thoughts_and_reasoning.generate_prompt({
                            "thoughts_and_reasoning_field_name": structured_output_settings.thoughts_and_reasoning_field_name})
                        model_field_name = structured_output_settings.output_model_name_field_name
                        fields_field_name = structured_output_settings.output_model_attributes_field_name

                        output_models = structured_output_settings.get_llm_documentation(
                            provider=self.provider)
                        system_prompt = structured_output_templater
                        system_prompt = system_prompt.generate_prompt({"system_instructions": messages[0]["content"],
                                                                       "after_system_instructions": after_system_instructions,
                                                                       "thoughts_and_reasoning": thoughts_and_reasoning,
                                                                       "model_field_name": model_field_name,
                                                                       "fields_field_name": fields_field_name,
                                                                       "output_models": output_models})
                        messages[0]["content"] = system_prompt
                    elif not structured_output_settings.add_thoughts_and_reasoning_field and not self.provider.is_using_json_schema_constraints():

                        model_field_name = structured_output_settings.output_model_name_field_name
                        fields_field_name = structured_output_settings.output_model_attributes_field_name

                        output_models = structured_output_settings.get_llm_documentation(
                            provider=self.provider)
                        system_prompt = structured_output_templater
                        system_prompt = system_prompt.generate_prompt({"system_instructions": messages[0]["content"],
                                                                       "after_system_instructions": after_system_instructions,
                                                                       "thoughts_and_reasoning": thoughts_and_reasoning,
                                                                       "model_field_name": model_field_name,
                                                                       "fields_field_name": fields_field_name,
                                                                       "output_models": output_models})
                        messages[0]["content"] = system_prompt

            if structured_output_settings.output_type == LlmStructuredOutputType.no_structured_output or structured_output_settings is None:
                messages[0]["content"] += "\n" + after_system_instructions
        at_end_list = []
        if system_prompt_modules is not None:
            for module in system_prompt_modules:
                if module.position == SystemPromptModulePosition.at_end:
                    at_end_list.append(module.get_formatted_content())
            if len(at_end_list) > 0:
                at_end_list = "\n\n".join(at_end_list)
            else:
                at_end_list = ""

            messages[0]["content"] += at_end_list
        prompt, response_role = self.messages_formatter.format_conversation(
            messages, Roles.assistant
        )

        if prompt_suffix:
            prompt += prompt_suffix

        if self.debug_output:
            print(prompt, end="")

        return (
            self.provider.create_completion(
                prompt if self.provider.get_provider_identifier() is not LlmProviderId.groq else messages,
                structured_output_settings,
                llm_sampling_settings,
                self.messages_formatter.bos_token,
            ),
            response_role,
        )

    @staticmethod
    def remove_any(text, list_of_strings):
        for item in list_of_strings:
            text = text.replace(item, "")
        return text
__init__(provider, name='llamacpp_agent', system_prompt='You are a helpful assistant.', predefined_messages_formatter_type=MessagesFormatterType.CHATML, custom_messages_formatter=None, chat_history=None, add_tools_and_structures_documentation_to_system_prompt=True, debug_output=False)

Initializes a new LlamaCppAgent object.

Parameters:

  • provider (LlmProvider) –

    The underlying llm provider (LlamaCppServerProvider, LlamaCppPythonProvider, TGIServerProvider or VLLMServerProvider).

  • name (str, default: 'llamacpp_agent' ) –

    The name of the agent.

  • system_prompt (str, default: 'You are a helpful assistant.' ) –

    The system prompt used in chat interactions.

  • predefined_messages_formatter_type (MessagesFormatterType, default: CHATML ) –

    The type of predefined messages formatter.

  • custom_messages_formatter (MessagesFormatter, default: None ) –

    Custom messages formatter.

  • chat_history (ChatHistory, default: None ) –

    This will handle the chat history.

  • add_tools_and_structures_documentation_to_system_prompt (bool, default: True ) –

    Will suffix system prompt dynamically with documentation for function calling or structured output.

  • debug_output (bool, default: False ) –

    Indicates whether debug output should be enabled.

Source code in llama_cpp_agent/llm_agent.py
def __init__(
        self,
        provider: LlmProvider,
        name: str = "llamacpp_agent",
        system_prompt: str = "You are a helpful assistant.",
        predefined_messages_formatter_type: MessagesFormatterType = MessagesFormatterType.CHATML,
        custom_messages_formatter: MessagesFormatter = None,
        chat_history: ChatHistory = None,
        add_tools_and_structures_documentation_to_system_prompt: bool = True,
        debug_output: bool = False,
):
    """
    Sets up a new LlamaCppAgent.

    Args:
       provider (LlmProvider): The LLM provider the agent sends its requests to.
       name (str): The name of the agent.
       system_prompt (str): The system prompt; it is stored on the agent and also added to the chat history as a system message.
       predefined_messages_formatter_type (MessagesFormatterType): The predefined messages formatter to use when no custom formatter is given.
       custom_messages_formatter (MessagesFormatter): Custom messages formatter; takes precedence over the predefined one.
       chat_history (ChatHistory): The chat history to use. A new BasicChatHistory is created when omitted.
       add_tools_and_structures_documentation_to_system_prompt (bool): Will suffix system prompt dynamically with documentation for function calling or structured output.
       debug_output (bool): Indicates whether debug output should be enabled.
    """
    self.provider = provider
    self.name = name
    self.debug_output = debug_output
    self.last_response = ""

    # A custom formatter wins; the predefined one is only looked up when needed.
    self.messages_formatter = (
        custom_messages_formatter
        if custom_messages_formatter is not None
        else get_predefined_messages_formatter(predefined_messages_formatter_type)
    )

    # Fall back to a fresh in-memory history when the caller supplies none.
    self.chat_history = BasicChatHistory() if chat_history is None else chat_history

    # The history must exist before the system prompt can be recorded in it.
    self.add_message(role=Roles.system, message=system_prompt)
    self.system_prompt = system_prompt
    self.add_tools_and_structures_documentation_to_system_prompt = (
        add_tools_and_structures_documentation_to_system_prompt
    )
add_message(message, role)

Adds a message to the chat history.

Parameters:

  • message (str) –

    The content of the message.

  • role (Literal['system'] | Literal['user'] | Literal['assistant'] | Literal['tool']) –

    The role of the message sender.

Source code in llama_cpp_agent/llm_agent.py
def add_message(
        self,
        message: str,
        role: Roles,
):
    """
    Appends a single message to the agent's chat history.

    Args:
        message (str): The text of the message.
        role (Literal["system"] | Literal["user"] | Literal["assistant"] | Literal["tool"]): Who the message is attributed to.
    """
    # The history stores messages as plain role/content dictionaries.
    entry = {"role": role, "content": message}
    self.chat_history.add_message(entry)
get_text_response(prompt=None, structured_output_settings=None, llm_sampling_settings=None, streaming_callback=None, returns_streaming_generator=False, print_output=False)

Get a text response from the LLM provider.

Parameters:

  • prompt (str | list[int], default: None ) –

    The prompt for the LLM.

  • structured_output_settings (LlmStructuredOutputSettings, default: None ) –

    Settings for structured output.

  • llm_sampling_settings (LlmSamplingSettings, default: None ) –

    Sampling settings for the LLM.

  • streaming_callback (Callable[[StreamingResponse], None], default: None ) –

    Callback for streaming responses.

  • returns_streaming_generator (bool, default: False ) –

    Whether to return a generator streaming the results.

  • print_output (bool, default: False ) –

    Whether to print the output.

Returns:

  • Union[str, List[dict], BaseModel, Generator[Any, Any, str | BaseModel | list[BaseModel]]]

    Union[str, List[dict], BaseModel, Generator[Any, Any, str | BaseModel | list[BaseModel]]]: The generated response: a string message, a list of function calls, an object from structured output, or a generator for the response.

Source code in llama_cpp_agent/llm_agent.py
def get_text_response(
        self,
        prompt: str = None,
        structured_output_settings: LlmStructuredOutputSettings = None,
        llm_sampling_settings: LlmSamplingSettings = None,
        streaming_callback: Callable[[StreamingResponse], None] = None,
        returns_streaming_generator: bool = False,
        print_output: bool = False,
) -> Union[
    str,
    List[dict],
    BaseModel,
    Generator[Any, Any, str | BaseModel | list[BaseModel]],
]:
    """
    Get a text response from the LLM provider.

    The raw text of the response is stored in ``self.last_response`` before it
    is passed through the structured output handling. When a streaming
    generator is returned, this happens once the generator is exhausted.

    Args:
        prompt (str | list[int]): The prompt for the LLM.
        structured_output_settings (LlmStructuredOutputSettings): Settings for structured output. Defaults to no structured output.
        llm_sampling_settings (LlmSamplingSettings): Sampling settings for the LLM. Defaults to the provider's default settings; a passed object is deep-copied and never modified.
        streaming_callback (Callable[[StreamingResponse], None]): Callback for streaming responses. Called for every chunk and once more with an empty, final response.
        returns_streaming_generator (bool): Whether to return a generator streaming the results. Only honoured when the sampling settings enable streaming.
        print_output (bool): Whether to print the output.

    Returns:
        Union[str, List[dict], BaseModel, Generator[Any, Any, str | BaseModel | list[BaseModel]]]: The generated response. A string message, a list of function calls, an object from structured output or a generator for the response. The string "Error: No model loaded!" is returned when the agent has no provider.
    """

    if self.debug_output and isinstance(prompt, str):
        print(prompt, end="")

    # Check the provider before it is dereferenced below; otherwise a missing
    # provider raises AttributeError and this fallback can never be reached.
    if not self.provider:
        return "Error: No model loaded!"

    if structured_output_settings is None:
        structured_output_settings = LlmStructuredOutputSettings(
            output_type=LlmStructuredOutputType.no_structured_output
        )
    if llm_sampling_settings is None:
        llm_sampling_settings = self.provider.get_provider_default_settings()
    else:
        # Work on a copy so the stop sequences added below do not leak into
        # the caller's settings object.
        llm_sampling_settings = deepcopy(llm_sampling_settings)

    if llm_sampling_settings.get_additional_stop_sequences() is not None:
        llm_sampling_settings.add_additional_stop_sequences(
            self.messages_formatter.default_stop_sequences
        )

    completion = self.get_text_completion(
        prompt=prompt,
        structured_output_settings=structured_output_settings,
        llm_samplings_settings=llm_sampling_settings,
    )

    if not llm_sampling_settings.is_streaming():
        # Non-streaming: the completion is a single response object.
        full_response = completion["choices"][0]["text"]
        if print_output or self.debug_output:
            print(full_response)
        self.last_response = full_response
        return structured_output_settings.handle_structured_output(
            full_response, provider=self.provider
        )

    def stream_results():
        # Yields each chunk as it arrives; the processed result becomes the
        # generator's return value.
        full_response_stream = ""
        for out_stream in completion:
            out_text = out_stream["choices"][0]["text"]
            full_response_stream += out_text
            yield out_text

        # Record the raw response like every other path does.
        self.last_response = full_response_stream
        return structured_output_settings.handle_structured_output(
            full_response_stream, provider=self.provider
        )

    if returns_streaming_generator:
        return stream_results()

    full_response = ""
    for out in completion:
        text = out["choices"][0]["text"]
        full_response += text
        if streaming_callback is not None:
            streaming_callback(
                StreamingResponse(text=text, is_last_response=False)
            )
        # Echo chunks under debug output too, matching the trailing newline
        # below and the non-streaming branch.
        if print_output or self.debug_output:
            print(text, end="")
    if streaming_callback is not None:
        streaming_callback(
            StreamingResponse(text="", is_last_response=True)
        )
    if print_output or self.debug_output:
        print("")
    self.last_response = full_response
    return structured_output_settings.handle_structured_output(
        full_response, provider=self.provider
    )
get_chat_response(message=None, role=Roles.user, prompt_suffix=None, chat_history=None, system_prompt=None, system_prompt_modules=None, add_message_to_chat_history=True, add_response_to_chat_history=True, structured_output_settings=None, llm_sampling_settings=None, streaming_callback=None, returns_streaming_generator=False, print_output=False)

Get a chat response based on the input message and context.

Parameters:

  • message (str, default: None ) –

    The input message.

  • role (Literal['system', 'user', 'assistant', 'tool'], default: user ) –

    The role of the message sender.

  • prompt_suffix (str, default: None ) –

    Suffix to append after the prompt.

  • chat_history (ChatHistory, default: None ) –

    Overwrite internal ChatHistory of the agent.

  • system_prompt (str, default: None ) –

    Overwrites the system prompt set on the agent initialization.

  • system_prompt_modules (SystemPromptModules, default: None ) –

    Additional sections added to the system prompt.

  • add_message_to_chat_history (bool, default: True ) –

    Whether to add the input message to the chat history.

  • add_response_to_chat_history (bool, default: True ) –

    Whether to add the generated response to the chat history.

  • structured_output_settings (LlmStructuredOutputSettings, default: None ) –

    Settings for structured output.

  • llm_sampling_settings (LlmSamplingSettings, default: None ) –

    Sampling settings for the LLM.

  • streaming_callback (Callable[[StreamingResponse], None], default: None ) –

    Callback for streaming responses.

  • returns_streaming_generator (bool, default: False ) –

    Whether to return a generator streaming the results.

  • print_output (bool, default: False ) –

    Whether to print the generated response.

Returns:

  • Union[str, List[dict], BaseModel, Generator[Any, Any, str | BaseModel | list[BaseModel]]]

    Union[str, List[dict], BaseModel, Generator[Any, Any, str | BaseModel | list[BaseModel]]]: The generated chat response: a string message, a list of function calls, an object from structured output, or a generator for the response.

Source code in llama_cpp_agent/llm_agent.py
def get_chat_response(
        self,
        message: str = None,
        role: Roles = Roles.user,
        prompt_suffix: str = None,
        chat_history: ChatHistory = None,
        system_prompt: str = None,
        system_prompt_modules: list[SystemPromptModule] = None,
        add_message_to_chat_history: bool = True,
        add_response_to_chat_history: bool = True,
        structured_output_settings: LlmStructuredOutputSettings = None,
        llm_sampling_settings: LlmSamplingSettings = None,
        streaming_callback: Callable[[StreamingResponse], None] = None,
        returns_streaming_generator: bool = False,
        print_output: bool = False,
) -> Union[
    str,
    List[dict],
    BaseModel,
    Generator[Any, Any, str | BaseModel | list[BaseModel]],
]:
    """
    Get a chat response based on the input message and context.

    The response text (prefixed with ``prompt_suffix`` when one is given) is
    stored in ``self.last_response`` and, if requested, appended to the chat
    history before it is passed through the structured output handling. When
    a streaming generator is returned, this happens once the generator is
    exhausted.

    Args:
        message (str): The input message.
        role (Literal["system", "user", "assistant", "tool"]): The role of the message sender.
        prompt_suffix (str): Suffix to append after the prompt.
        chat_history (ChatHistory): Overwrite internal ChatHistory of the agent.
        system_prompt (str): Overwrites the system prompt set on the agent initialization.
        system_prompt_modules (SystemPromptModules): Additional sections added to the system prompt.
        add_message_to_chat_history (bool): Whether to add the input message to the chat history.
        add_response_to_chat_history (bool): Whether to add the generated response to the chat history.
        structured_output_settings (LlmStructuredOutputSettings): Settings for structured output. Defaults to no structured output.
        llm_sampling_settings (LlmSamplingSettings): Sampling settings for the LLM. Defaults to the provider's default settings; a passed object is deep-copied and never modified.
        streaming_callback (Callable[[StreamingResponse], None]): Callback for streaming responses. Called for every chunk and once more with an empty, final response.
        returns_streaming_generator (bool): Whether to return a generator streaming the results.
        print_output (bool): Whether to print the generated response.

    Returns:
        Union[str, List[dict], BaseModel, Generator[Any, Any, str | BaseModel | list[BaseModel]]]: The generated chat response. A string message, a list of function calls, an object from structured output or a generator for the response. The string "Error: No model loaded!" is returned when the agent has no provider.
    """
    # Check the provider before it is dereferenced below; otherwise a missing
    # provider raises AttributeError and this fallback can never be reached.
    if not self.provider:
        return "Error: No model loaded!"

    if chat_history is None:
        chat_history = self.chat_history

    if structured_output_settings is None:
        structured_output_settings = LlmStructuredOutputSettings(
            output_type=LlmStructuredOutputType.no_structured_output
        )
    if llm_sampling_settings is None:
        llm_sampling_settings = self.provider.get_provider_default_settings()
    else:
        # Work on a copy so the stop sequences added below do not leak into
        # the caller's settings object.
        llm_sampling_settings = deepcopy(llm_sampling_settings)

    if llm_sampling_settings.get_additional_stop_sequences() is not None:
        llm_sampling_settings.add_additional_stop_sequences(
            self.messages_formatter.default_stop_sequences
        )

    completion, response_role = self.get_response_role_and_completion(
        message=message,
        chat_history=chat_history,
        system_prompt=system_prompt,
        system_prompt_modules=system_prompt_modules,
        add_message_to_chat_history=add_message_to_chat_history,
        role=role,
        prompt_suffix=prompt_suffix,
        structured_output_settings=structured_output_settings,
        llm_sampling_settings=llm_sampling_settings,
    )

    def finish_response(response_text):
        # Shared tail of every response path: restore the prompt suffix,
        # remember the response, record it in the history and post-process it.
        if prompt_suffix:
            response_text = prompt_suffix + response_text
        self.last_response = response_text
        if add_response_to_chat_history:
            chat_history.add_message(
                {
                    "role": response_role,
                    "content": response_text,
                }
            )
        return structured_output_settings.handle_structured_output(
            response_text, prompt_suffix=prompt_suffix, provider=self.provider
        )

    def stream_results():
        # Yields each chunk as it arrives; the processed result becomes the
        # generator's return value.
        full_response_stream = ""
        for out_stream in completion:
            out_text = out_stream["choices"][0]["text"]
            # Chunks equal to the end-of-sequence token are dropped.
            if out_text != self.messages_formatter.eos_token:
                full_response_stream += out_text
                yield out_text
        return finish_response(full_response_stream)

    if returns_streaming_generator:
        return stream_results()

    if llm_sampling_settings.is_streaming():
        full_response = ""
        for out in completion:
            text = out["choices"][0]["text"]
            # Chunks equal to the end-of-sequence token are dropped.
            if text != self.messages_formatter.eos_token:
                full_response += text
                if streaming_callback is not None:
                    streaming_callback(
                        StreamingResponse(text=text, is_last_response=False)
                    )
                if print_output or self.debug_output:
                    print(text, end="")
        if streaming_callback is not None:
            streaming_callback(
                StreamingResponse(text="", is_last_response=True)
            )
        if print_output or self.debug_output:
            print("")
        return finish_response(full_response)

    # Non-streaming: the completion is a single response object.
    text = completion["choices"][0]["text"]
    if text.strip().endswith(self.messages_formatter.eos_token):
        text = text.replace(self.messages_formatter.eos_token, "")
    if print_output or self.debug_output:
        print(text)
    return finish_response(text)

Structured Output Settings

llama_cpp_agent.llm_output_settings.settings

LlmStructuredOutputType

Bases: Enum

Enum for defining different types of structured outputs that can be generated by a Language Model.

Source code in llama_cpp_agent/llm_output_settings/settings.py
class LlmStructuredOutputType(Enum):
    """
    Enum for defining different types of structured outputs that can be generated by a Language Model.

    The value of every member is its own name as a string.
    """

    # Plain text response. The agent falls back to this type when it is
    # called without structured output settings.
    no_structured_output = "no_structured_output"
    # Structured output types; the agent builds the same structured-output
    # system prompt for both. NOTE(review): presumably a single model instance
    # vs. a list of model instances -- confirm against the settings class.
    object_instance = "object_instance"
    list_of_objects = "list_of_objects"
    # Function calling types. NOTE(review): presumably one function call per
    # response vs. several calls per response -- confirm against the
    # settings class.
    function_calling = "function_calling"
    parallel_function_calling = "parallel_function_calling"

LlmStructuredOutputSettings

Bases: BaseModel

Settings for structured output of large language models for using tools like function calling and creating instances of pydantic models.

Attributes:

  • output_type (LlmStructuredOutputType) –

    Defines the type of structured output.

  • function_tools (Optional[List[LlamaCppFunctionTool]]) –

    Tools to enable function calling.

  • pydantic_models (Optional[List[type[BaseModel]]]) –

    List of pydantic models for structured data output.

  • add_thoughts_and_reasoning_field (Optional[bool]) –

    Add thoughts and reasoning field to function calling. Defaults to False.

  • thoughts_and_reasoning_field_name (Optional[str]) –

    Field name for the thoughts and reasoning field. Defaults to "thoughts_and_reasoning".

  • function_calling_name_field_name (Optional[str]) –

    Name of the JSON field for the name of the used function. Defaults to "function".

  • function_calling_content (Optional[str]) –

    Name of the JSON field for the arguments of the used function. Defaults to "arguments".

  • output_model_name_field_name (Optional[str]) –

    Name of the JSON field for the name of the used pydantic model. Defaults to "model".

  • output_model_attributes_field_name (Optional[str]) –

    Name of the JSON field for the fields of the pydantic model. Defaults to "fields".

Methods:

  • from_llama_cpp_function_tools

    Create settings from a list of LlamaCppFunctionTools with a specific output type.

  • from_pydantic_models

    Create settings from a list of Pydantic models with a specific output type.

  • from_open_ai_tools

    Create settings from OpenAI tools for structured outputs.

  • from_functions

    Create settings from a list of callable functions with a specific output type.

  • from_llama_index_tools

    Create settings from a list of llama-index tools with a specific output type.

  • to_openai_tools

    Return a list of OpenAI tools.

  • add_llama_cpp_function_tool

    Add a LlamaCppFunctionTool to the settings.

  • add_pydantic_model

    Add a Pydantic model to the settings, ensuring it matches the specified output type.

  • add_open_ai_tool

    Add an OpenAI tool to the settings, ensuring it matches the specified output type.

  • add_function_tool

    Add a callable function to the settings, ensuring it matches the specified output type.

  • add_llama_index_tool

    Add a llama-index tool, like QueryEngineTool, to the settings, ensuring it matches the specified output type.

  • get_llm_documentation

    Generate documentation for the models and tools configured within the settings, based on the output type.

  • get_gbnf_grammar

    Generate a GBNF grammar for tools configured within the settings, based on the output type.

  • get_json_schema

    Generate a JSON schema for the tools configured within the settings, based on the output type.

Source code in llama_cpp_agent/llm_output_settings/settings.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
class LlmStructuredOutputSettings(BaseModel):
    """
    Settings for structured output of large language models for using tools like function calling and creating instances of pydantic models.

    Attributes:
        output_type (LlmStructuredOutputType): Defines the type of structured output.
        function_tools (Optional[List[LlamaCppFunctionTool]]): Tools to enable function calling. Defaults to None.
        pydantic_models (Optional[List[type[BaseModel]]]): List of pydantic models for structured data output. Defaults to None.
        add_thoughts_and_reasoning_field (Optional[bool]): Add thoughts and reasoning field to function calling. Defaults to False.
        add_heartbeat_field (Optional[bool]): Add a heartbeat field to function calling output so the LLM can indicate if it wants control back after the function call. Defaults to False.
        thoughts_and_reasoning_field_name (Optional[str]): Field name for the thoughts and reasoning field. Defaults to "thoughts_and_reasoning".
        heartbeat_field_name (Optional[str]): Field name for the heartbeat field. Defaults to "heartbeat".
        heartbeat_function_names_list (Optional[List[str]]): Names of the functions that get a heartbeat field added. Defaults to an empty list.
        function_calling_name_field_name (Optional[str]): Name of the JSON field for the name of the used function. Defaults to "function".
        function_calling_content (Optional[str]): Name of the JSON field for the arguments of the used function. Defaults to "arguments".
        output_model_name_field_name (Optional[str]): Name of the JSON field for the name of the used pydantic model. Defaults to "model".
        output_model_attributes_field_name (Optional[str]): Name of the JSON field for the fields of the pydantic model. Defaults to "fields".
        output_raw_json_string (Optional[bool]): If the output should be just the JSON string generated by the LLM. Defaults to False.
        output_structured_output_and_raw_json_string (Optional[bool]): If the output should be a tuple of the structured output and the generated JSON string. Defaults to False.

    Methods:
        from_llama_cpp_function_tools: Create settings from a list of LlamaCppFunctionTools with a specific output type.
        from_pydantic_models: Create settings from a list of Pydantic models with a specific output type.
        from_open_ai_tools: Create settings from OpenAI tools for structured outputs.
        from_functions: Create settings from a list of callable functions with a specific output type.
        from_llama_index_tools: Create settings from a list of llama-index tools with a specific output type.
        to_openai_tools: Return a list of OpenAI tools.
        add_llama_cpp_function_tool: Add a LlamaCppFunctionTool to the settings.
        add_pydantic_model: Add a Pydantic model to the settings, ensuring it matches the specified output type.
        add_open_ai_tool: Add an OpenAI tool to the settings, ensuring it matches the specified output type.
        add_function_tool: Add a callable function to the settings, ensuring it matches the specified output type.
        add_llama_index_tool: Add a llama-index tool, like QueryEngineTool, to the settings, ensuring it matches the specified output type.
        set_name: Rename the function tool or pydantic model at a given index.
        get_llm_documentation: Generate documentation for the models and tools configured within the settings, based on the output type.
        get_gbnf_grammar: Generate a GBNF grammar for tools configured within the settings, based on the output type.
        get_json_schema: Generate a JSON schema for the tools configured within the settings, based on the output type.
        add_function_name_to_heartbeat_list: Add a single function name to the heartbeat list.
        add_all_current_functions_to_heartbeat_list: Add the names of all configured function tools to the heartbeat list.
        handle_structured_output: Turn the text generated by the LLM into the configured structured result.
        handle_function_call: Dispatch a parsed function call response and return its output.
    """

    output_type: Optional[LlmStructuredOutputType] = Field(
        ..., description="The output type of the llm"
    )
    function_tools: Optional[List[LlamaCppFunctionTool]] = Field(
        None, description="List of functions tools for function calling"
    )
    pydantic_models: Optional[List[type[BaseModel]]] = Field(
        None, description="List of pydantic models for structured output"
    )
    add_thoughts_and_reasoning_field: Optional[bool] = Field(
        False, description="Add thoughts and reasoning field to function calling output"
    )
    add_heartbeat_field: Optional[bool] = Field(
        False,
        description="Add heartbeat field to function calling output to let the LLM indicate if it wants control back after this function call."
    )

    thoughts_and_reasoning_field_name: Optional[str] = Field(
        "thoughts_and_reasoning",
        description="Field name for the thoughts and reasoning field",
    )

    heartbeat_field_name: Optional[str] = Field(
        "heartbeat",
        description="Field name for the heartbeat field",
    )

    heartbeat_function_names_list: Optional[List[str]] = Field(
        [],
        description="List of function names that get added a heartbeat field to function calling",
    )
    function_calling_name_field_name: Optional[str] = Field(
        "function",
        description="Name of the JSON field for the name of the used function.",
    )
    function_calling_content: Optional[str] = Field(
        "arguments",
        description="Name of the JSON field for the arguments of the used function.",
    )

    output_model_name_field_name: Optional[str] = Field(
        "model",
        description="Name of the JSON field for the name of the used pydantic model.",
    )
    output_model_attributes_field_name: Optional[str] = Field(
        "fields",
        description="Name of the JSON field for the fields of the pydantic model.",
    )

    output_raw_json_string: Optional[bool] = Field(
        False,
        description="If the output should be just the generated JSON string by the LLM",
    )
    output_structured_output_and_raw_json_string: Optional[bool] = Field(
        False,
        description="If the output should be a tuple of the output and the generated JSON string by the LLM",
    )

    # Settings hold non-pydantic objects (e.g. LlamaCppFunctionTool), so arbitrary field types are allowed.
    class Config:
        arbitrary_types_allowed = True

    @staticmethod
    def from_llama_cpp_function_tools(
            llama_cpp_function_tools: List[LlamaCppFunctionTool],
            allow_parallel_function_calling: bool = False,
            add_thoughts_and_reasoning_field: bool = False,
            add_heartbeat_field: bool = False,
    ):
        """
        Build function calling settings from ready-made LlamaCppFunctionTool objects.

        Args:
            llama_cpp_function_tools (List[LlamaCppFunctionTool]): The function tools to expose to the LLM.
            allow_parallel_function_calling (bool): Use the parallel function calling output type instead of the single call one. Defaults to False.
            add_thoughts_and_reasoning_field (bool): Whether to add a thoughts and reasoning field to the function calling output. Defaults to False.
            add_heartbeat_field (bool): Whether to add a heartbeat field to the function calling output. Defaults to False.

        Returns:
            LlmStructuredOutputSettings: Configured settings object.
        """
        if allow_parallel_function_calling:
            calling_mode = LlmStructuredOutputType.parallel_function_calling
        else:
            calling_mode = LlmStructuredOutputType.function_calling

        return LlmStructuredOutputSettings(
            output_type=calling_mode,
            function_tools=llama_cpp_function_tools,
            add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
            add_heartbeat_field=add_heartbeat_field,
        )

    @staticmethod
    def from_pydantic_models(
            models: List[type[BaseModel]], output_type: LlmStructuredOutputType,
            add_thoughts_and_reasoning_field: bool = False
    ):
        """
        Build settings from Pydantic model classes for the requested output type.

        For the object output types the models are stored as-is; for the
        function calling output types every model is wrapped in a
        LlamaCppFunctionTool.

        Args:
            models (List[type[BaseModel]]): Pydantic model classes to use.
            output_type (LlmStructuredOutputType): Desired output type.
            add_thoughts_and_reasoning_field (bool): Whether to add a thoughts and reasoning field to the output. Defaults to False.

        Returns:
            LlmStructuredOutputSettings: Configured settings object, or None if the output type is none of the handled ones.

        Raises:
            NotImplementedError: If output_type is no_structured_output.
        """
        if output_type is LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )

        yields_objects = (
            output_type is LlmStructuredOutputType.object_instance
            or output_type is LlmStructuredOutputType.list_of_objects
        )
        if yields_objects:
            return LlmStructuredOutputSettings(
                output_type=output_type,
                pydantic_models=models,
                add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
            )

        calls_functions = (
            output_type is LlmStructuredOutputType.function_calling
            or output_type is LlmStructuredOutputType.parallel_function_calling
        )
        if calls_functions:
            wrapped_models = [LlamaCppFunctionTool(entry) for entry in models]
            return LlmStructuredOutputSettings(
                output_type=output_type,
                function_tools=wrapped_models,
                add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
            )

        return None

    @staticmethod
    def from_open_ai_tools(
            tools: List[Tuple[Dict[str, Any], Callable]],
            allow_parallel_function_calling: bool = False,
    ):
        """
        Build function calling settings from OpenAI style tool definitions.

        Args:
            tools (List[Tuple[Dict[str, Any], Callable]]): Pairs of an OpenAI tool schema and the function it describes.
            allow_parallel_function_calling (bool): Use the parallel function calling output type instead of the single call one. Defaults to False.

        Returns:
            LlmStructuredOutputSettings: Configured settings object.
        """
        if allow_parallel_function_calling:
            calling_mode = LlmStructuredOutputType.parallel_function_calling
        else:
            calling_mode = LlmStructuredOutputType.function_calling

        wrapped_tools = [LlamaCppFunctionTool(schema_and_function) for schema_and_function in tools]
        return LlmStructuredOutputSettings(
            output_type=calling_mode,
            function_tools=wrapped_tools,
        )

    @staticmethod
    def from_functions(
            tools: List[Callable], allow_parallel_function_calling: bool = False,
            add_thoughts_and_reasoning_field: bool = False,
            add_heartbeat_field: bool = False,
    ):
        """
        Build function calling settings from plain callables.

        Each callable is wrapped in a LlamaCppFunctionTool.

        Args:
            tools (List[Callable]): The functions to expose to the LLM.
            allow_parallel_function_calling (bool): Use the parallel function calling output type instead of the single call one. Defaults to False.
            add_thoughts_and_reasoning_field (bool): Whether to add a thoughts and reasoning field to output. Defaults to False.
            add_heartbeat_field (bool): Whether to add a heartbeat field to output. Defaults to False.

        Returns:
            LlmStructuredOutputSettings: Configured settings object.
        """
        if allow_parallel_function_calling:
            calling_mode = LlmStructuredOutputType.parallel_function_calling
        else:
            calling_mode = LlmStructuredOutputType.function_calling

        wrapped_functions = [LlamaCppFunctionTool(function) for function in tools]
        return LlmStructuredOutputSettings(
            output_type=calling_mode,
            function_tools=wrapped_functions,
            add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
            add_heartbeat_field=add_heartbeat_field,
        )

    @staticmethod
    def from_llama_index_tools(
            tools: list, allow_parallel_function_calling: bool = False, add_thoughts_and_reasoning_field: bool = False,
            add_heartbeat_field: bool = False,
    ):
        """
        Build function calling settings from llama-index tools.

        Each tool is converted with LlamaCppFunctionTool.from_llama_index_tool.
        The resulting output type is always one of the two function calling types.

        Args:
            tools (list): List of llama-index tools.
            allow_parallel_function_calling (bool): Use the parallel function calling output type instead of the single call one. Defaults to False.
            add_thoughts_and_reasoning_field (bool): Whether to add a thoughts and reasoning field to output. Defaults to False.
            add_heartbeat_field (bool): Whether to add a heartbeat field to output. Defaults to False.

        Returns:
            LlmStructuredOutputSettings: Configured settings object.
        """
        if allow_parallel_function_calling:
            calling_mode = LlmStructuredOutputType.parallel_function_calling
        else:
            calling_mode = LlmStructuredOutputType.function_calling

        converted_tools = [
            LlamaCppFunctionTool.from_llama_index_tool(index_tool) for index_tool in tools
        ]
        return LlmStructuredOutputSettings(
            output_type=calling_mode,
            function_tools=converted_tools,
            add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
            add_heartbeat_field=add_heartbeat_field,
        )

    def to_openai_tools(self):
        """
        Convert the configured function tools into OpenAI tool definitions.

        Returns:
            The result of ``to_openai_tool()`` for every function tool, in order,
            or None when no function tools are configured.
        """
        configured_tools = self.function_tools
        if configured_tools is None:
            return None
        return [function_tool.to_openai_tool() for function_tool in configured_tools]

    def add_llama_cpp_function_tool(self, tool: LlamaCppFunctionTool):
        """
        Add a LlamaCppFunctionTool to the settings.

        Args:
            tool (LlamaCppFunctionTool): The function tool to add.
        """
        # function_tools defaults to None; create the list on first use
        # instead of failing with AttributeError on None.append.
        if self.function_tools is None:
            self.function_tools = []
        self.function_tools.append(tool)

    def add_pydantic_model(self, model: type[BaseModel], name: Optional[str] = None):
        """
        Add a Pydantic model to the settings, ensuring it matches the specified output type.

        For the object output types the model is appended to ``pydantic_models``;
        for the function calling output types it is wrapped in a
        LlamaCppFunctionTool and appended to ``function_tools``.

        Args:
            model (type[BaseModel]): The Pydantic model class to add.
            name (Optional[str]): If given, the model class is renamed to this name (its ``__name__`` is overwritten) before it is added.

        Raises:
            NotImplementedError: If no structured output is specified.
        """
        # Validate first so a rejected call does not rename the caller's class.
        if self.output_type is LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )

        if name is not None:
            model.__name__ = name

        if (
                self.output_type is LlmStructuredOutputType.object_instance
                or self.output_type is LlmStructuredOutputType.list_of_objects
        ):
            # pydantic_models defaults to None; create the list on first use.
            if self.pydantic_models is None:
                self.pydantic_models = []
            self.pydantic_models.append(model)
        elif (
                self.output_type is LlmStructuredOutputType.function_calling
                or self.output_type is LlmStructuredOutputType.parallel_function_calling
        ):
            function_tool = LlamaCppFunctionTool(model)
            # function_tools defaults to None; create the list on first use.
            if self.function_tools is None:
                self.function_tools = []
            self.function_tools.append(function_tool)

    def add_open_ai_tool(
            self, open_ai_schema_and_function: Tuple[Dict[str, Any], Callable], name: str = None
    ):
        """
        Add an OpenAI tool to the settings, ensuring it matches the specified output type.

        Args:
            open_ai_schema_and_function (Tuple[Dict[str, Any], Callable]): The OpenAI schema and associated function to add.
            name (str): Optional name to set on the created tool via ``set_name``.

        Raises:
            NotImplementedError: If the output type does not support adding tools.
        """
        if self.output_type is LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )
        elif self.output_type is LlmStructuredOutputType.function_calling or self.output_type is LlmStructuredOutputType.parallel_function_calling:
            tool = LlamaCppFunctionTool(open_ai_schema_and_function)
            if name is not None:
                tool.set_name(name)
            # function_tools defaults to None; create the list on first use
            # instead of failing with AttributeError on None.append.
            if self.function_tools is None:
                self.function_tools = []
            self.function_tools.append(tool)
        else:
            raise NotImplementedError(
                f"LlmOutputType: {self.output_type.value} not supported for tools!"
            )

    def add_function_tool(self, function: Callable, name: str = None):
        """
        Add a callable function to the settings, ensuring it matches the specified output type.

        Args:
            function (Callable): The function to add.
            name (str): Optional name to set on the created tool via ``set_name``.

        Raises:
            NotImplementedError: If the output type does not support adding tools.
        """
        if self.output_type is LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )
        elif self.output_type is LlmStructuredOutputType.function_calling or self.output_type is LlmStructuredOutputType.parallel_function_calling:
            tool = LlamaCppFunctionTool(function)
            if name is not None:
                tool.set_name(name)
            # function_tools defaults to None; create the list on first use
            # instead of failing with AttributeError on None.append.
            if self.function_tools is None:
                self.function_tools = []
            self.function_tools.append(tool)
        else:
            raise NotImplementedError(
                f"LlmOutputType: {self.output_type.value} not supported for tools!"
            )

    def add_llama_index_tool(self, tool, name: str = None):
        """
        Add a llama-index tool, like QueryEngineTool, to the settings, ensuring it matches the specified output type.

        Args:
            tool: The llama-index tool to add.
            name (str): Optional name to set on the converted tool via ``set_name``.

        Raises:
            NotImplementedError: If the output type does not support adding tools.
        """
        if self.output_type is LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )
        elif self.output_type is LlmStructuredOutputType.function_calling or self.output_type is LlmStructuredOutputType.parallel_function_calling:
            # Keep the converted tool in its own name rather than rebinding the parameter.
            function_tool = LlamaCppFunctionTool.from_llama_index_tool(tool)
            if name is not None:
                function_tool.set_name(name)
            # function_tools defaults to None; create the list on first use
            # instead of failing with AttributeError on None.append.
            if self.function_tools is None:
                self.function_tools = []
            self.function_tools.append(function_tool)
        else:
            raise NotImplementedError(
                f"LlmOutputType: {self.output_type.value} not supported for tools!"
            )

    def set_name(self, index: int, name: str):
        """
        Rename the function tool or pydantic model stored at the given position.

        For the function calling output types the tool's ``set_name`` is used;
        for every other structured output type the ``__name__`` of the model
        class is overwritten.

        Args:
            index (int): Position in ``function_tools`` or ``pydantic_models``.
            name (str): The new name.

        Raises:
            NotImplementedError: If no structured output is specified.
        """
        kind = self.output_type
        if kind is LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )

        if (
                kind is LlmStructuredOutputType.function_calling
                or kind is LlmStructuredOutputType.parallel_function_calling
        ):
            self.function_tools[index].set_name(name)
            return

        self.pydantic_models[index].__name__ = name

    def get_llm_documentation(self, provider):
        """
        Generate documentation for the models and tools configured within the settings, based on the output type.

        Args:
            provider: The LLM provider in use. For TGIServerProvider and VLLMServerProvider
                the documentation is generated with ``ordered_json_mode`` enabled.

        Returns:
            str: The generated, stripped documentation text, or None if the output type is none of the handled ones.

        Raises:
            NotImplementedError: If no structured output is specified.
        """
        # Imported here rather than at module level, as in the original code.
        from llama_cpp_agent.providers.tgi_server import TGIServerProvider
        from llama_cpp_agent.providers.vllm_server import VLLMServerProvider

        json_schema_mode = isinstance(provider, (TGIServerProvider, VLLMServerProvider))

        kind = self.output_type
        if kind == LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )

        if (
                kind == LlmStructuredOutputType.object_instance
                or kind == LlmStructuredOutputType.list_of_objects
        ):
            documentation = generate_text_documentation(
                self.pydantic_models, ordered_json_mode=json_schema_mode
            )
            return documentation.strip()

        if (
                kind == LlmStructuredOutputType.function_calling
                or kind == LlmStructuredOutputType.parallel_function_calling
        ):
            tool_models = [function_tool.model for function_tool in self.function_tools]
            documentation = generate_text_documentation(
                tool_models,
                model_prefix="Function",
                fields_prefix="Parameters",
                ordered_json_mode=json_schema_mode,
            )
            return documentation.strip()

        return None

    def get_gbnf_grammar(self):
        """
        Generate a GBNF grammar for tools configured within the settings, based on the output type.

        The object output types build the grammar from ``pydantic_models`` with
        the heartbeat disabled; the function calling output types build it from
        the models of ``function_tools`` and pass the heartbeat settings along.
        The list/parallel variants request a list of outputs.

        Returns:
            The grammar produced by generate_gbnf_grammar_from_pydantic_models,
            or None if the output type is none of the handled ones.

        Raises:
            NotImplementedError: If no structured output is specified.
        """
        kind = self.output_type
        if kind == LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )

        single_object = kind == LlmStructuredOutputType.object_instance
        if single_object or kind == LlmStructuredOutputType.list_of_objects:
            return generate_gbnf_grammar_from_pydantic_models(
                self.pydantic_models,
                list_of_outputs=not single_object,
                add_inner_thoughts=self.add_thoughts_and_reasoning_field,
                outer_object_name=self.output_model_name_field_name,
                outer_object_content=self.output_model_attributes_field_name,
                inner_thought_field_name=self.thoughts_and_reasoning_field_name,
                allow_only_inner_thoughts=False,
                add_request_heartbeat=False,
            )

        single_call = kind == LlmStructuredOutputType.function_calling
        if single_call or kind == LlmStructuredOutputType.parallel_function_calling:
            tool_models = [function_tool.model for function_tool in self.function_tools]
            return generate_gbnf_grammar_from_pydantic_models(
                tool_models,
                list_of_outputs=not single_call,
                add_inner_thoughts=self.add_thoughts_and_reasoning_field,
                outer_object_name=self.function_calling_name_field_name,
                outer_object_content=self.function_calling_content,
                inner_thought_field_name=self.thoughts_and_reasoning_field_name,
                allow_only_inner_thoughts=False,
                add_request_heartbeat=self.add_heartbeat_field,
                request_heartbeat_field_name=self.heartbeat_field_name,
                request_heartbeat_models=self.heartbeat_function_names_list,
            )

        return None

    def get_json_schema(self):
        """
        Generate a JSON schema for the tools configured within the settings, based on the output type.

        Field names are passed with a numeric prefix ("001_", "002_", ...).
        When the thoughts and reasoning field is enabled it takes "001_" and
        every other field moves up by one.

        Returns:
            The schema produced by generate_json_schemas, or None if the
            output type is none of the handled ones.

        Raises:
            NotImplementedError: If no structured output is specified.
        """
        kind = self.output_type
        if kind == LlmStructuredOutputType.no_structured_output:
            raise NotImplementedError(
                "LlmOutputType: no_structured_output not supported for structured output and function calling!"
            )

        single_object = kind == LlmStructuredOutputType.object_instance
        if single_object or kind == LlmStructuredOutputType.list_of_objects:
            with_thoughts = self.add_thoughts_and_reasoning_field
            if not with_thoughts:
                name_prefix, content_prefix = "001_", "002_"
            else:
                name_prefix, content_prefix = "002_", "003_"
            return generate_json_schemas(
                self.pydantic_models,
                allow_list=not single_object,
                outer_object_name=name_prefix + self.output_model_name_field_name,
                outer_object_properties_name=content_prefix + self.output_model_attributes_field_name,
                inner_thoughts_name="001_" + self.thoughts_and_reasoning_field_name,
                add_inner_thoughts=with_thoughts,
            )

        single_call = kind is LlmStructuredOutputType.function_calling
        if single_call or kind is LlmStructuredOutputType.parallel_function_calling:
            tool_models = [function_tool.model for function_tool in self.function_tools]
            with_thoughts = self.add_thoughts_and_reasoning_field
            if not with_thoughts:
                name_prefix, content_prefix, heartbeat_prefix = "001_", "002_", "003_"
            else:
                name_prefix, content_prefix, heartbeat_prefix = "002_", "003_", "004_"
            return generate_json_schemas(
                tool_models,
                allow_list=not single_call,
                outer_object_name=name_prefix + self.function_calling_name_field_name,
                outer_object_properties_name=content_prefix + self.function_calling_content,
                inner_thoughts_name="001_" + self.thoughts_and_reasoning_field_name,
                add_inner_thoughts=with_thoughts,
                add_heartbeat=self.add_heartbeat_field,
                heartbeat_name=heartbeat_prefix + self.heartbeat_field_name,
                heartbeat_list=self.heartbeat_function_names_list,
            )

        return None

    def add_function_name_to_heartbeat_list(self, function_name: str):
        """
        Register a function for the heartbeat field.

        Args:
            function_name (str): Name of the function to append to ``heartbeat_function_names_list``.
        """
        heartbeat_names = self.heartbeat_function_names_list
        heartbeat_names.append(function_name)

    def add_all_current_functions_to_heartbeat_list(self, excluded: list[str] = None):
        """
        Add the names of all currently configured function tools to the heartbeat list.

        Args:
            excluded (list[str]): Function names to leave out. Defaults to None, which excludes nothing.
        """
        if excluded is None:
            excluded = []

        # function_tools defaults to None; treat that as "no tools" instead of
        # failing with TypeError when iterating over None.
        configured_tools = self.function_tools if self.function_tools is not None else []

        self.heartbeat_function_names_list.extend(
            tool.model.__name__ for tool in configured_tools if tool.model.__name__ not in excluded
        )

    def handle_structured_output(self, llm_output: str, prompt_suffix: str = None, provider=None):
        """
        Turn the text generated by the LLM into the result configured by these settings.

        Args:
            llm_output (str): The text generated by the LLM.
            prompt_suffix (str): If given, its first occurrence is removed from the text before parsing.
            provider: Not used by this method.

        Returns:
            The unmodified text if ``output_raw_json_string`` is set. Otherwise,
            depending on the output type: the result of handle_function_call, a
            model instance, or a list of model instances. If
            ``output_structured_output_and_raw_json_string`` is set, a tuple of
            that result and the (suffix-stripped) text. The text alone is
            returned when the output type is not a structured one or, for
            object_instance, when no configured model has the generated name.
        """
        if self.output_raw_json_string:
            return llm_output

        text = llm_output.replace(prompt_suffix, "", 1) if prompt_suffix else llm_output
        kind = self.output_type

        if (
                kind is LlmStructuredOutputType.function_calling
                or kind is LlmStructuredOutputType.parallel_function_calling
        ):
            parsed = self.clean_keys(parse_json_response(text))
            if self.output_structured_output_and_raw_json_string:
                return self.handle_function_call(parsed), text
            return self.handle_function_call(parsed)

        if kind == LlmStructuredOutputType.object_instance:
            parsed = self.clean_keys(parse_json_response(text))
            wanted_name = parsed[self.output_model_name_field_name]
            field_values = parsed[self.output_model_attributes_field_name]
            for candidate in self.pydantic_models:
                if wanted_name == candidate.__name__:
                    if self.output_structured_output_and_raw_json_string:
                        return candidate(**field_values), text
                    return candidate(**field_values)
            # No configured model matched the generated name.
            return text

        if kind == LlmStructuredOutputType.list_of_objects:
            parsed = self.clean_keys(parse_json_response(text))
            instances = []
            for entry in parsed:
                for candidate in self.pydantic_models:
                    # Both keys are read for every candidate, as in the original loop.
                    wanted_name = entry[self.output_model_name_field_name]
                    field_values = entry[self.output_model_attributes_field_name]
                    if wanted_name == candidate.__name__:
                        instances.append(candidate(**field_values))
            if self.output_structured_output_and_raw_json_string:
                return instances, text
            return instances

        return text

    def handle_function_call(self, function_call_response: Union[dict, List[dict]]):
        """
        Dispatch a parsed function call response to the single or parallel handler.

        Args:
            function_call_response (dict | List[dict]): The parsed function call, or list of calls.

        Returns:
            Whatever the selected handler returns, or an ``"Error: ..."`` string when the
            response is ``None`` or an ``AttributeError`` is raised while handling it.
        """
        if function_call_response is None:
            return "Error: Invalid function call response."

        try:
            if self.output_type == LlmStructuredOutputType.parallel_function_calling:
                return self.intern_parallel_function_call(function_call_response)
            return self.intern_function_call(function_call_response)
        except AttributeError as err:
            return f"Error: {err}"

    def intern_function_call(self, function_call: dict):
        """
        Run the single tool named in ``function_call`` and report its result.

        Args:
            function_call (dict): Parsed call holding the tool name and its parameters.

        Returns:
            list | None: A one-element list with the tool name, the parameters used and the
            ``return_value``; ``None`` when the call has no parameters entry or no tool matches.
        """
        content_key = self.function_calling_content
        if content_key not in function_call:
            return None

        # The first tool whose model name equals the requested name wins.
        matched_tool = next(
            (
                candidate
                for candidate in self.function_tools
                if candidate.model.__name__ == function_call[self.function_calling_name_field_name]
            ),
            None,
        )
        if matched_tool is None:
            return None

        arguments = function_call[content_key]
        invocation = matched_tool.model(**arguments)
        outcome = invocation.run(**matched_tool.additional_parameters)
        return [
            {
                self.function_calling_name_field_name: matched_tool.model.__name__,
                content_key: arguments,
                "return_value": outcome,
            }
        ]

    def intern_parallel_function_call(self, function_calls: List[dict]):
        """
        Run every tool call in ``function_calls`` in order and collect the results.

        Calls without a parameters entry, or naming an unknown tool, are skipped.

        Args:
            function_calls (List[dict]): Parsed calls, each holding a tool name and its parameters.

        Returns:
            list | str: One dict per executed call (tool name, parameters, ``return_value``), or an
            ``"Error: ..."`` string as soon as executing a call raises ``AttributeError``.
        """
        collected = []
        for request in function_calls:
            if self.function_calling_content not in request:
                continue

            matched_tool = next(
                (
                    candidate
                    for candidate in self.function_tools
                    if candidate.model.__name__ == request[self.function_calling_name_field_name]
                ),
                None,
            )
            if matched_tool is None:
                continue

            try:
                model_cls = matched_tool.model
                arguments = request[self.function_calling_content]
                outcome = model_cls(**arguments).run(**matched_tool.additional_parameters)
                collected.append(
                    {
                        self.function_calling_name_field_name: matched_tool.model.__name__,
                        self.function_calling_content: arguments,
                        "return_value": outcome,
                    }
                )
            except AttributeError as err:
                # An AttributeError aborts the whole batch; earlier results are not returned.
                return f"Error: {err}"

        return collected

    def clean_keys(self, data) -> Dict[str, Any] | List[Dict[str, Any]]:
        if isinstance(data, dict):
            # Create a new dictionary with modified keys
            new_dict = {}
            for key, value in data.items():
                # Remove the leading 'XXX_' from keys
                new_key = re.sub(r"^\d{3}_", "", key)
                # Recursively clean nested dictionaries and lists
                new_dict[new_key] = self.clean_keys(value)
            return new_dict
        elif isinstance(data, list):
            # Process each item in the list
            return [self.clean_keys(item) for item in data]
        else:
            # Return the item as is if it's not a dict or list
            return data
from_llama_cpp_function_tools(llama_cpp_function_tools, allow_parallel_function_calling=False, add_thoughts_and_reasoning_field=False, add_heartbeat_field=False) staticmethod

Create settings from a list of LlamaCppFunctionTools with a specific output type.

Parameters:

  • llama_cpp_function_tools (List[LlamaCppFunctionTool]) –

    List of function tools.

  • allow_parallel_function_calling (bool, default: False ) –

    Whether to enable parallel function calling. Defaults to False.

  • add_thoughts_and_reasoning_field (bool, default: False ) –

    Whether to add thoughts and reasoning field to function calling. Defaults to False.

  • add_heartbeat_field (bool, default: False ) –

    Whether to add heartbeat field to function calling. Defaults to False.

Returns: LlmStructuredOutputSettings: Configured settings object.

Source code in llama_cpp_agent/llm_output_settings/settings.py
@staticmethod
def from_llama_cpp_function_tools(
        llama_cpp_function_tools: List[LlamaCppFunctionTool],
        allow_parallel_function_calling: bool = False,
        add_thoughts_and_reasoning_field: bool = False,
        add_heartbeat_field: bool = False,
):
    """
    Build function calling settings from ready-made LlamaCppFunctionTool objects.

    Args:
        llama_cpp_function_tools (List[LlamaCppFunctionTool]): The function tools to register.
        allow_parallel_function_calling (bool): Select parallel function calling instead of single function calling. Defaults to False.
        add_thoughts_and_reasoning_field (bool): Forwarded to the settings object. Defaults to False.
        add_heartbeat_field (bool): Forwarded to the settings object. Defaults to False.
    Returns:
        LlmStructuredOutputSettings: Configured settings object.
    """
    if allow_parallel_function_calling:
        selected_type = LlmStructuredOutputType.parallel_function_calling
    else:
        selected_type = LlmStructuredOutputType.function_calling

    return LlmStructuredOutputSettings(
        output_type=selected_type,
        function_tools=llama_cpp_function_tools,
        add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
        add_heartbeat_field=add_heartbeat_field
    )
from_pydantic_models(models, output_type, add_thoughts_and_reasoning_field=False) staticmethod

Create settings from a list of Pydantic models with a specific output type.

Parameters:

  • models (List[BaseModel]) –

    List of Pydantic models.

  • output_type (LlmStructuredOutputType) –

    Desired output type.

Returns:

  • LlmStructuredOutputSettings

    Configured settings object.

Raises:

  • NotImplementedError

    If no structured output is specified for the output type.

Source code in llama_cpp_agent/llm_output_settings/settings.py
@staticmethod
def from_pydantic_models(
        models: List[type[BaseModel]], output_type: LlmStructuredOutputType,
        add_thoughts_and_reasoning_field: bool = False
):
    """
    Build settings from Pydantic model classes for the requested output type.

    For the object output types the models are stored as they are; for the function calling
    output types each model is wrapped in a LlamaCppFunctionTool.

    Args:
        models (List[type[BaseModel]]): Pydantic model classes.
        output_type (LlmStructuredOutputType): Desired output type.
        add_thoughts_and_reasoning_field (bool): Forwarded to the settings object. Defaults to False.

    Returns:
        LlmStructuredOutputSettings: Configured settings object, or None when the output type
        matches none of the handled members.

    Raises:
        NotImplementedError: If output_type is no_structured_output.
    """
    if output_type is LlmStructuredOutputType.no_structured_output:
        raise NotImplementedError(
            "LlmOutputType: no_structured_output not supported for structured output and function calling!"
        )

    produces_objects = (
        output_type is LlmStructuredOutputType.object_instance
        or output_type is LlmStructuredOutputType.list_of_objects
    )
    if produces_objects:
        return LlmStructuredOutputSettings(
            output_type=output_type,
            pydantic_models=models,
            add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field
        )

    calls_functions = (
        output_type is LlmStructuredOutputType.function_calling
        or output_type is LlmStructuredOutputType.parallel_function_calling
    )
    if calls_functions:
        return LlmStructuredOutputSettings(
            output_type=output_type,
            function_tools=[LlamaCppFunctionTool(model) for model in models],
            add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field
        )

    return None
from_open_ai_tools(tools, allow_parallel_function_calling=False) staticmethod

Create settings from OpenAI tools for structured outputs.

Parameters:

  • tools (List[Tuple[Dict[str, Any], Callable]]) –

    List of OpenAI tools defined by a schema and associated function.

  • allow_parallel_function_calling (bool, default: False ) –

    Whether to enable parallel function calling. Defaults to False.

Returns:

  • LlmStructuredOutputSettings

    Configured settings object.

Source code in llama_cpp_agent/llm_output_settings/settings.py
@staticmethod
def from_open_ai_tools(
        tools: List[Tuple[Dict[str, Any], Callable]],
        allow_parallel_function_calling: bool = False,
):
    """
    Build function calling settings from OpenAI style tools.

    Args:
        tools (List[Tuple[Dict[str, Any], Callable]]): Tools given as (schema, function) pairs; each pair is wrapped in a LlamaCppFunctionTool.
        allow_parallel_function_calling (bool): Select parallel function calling instead of single function calling. Defaults to False.

    Returns:
        LlmStructuredOutputSettings: Configured settings object.
    """
    if allow_parallel_function_calling:
        selected_type = LlmStructuredOutputType.parallel_function_calling
    else:
        selected_type = LlmStructuredOutputType.function_calling

    wrapped_tools = [LlamaCppFunctionTool(schema_and_function) for schema_and_function in tools]
    return LlmStructuredOutputSettings(
        output_type=selected_type,
        function_tools=wrapped_tools,
    )
from_functions(tools, allow_parallel_function_calling=False, add_thoughts_and_reasoning_field=False, add_heartbeat_field=False) staticmethod

Create function calling settings from a list of Python callables.

Parameters:

  • tools (list) –

    List of callables to expose as function tools.

  • allow_parallel_function_calling (bool, default: False ) –

    Whether to enable parallel function calling. Defaults to False.

  • add_thoughts_and_reasoning_field (bool, default: False ) –

    Whether to add a thoughts and reasoning field to output.

  • add_heartbeat_field (bool, default: False ) –

    Whether to add a heartbeat field to output.

Returns: LlmStructuredOutputSettings: Configured settings object.

Raises:

  • NotImplementedError

    If the specified output type is not supported for tools.

Source code in llama_cpp_agent/llm_output_settings/settings.py
@staticmethod
def from_functions(
        tools: List[Callable], allow_parallel_function_calling: bool = False,
        add_thoughts_and_reasoning_field: bool = False,
        add_heartbeat_field: bool = False,
):
    """
    Build function calling settings from a list of callables.

    Args:
        tools (List[Callable]): Callables to expose; each one is wrapped in a LlamaCppFunctionTool.
        allow_parallel_function_calling (bool): Select parallel function calling instead of single function calling. Defaults to False.
        add_thoughts_and_reasoning_field (bool): Forwarded to the settings object. Defaults to False.
        add_heartbeat_field (bool): Forwarded to the settings object. Defaults to False.
    Returns:
        LlmStructuredOutputSettings: Configured settings object.
    """
    if allow_parallel_function_calling:
        selected_type = LlmStructuredOutputType.parallel_function_calling
    else:
        selected_type = LlmStructuredOutputType.function_calling

    wrapped_tools = [LlamaCppFunctionTool(function) for function in tools]
    return LlmStructuredOutputSettings(
        add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
        output_type=selected_type,
        function_tools=wrapped_tools,
        add_heartbeat_field=add_heartbeat_field
    )
from_llama_index_tools(tools, allow_parallel_function_calling=False, add_thoughts_and_reasoning_field=False, add_heartbeat_field=False) staticmethod

Create settings from a list of llama-index tools with a specific output type. Has to be either LlmOutputType.function_call or LlmOutputType.parallel_function_call.

Parameters:

  • tools (list) –

    List of llama-index tools.

  • allow_parallel_function_calling (bool, default: False ) –

    Whether to enable parallel function calling. Defaults to False.

  • add_thoughts_and_reasoning_field (bool, default: False ) –

    Whether to add a thoughts and reasoning field to output.

  • add_heartbeat_field (bool, default: False ) –

    Whether to add a heartbeat field to output.

Returns: LlmStructuredOutputSettings: Configured settings object.

Raises:

  • NotImplementedError

    If the specified output type is not supported for tools.

Source code in llama_cpp_agent/llm_output_settings/settings.py
@staticmethod
def from_llama_index_tools(
        tools: list, allow_parallel_function_calling: bool = False, add_thoughts_and_reasoning_field: bool = False,
        add_heartbeat_field: bool = False,
):
    """
    Build function calling settings from a list of llama-index tools.

    Args:
        tools (list): llama-index tools; each one is converted with LlamaCppFunctionTool.from_llama_index_tool.
        allow_parallel_function_calling (bool): Select parallel function calling instead of single function calling. Defaults to False.
        add_thoughts_and_reasoning_field (bool): Forwarded to the settings object. Defaults to False.
        add_heartbeat_field (bool): Forwarded to the settings object. Defaults to False.
    Returns:
        LlmStructuredOutputSettings: Configured settings object.
    """
    if allow_parallel_function_calling:
        selected_type = LlmStructuredOutputType.parallel_function_calling
    else:
        selected_type = LlmStructuredOutputType.function_calling

    converted_tools = [
        LlamaCppFunctionTool.from_llama_index_tool(index_tool) for index_tool in tools
    ]
    return LlmStructuredOutputSettings(
        output_type=selected_type,
        function_tools=converted_tools,
        add_thoughts_and_reasoning_field=add_thoughts_and_reasoning_field,
        add_heartbeat_field=add_heartbeat_field
    )
to_openai_tools()

Return a list of OpenAI tools. Returns: List[Dict[str, Any]]: List of OpenAI tools.

Raises:

  • NotImplementedError

    If the specified output type is not supported for tools.

Source code in llama_cpp_agent/llm_output_settings/settings.py
def to_openai_tools(self):
    """
    Convert the registered function tools to their OpenAI tool representation.

    Returns:
        list | None: The result of ``to_openai_tool()`` for every entry of ``self.function_tools``,
        or None when ``self.function_tools`` is None.
    """
    registered_tools = self.function_tools
    if registered_tools is None:
        return None

    converted = []
    for function_tool in registered_tools:
        converted.append(function_tool.to_openai_tool())
    return converted
add_llama_cpp_function_tool(tool)

Add a LlamaCppFunctionTool to the settings.

Parameters:

  • tool (LlamaCppFunctionTool) –

    The function tool to add.

Source code in llama_cpp_agent/llm_output_settings/settings.py
def add_llama_cpp_function_tool(self, tool: LlamaCppFunctionTool):
    """
    Register an already constructed function tool with these settings.

    Args:
        tool (LlamaCppFunctionTool): Tool appended to ``self.function_tools``.
    """
    registered_tools = self.function_tools
    registered_tools.append(tool)
add_pydantic_model(model, name=None)

Add a Pydantic model to the settings, ensuring it matches the specified output type.

Parameters:

  • model (BaseModel) –

    The Pydantic model to add.

Raises:

  • NotImplementedError

    If no structured output is specified.

Source code in llama_cpp_agent/llm_output_settings/settings.py
def add_pydantic_model(self, model: BaseModel, name: str = None):
    """
    Register a Pydantic model according to the configured output type.

    For the object output types the model is appended to ``self.pydantic_models``; for the
    function calling output types it is wrapped in a LlamaCppFunctionTool and appended to
    ``self.function_tools``.

    Args:
        model (BaseModel): The Pydantic model to add.
        name (str): Optional replacement for ``model.__name__``. The rename is applied to the
            model itself, before the output type is checked.

    Raises:
        NotImplementedError: If the output type is no_structured_output.
    """
    if name is not None:
        model.__name__ = name

    kind = self.output_type
    if kind is LlmStructuredOutputType.no_structured_output:
        raise NotImplementedError(
            "LlmOutputType: no_structured_output not supported for structured output and function calling!"
        )

    if (
            kind is LlmStructuredOutputType.object_instance
            or kind is LlmStructuredOutputType.list_of_objects
    ):
        self.pydantic_models.append(model)
    elif (
            kind is LlmStructuredOutputType.function_calling
            or kind is LlmStructuredOutputType.parallel_function_calling
    ):
        self.function_tools.append(LlamaCppFunctionTool(model))
add_open_ai_tool(open_ai_schema_and_function, name=None)

Add an OpenAI tool to the settings, ensuring it matches the specified output type.

Parameters:

  • open_ai_schema_and_function (Tuple[Dict[str, Any], Callable]) –

    The OpenAI schema and associated function to add.

Raises:

  • NotImplementedError

    If the output type does not support adding tools.

Source code in llama_cpp_agent/llm_output_settings/settings.py
def add_open_ai_tool(
        self, open_ai_schema_and_function: Tuple[Dict[str, Any], Callable], name: str = None
):
    """
    Wrap an OpenAI style tool in a LlamaCppFunctionTool and register it.

    Args:
        open_ai_schema_and_function (Tuple[Dict[str, Any], Callable]): The OpenAI schema and associated function to add.
        name (str): Optional name set on the wrapped tool via ``set_name``.

    Raises:
        NotImplementedError: If the output type is not one of the function calling types.
    """
    kind = self.output_type
    if kind is LlmStructuredOutputType.no_structured_output:
        raise NotImplementedError(
            "LlmOutputType: no_structured_output not supported for structured output and function calling!"
        )

    accepts_tools = (
        kind is LlmStructuredOutputType.function_calling
        or kind is LlmStructuredOutputType.parallel_function_calling
    )
    if not accepts_tools:
        raise NotImplementedError(
            f"LlmOutputType: {self.output_type.value} not supported for tools!"
        )

    wrapped_tool = LlamaCppFunctionTool(open_ai_schema_and_function)
    if name is not None:
        wrapped_tool.set_name(name)
    self.function_tools.append(wrapped_tool)
add_function_tool(function, name=None)

Add a callable function to the settings, ensuring it matches the specified output type.

Parameters:

  • function (Callable) –

    The function to add.

Raises:

  • NotImplementedError

    If the output type does not support adding tools.

Source code in llama_cpp_agent/llm_output_settings/settings.py
def add_function_tool(self, function: Callable, name: str = None):
    """
    Wrap a callable in a LlamaCppFunctionTool and register it.

    Args:
        function (Callable): The function to add.
        name (str): Optional name set on the wrapped tool via ``set_name``.

    Raises:
        NotImplementedError: If the output type is not one of the function calling types.
    """
    kind = self.output_type
    if kind is LlmStructuredOutputType.no_structured_output:
        raise NotImplementedError(
            "LlmOutputType: no_structured_output not supported for structured output and function calling!"
        )

    accepts_tools = (
        kind is LlmStructuredOutputType.function_calling
        or kind is LlmStructuredOutputType.parallel_function_calling
    )
    if not accepts_tools:
        raise NotImplementedError(
            f"LlmOutputType: {self.output_type.value} not supported for tools!"
        )

    wrapped_tool = LlamaCppFunctionTool(function)
    if name is not None:
        wrapped_tool.set_name(name)
    self.function_tools.append(wrapped_tool)
add_llama_index_tool(tool, name=None)

Add a llama-index tool, like QueryEngineTool, to the settings, ensuring it matches the specified output type.

Parameters:

  • tool

    The llama-index tool to add.

Raises:

  • NotImplementedError

    If the output type does not support adding tools.

Source code in llama_cpp_agent/llm_output_settings/settings.py
def add_llama_index_tool(self, tool, name: str = None):
    """
    Convert a llama-index tool, like QueryEngineTool, to a LlamaCppFunctionTool and register it.

    Args:
        tool: The llama-index tool to add.
        name (str): Optional name set on the converted tool via ``set_name``.

    Raises:
        NotImplementedError: If the output type is not one of the function calling types.
    """
    kind = self.output_type
    if kind is LlmStructuredOutputType.no_structured_output:
        raise NotImplementedError(
            "LlmOutputType: no_structured_output not supported for structured output and function calling!"
        )

    accepts_tools = (
        kind is LlmStructuredOutputType.function_calling
        or kind is LlmStructuredOutputType.parallel_function_calling
    )
    if not accepts_tools:
        raise NotImplementedError(
            f"LlmOutputType: {self.output_type.value} not supported for tools!"
        )

    converted_tool = LlamaCppFunctionTool.from_llama_index_tool(tool)
    if name is not None:
        converted_tool.set_name(name)
    self.function_tools.append(converted_tool)
get_llm_documentation(provider)

Generate documentation for the models and tools configured within the settings, based on the output type.

Returns:

  • str

    Generated documentation for the configured models or tools.

Raises:

  • NotImplementedError

    If no structured output is specified.

Source code in llama_cpp_agent/llm_output_settings/settings.py
def get_llm_documentation(self, provider):
    """
    Generate text documentation for the configured models or tools, based on the output type.

    Args:
        provider: The provider in use. For TGIServerProvider and VLLMServerProvider instances the
            documentation is generated with ``ordered_json_mode=True``.

    Returns:
        str: Stripped documentation text for the configured models or tools, or None when the
        output type matches none of the handled members.

    Raises:
        NotImplementedError: If the output type is no_structured_output.
    """
    from llama_cpp_agent.providers.tgi_server import TGIServerProvider
    from llama_cpp_agent.providers.vllm_server import VLLMServerProvider

    ordered_json = isinstance(provider, (TGIServerProvider, VLLMServerProvider))

    kind = self.output_type
    if kind == LlmStructuredOutputType.no_structured_output:
        raise NotImplementedError(
            "LlmOutputType: no_structured_output not supported for structured output and function calling!"
        )

    if kind == LlmStructuredOutputType.object_instance or kind == LlmStructuredOutputType.list_of_objects:
        documentation = generate_text_documentation(
            self.pydantic_models, ordered_json_mode=ordered_json
        )
        return documentation.strip()

    if (
            kind == LlmStructuredOutputType.function_calling
            or kind == LlmStructuredOutputType.parallel_function_calling
    ):
        documentation = generate_text_documentation(
            [function_tool.model for function_tool in self.function_tools],
            model_prefix="Function",
            fields_prefix="Parameters",
            ordered_json_mode=ordered_json,
        )
        return documentation.strip()

    return None
get_gbnf_grammar()

Generate a GBNF grammar for tools configured within the settings, based on the output type.

Returns:

  • str

    Generated GBNF grammar for the configured models or tools.

Raises:

  • NotImplementedError

    If no structured output is specified.

Source code in llama_cpp_agent/llm_output_settings/settings.py
def get_gbnf_grammar(self):
    """
    Generate a GBNF grammar for the configured models or tools, based on the output type.

    The two list-producing output types (list_of_objects, parallel_function_calling) request
    ``list_of_outputs=True``; heartbeat options are only forwarded for the function calling types.

    Returns:
        str: Generated GBNF grammar, or None when the output type matches none of the handled members.

    Raises:
        NotImplementedError: If the output type is no_structured_output.
    """
    kind = self.output_type
    if kind == LlmStructuredOutputType.no_structured_output:
        raise NotImplementedError(
            "LlmOutputType: no_structured_output not supported for structured output and function calling!"
        )

    # Classify the output type: does it emit a list, and is it built from function tools?
    if kind == LlmStructuredOutputType.object_instance:
        emits_list, uses_tools = False, False
    elif kind == LlmStructuredOutputType.list_of_objects:
        emits_list, uses_tools = True, False
    elif kind == LlmStructuredOutputType.function_calling:
        emits_list, uses_tools = False, True
    elif kind == LlmStructuredOutputType.parallel_function_calling:
        emits_list, uses_tools = True, True
    else:
        return None

    if not uses_tools:
        return generate_gbnf_grammar_from_pydantic_models(
            self.pydantic_models,
            list_of_outputs=emits_list,
            add_inner_thoughts=self.add_thoughts_and_reasoning_field,
            outer_object_name=self.output_model_name_field_name,
            outer_object_content=self.output_model_attributes_field_name,
            inner_thought_field_name=self.thoughts_and_reasoning_field_name,
            allow_only_inner_thoughts=False,
            add_request_heartbeat=False,
        )

    return generate_gbnf_grammar_from_pydantic_models(
        [function_tool.model for function_tool in self.function_tools],
        list_of_outputs=emits_list,
        add_inner_thoughts=self.add_thoughts_and_reasoning_field,
        outer_object_name=self.function_calling_name_field_name,
        outer_object_content=self.function_calling_content,
        inner_thought_field_name=self.thoughts_and_reasoning_field_name,
        allow_only_inner_thoughts=False,
        add_request_heartbeat=self.add_heartbeat_field,
        request_heartbeat_field_name=self.heartbeat_field_name,
        request_heartbeat_models=self.heartbeat_function_names_list
    )
get_json_schema()

Generate a JSON schema for the tools configured within the settings, based on the output type.

Returns:

  • Dict

    Generated JSON schema for the configured models or tools.

Raises:

  • NotImplementedError

    If no structured output is specified.

Source code in llama_cpp_agent/llm_output_settings/settings.py
def get_json_schema(self):
    """
    Generate a JSON schema for the configured models or tools, based on the output type.

    Field names are passed on with a three-digit prefix (``001_``, ``002_``, ...). When the
    thoughts and reasoning field is enabled it takes ``001_`` and every other field moves up by one.

    Returns:
        Dict: Generated JSON schema, or None when the output type matches none of the handled members.

    Raises:
        NotImplementedError: If the output type is no_structured_output.
    """
    kind = self.output_type
    if kind == LlmStructuredOutputType.no_structured_output:
        raise NotImplementedError(
            "LlmOutputType: no_structured_output not supported for structured output and function calling!"
        )

    # Classify the output type: does it allow a list, and is it built from function tools?
    if kind == LlmStructuredOutputType.object_instance:
        as_list, uses_tools = False, False
    elif kind == LlmStructuredOutputType.list_of_objects:
        as_list, uses_tools = True, False
    elif kind is LlmStructuredOutputType.function_calling:
        as_list, uses_tools = False, True
    elif kind is LlmStructuredOutputType.parallel_function_calling:
        as_list, uses_tools = True, True
    else:
        return None

    with_thoughts = self.add_thoughts_and_reasoning_field
    if with_thoughts:
        name_tag, content_tag, heartbeat_tag = "002_", "003_", "004_"
    else:
        name_tag, content_tag, heartbeat_tag = "001_", "002_", "003_"
    thoughts_name = "001_" + self.thoughts_and_reasoning_field_name

    if not uses_tools:
        return generate_json_schemas(
            self.pydantic_models,
            allow_list=as_list,
            outer_object_name=name_tag + self.output_model_name_field_name,
            outer_object_properties_name=content_tag + self.output_model_attributes_field_name,
            inner_thoughts_name=thoughts_name,
            add_inner_thoughts=with_thoughts,
        )

    return generate_json_schemas(
        [function_tool.model for function_tool in self.function_tools],
        allow_list=as_list,
        outer_object_name=name_tag + self.function_calling_name_field_name,
        outer_object_properties_name=content_tag + self.function_calling_content,
        inner_thoughts_name=thoughts_name,
        add_inner_thoughts=with_thoughts,
        add_heartbeat=self.add_heartbeat_field,
        heartbeat_name=heartbeat_tag + self.heartbeat_field_name,
        heartbeat_list=self.heartbeat_function_names_list
    )
add_function_name_to_heartbeat_list(function_name)

Add a function name to the heartbeat list. This way, a heartbeat field gets added to the function calling output.

Source code in llama_cpp_agent/llm_output_settings/settings.py
def add_function_name_to_heartbeat_list(self, function_name: str):
    """
    Register a single function name for heartbeat support.

    The name is appended to ``self.heartbeat_function_names_list``, which is
    handed to the schema/grammar generators as ``heartbeat_list`` so that a
    heartbeat field gets added to the function calling output.

    Args:
        function_name (str): Name of the function to append to the heartbeat list.
    """
    heartbeat_names = self.heartbeat_function_names_list
    heartbeat_names.append(function_name)
add_all_current_functions_to_heartbeat_list(excluded=None)

Add the names of all currently registered function tools to the heartbeat list, skipping any name listed in excluded. This way a heartbeat field gets added to the function calling output of those functions.

Source code in llama_cpp_agent/llm_output_settings/settings.py
def add_all_current_functions_to_heartbeat_list(self, excluded: list[str] = None):
    """
    Add every currently registered function tool to the heartbeat list.

    The class name of each tool's model (``tool.model.__name__``) is appended to
    ``self.heartbeat_function_names_list`` unless it appears in ``excluded``.
    Names already present in the heartbeat list are not filtered out, so calling
    this method repeatedly adds them again.

    Args:
        excluded (list[str]): Function names to leave out. ``None`` excludes nothing.
    """
    skipped = excluded if excluded is not None else []

    # Collect first, then extend once, so the heartbeat list is updated in one step.
    selected_names = []
    for tool in self.function_tools:
        model_name = tool.model.__name__
        if model_name not in skipped:
            selected_names.append(model_name)

    self.heartbeat_function_names_list.extend(selected_names)
handle_function_call(function_call_response)

Handle a function call response and return the output.

Parameters:

  • function_call_response (dict) –

    The function call response.

Returns:

  • str

    The output of the function call or an error message.

Source code in llama_cpp_agent/llm_output_settings/settings.py
def handle_function_call(self, function_call_response: Union[dict, List[dict]]):
    """
    Dispatch a parsed function call response to the matching executor.

    Args:
        function_call_response (Union[dict, List[dict]]): The function call
            response: a single call dictionary, or a list of them when
            ``self.output_type`` is parallel function calling.

    Returns:
        Whatever ``intern_function_call`` / ``intern_parallel_function_call``
        returns, or an ``"Error: ..."`` string when the response is ``None`` or an
        ``AttributeError`` is raised while handling it.
    """
    if function_call_response is None:
        return "Error: Invalid function call response."

    try:
        if self.output_type == LlmStructuredOutputType.parallel_function_calling:
            return self.intern_parallel_function_call(function_call_response)
        return self.intern_function_call(function_call_response)
    except AttributeError as error:
        # Only AttributeError is converted into an error message; anything else propagates.
        return f"Error: {error}"
intern_function_call(function_call)

Internal method to handle a function call and return the output.

Parameters:

  • function_call (dict) –

    The function call dictionary.

Returns: str: The output of the function call or an error message.

Source code in llama_cpp_agent/llm_output_settings/settings.py
def intern_function_call(self, function_call: dict):
    """
    Execute a single function call and package its result.

    The tool is looked up by comparing the value stored under
    ``self.function_calling_name_field_name`` with each tool's model class name.
    The model is instantiated from the arguments stored under
    ``self.function_calling_content`` and its ``run`` method is invoked with the
    tool's ``additional_parameters``.

    Args:
        function_call (dict): The function call dictionary.
    Returns:
        A one-element list holding a dict with the function name, the call
        arguments and ``"return_value"``; ``None`` when the dictionary has no
        arguments entry or no tool matches the requested name.
    """
    if self.function_calling_content not in function_call:
        return None

    matching_tool = next(
        (
            candidate
            for candidate in self.function_tools
            if candidate.model.__name__ == function_call[self.function_calling_name_field_name]
        ),
        None,
    )
    if matching_tool is None:
        return None

    arguments = function_call[self.function_calling_content]
    instance = matching_tool.model(**arguments)
    return_value = instance.run(**matching_tool.additional_parameters)
    return [
        {
            self.function_calling_name_field_name: matching_tool.model.__name__,
            self.function_calling_content: arguments,
            "return_value": return_value,
        }
    ]
intern_parallel_function_call(function_calls)

Internal method to handle a list of function calls and return their outputs.

Parameters:

  • function_calls List[dict]

    The function call dictionary.

Returns:

  • str

    The output of the function call or an error message.

Source code in llama_cpp_agent/llm_output_settings/settings.py
def intern_parallel_function_call(self, function_calls: List[dict]):
    """
    Execute a list of function calls one after another and collect their results.

    Calls without an arguments entry (``self.function_calling_content``) and
    calls whose name matches no registered tool are skipped silently.

    Args:
        function_calls (List[dict]): The function call dictionaries.

    Returns:
        A list with one dict per executed call (function name, call arguments and
        ``"return_value"``). If a call raises ``AttributeError``, an
        ``"Error: ..."`` string is returned immediately instead and the results
        collected so far are dropped.
    """
    outputs = []
    for request in function_calls:
        if self.function_calling_content not in request:
            continue

        matching_tool = next(
            (
                candidate
                for candidate in self.function_tools
                if candidate.model.__name__ == request[self.function_calling_name_field_name]
            ),
            None,
        )
        if matching_tool is None:
            continue

        try:
            model_cls = matching_tool.model
            arguments = request[self.function_calling_content]
            instance = model_cls(**arguments)
            return_value = instance.run(**matching_tool.additional_parameters)
            outputs.append(
                {
                    self.function_calling_name_field_name: matching_tool.model.__name__,
                    self.function_calling_content: arguments,
                    "return_value": return_value,
                }
            )
        except AttributeError as error:
            return f"Error: {error}"

    return outputs

Function Calling Agent

llama_cpp_agent.function_calling_agent

activate_message_mode

Bases: BaseModel

Activates message mode.

Source code in llama_cpp_agent/function_calling_agent.py
class activate_message_mode(BaseModel):
    """
    Activates message mode.
    """

    def run(self, agent: "FunctionCallingAgent"):
        # Register the user-facing sender so the agent's next plain-text result
        # is forwarded through agent.send_message_to_user.
        agent.without_grammar_mode_function.append(agent.send_message_to_user)
        # Suffix asking the model for plain text instead of structured output.
        agent.prompt_suffix = "\nWrite message in plain text format:"
        # Flag read (and reset) by the agent when it requests its next response.
        agent.without_grammar_mode = True
        return True

send_message

Bases: BaseModel

Sends a message to the user.

Source code in llama_cpp_agent/function_calling_agent.py
class send_message(BaseModel):
    """
    Sends a message to the user.
    """

    content: str = Field(..., description="Content of the message to be sent.")

    def run(self, agent: "FunctionCallingAgent"):
        # Hand the message text to the agent, which forwards it to its callback
        # (or prints it); the returned string is the tool's return value.
        message_text = self.content
        agent.send_message_to_user(message_text)
        return "Message sent."

write_text_file

Bases: BaseModel

Writes content to a file.

Source code in llama_cpp_agent/function_calling_agent.py
class write_text_file(BaseModel):
    """
    Writes content to a file.
    """

    file_path: str = Field(..., description="The path to the file.")
    content: str = Field(..., description="The content to write to the file.")

    def run(self, agent: "FunctionCallingAgent"):
        # Tool entry point. `agent` is accepted but not used here.
        text = self.content
        self.write_file(text)
        return True

    def write_file(self, content: str):
        """
        Write ``content`` to ``self.file_path`` as UTF-8 text.

        The file is opened in "w" mode, so existing content is replaced.

        Args:
            content (str): The content to write to the file.
        """
        with open(self.file_path, "w", encoding="utf-8") as handle:
            handle.write(content)
write_file(content)

Write content to a file.

Parameters:

  • content (str) –

    The content to write to the file.

Source code in llama_cpp_agent/function_calling_agent.py
def write_file(self, content: str):
    """
    Write ``content`` to ``self.file_path`` as UTF-8 text.

    The file is opened in "w" mode, so existing content is replaced.

    Args:
        content (str): The content to write to the file.
    """
    with open(self.file_path, "w", encoding="utf-8") as handle:
        handle.write(content)

read_text_file

Bases: BaseModel

Reads the content of a file.

Source code in llama_cpp_agent/function_calling_agent.py
class read_text_file(BaseModel):
    """
    Reads the content of a file.
    """

    file_path: str = Field(..., description="The path to the file.")

    def run(self):
        # Tool entry point: the file content (or the not-found message) is the
        # tool's return value.
        file_content = self.read_file()
        return file_content

    def read_file(self):
        """
        Return the UTF-8 text stored at ``self.file_path``.

        Returns:
            The file content, or the string "File not found." when the path does not exist.
        """
        if not os.path.exists(self.file_path):
            return "File not found."
        with open(self.file_path, "r", encoding="utf-8") as handle:
            return handle.read()
read_file()

Reads the content of a file.

Source code in llama_cpp_agent/function_calling_agent.py
def read_file(self):
    """
    Return the UTF-8 text stored at ``self.file_path``.

    Returns:
        The file content, or the string "File not found." when the path does not exist.
    """
    if not os.path.exists(self.file_path):
        return "File not found."
    with open(self.file_path, "r", encoding="utf-8") as handle:
        return handle.read()

FunctionCallingAgent

An agent that uses function calling to interact with its environment and the user.

Parameters:

  • llama_llm (Llama | LlamaLLMSettings | LlamaCppEndpointSettings | OpenAIEndpointSettings) –

    An instance of Llama, LlamaLLMSettings, LlamaCppEndpointSettings or LlamaCppServerLLMSettings as LLM.

  • llama_generation_settings (LlamaLLMGenerationSettings | LlamaCppGenerationSettings | OpenAIGenerationSettings) –

    Generation settings for Llama.

  • messages_formatter_type (MessagesFormatterType, default: CHATML ) –

    Type of messages formatter.

  • custom_messages_formatter (MessagesFormatter, default: None ) –

    Optional Custom messages formatter.

  • streaming_callback (Callable[[StreamingResponse], None], default: None ) –

    Callback function for streaming responses.

  • k_last_messages_from_chat_history (int, default: 0 ) –

    Number of last messages to consider from chat history.

  • system_prompt (str, default: None ) –

    System prompt for interaction.

  • llama_cpp_function_tools (List[LlamaCppFunctionTool], default: None ) –

    List of LlamaCppFunctionTool instances.

  • allow_parallel_function_calling (bool, default: False ) –

    Allow parallel function calling (Default=False)

  • add_send_message_to_user_function (bool, default: True ) –

    Flag to add send_message_to_user function.

  • send_message_to_user_callback (Callable[[str], None], default: None ) –

    Callback for sending a message to the user.

  • debug_output (bool, default: False ) –

    Enable debug output.

Attributes:

  • send_message_to_user_callback (Callable[[str], None]) –

    Callback for sending a message to the user.

  • llama_cpp_tools (List[LlamaCppFunctionTool]) –

    List of LlamaCppFunctionTool instances.

  • tool_registry (LlamaCppFunctionToolRegistry) –

    Function tool registry.

  • llama_generation_settings (LlamaLLMGenerationSettings) –

    Generation settings for Llama.

  • system_prompt (str) –

    System prompt for interaction.

  • llama_cpp_agent (LlamaCppAgent) –

    LlamaCppAgent instance for interaction.

  • k_last_messages_from_chat_history (int) –

    Number of last messages to consider from chat history.

  • streaming_callback (Callable[[StreamingResponse], None]) –

    Callback function for streaming responses.

Methods:

  • save(file_path: str) –

    Save the agent's state to a file.

  • load_from_file(file_path: str, llama_llm, python_functions, pydantic_functions, send_message_to_user_callback, streaming_callback) -> FunctionCallingAgent –

    Load the agent's state from a file.

  • load_from_dict(agent_dict: dict) -> FunctionCallingAgent –

    Load the agent's state from a dictionary.

  • as_dict() -> dict –

    Convert the agent's state to a dictionary.

  • generate_response(message: str) –

    Generate a response based on the input message.

  • send_message_to_user(message: str) –

    Send a message to the user.

Source code in llama_cpp_agent/function_calling_agent.py
class FunctionCallingAgent:
    """
    An agent that uses function calling to interact with its environment and the user.

    Args:
        llama_llm (LlmProvider): The LLM Provider.
        messages_formatter_type (MessagesFormatterType): Type of messages formatter.
        custom_messages_formatter (MessagesFormatter): Optional Custom messages formatter.
        streaming_callback (Callable[[StreamingResponse], None]): Callback function for streaming responses.
        k_last_messages_from_chat_history (int): Number of last messages to consider from chat history.
        system_prompt (str): System prompt for interaction. A built-in default prompt is used when None.
        llama_cpp_function_tools (List[LlamaCppFunctionTool]): List of LlamaCppFunctionTool instances.
        basic_file_tools (bool): Flag to add the read_text_file and write_text_file tools.
        allow_parallel_function_calling (bool): Allow parallel function calling (Default=False)
        add_send_message_to_user_function (bool): Flag to add the send_message tool.
        send_message_to_user_callback (Callable[[str], None]): Callback for sending a message to the user.
        debug_output (bool): Enable debug output.

    Attributes:
        send_message_to_user_callback (Callable[[str], None]): Callback for sending a message to the user.
        llama_cpp_tools (List[LlamaCppFunctionTool]): The agent's own list of LlamaCppFunctionTool instances.
        allow_parallel_function_calling (bool): Whether parallel function calling is enabled.
        structured_output_settings (LlmStructuredOutputSettings): Default output settings built from the tools.
        without_grammar_mode (bool): One-shot flag set by tools such as activate_message_mode.
        without_grammar_mode_function (list): Callables that receive a plain-text model response.
        prompt_suffix (str): Prompt suffix set by tools; reset after a response requested in without-grammar mode.
        system_prompt (str): System prompt for interaction.
        llama_cpp_agent (LlamaCppAgent): LlamaCppAgent instance for interaction.
        k_last_messages_from_chat_history (int): Number of last messages to consider from chat history.
        streaming_callback (Callable[[StreamingResponse], None]): Callback function for streaming responses.

    Methods:
        load_from_dict(agent_dict: dict) -> FunctionCallingAgent: Create an agent from constructor keyword arguments.
        as_dict() -> dict: Return the agent's attribute dictionary.
        generate_response(message: str): Generate a response based on the input message.
        intern_get_response(): Request a single chat response from the wrapped LlamaCppAgent.
        send_message_to_user(message: str): Send a message to the user.

    """

    def __init__(
        self,
        llama_llm: LlmProvider,
        messages_formatter_type: MessagesFormatterType = MessagesFormatterType.CHATML,
        custom_messages_formatter: MessagesFormatter = None,
        streaming_callback: Callable[[StreamingResponse], None] = None,
        k_last_messages_from_chat_history: int = 0,
        system_prompt: str = None,
        llama_cpp_function_tools: List[LlamaCppFunctionTool] = None,
        basic_file_tools: bool = False,
        allow_parallel_function_calling: bool = False,
        add_send_message_to_user_function: bool = True,
        send_message_to_user_callback: Callable[[str], None] = None,
        debug_output: bool = False,
    ):
        """
        Initialize the FunctionCallingAgent.

        Args:
            llama_llm (LlmProvider): The LLM Provider.
            messages_formatter_type (MessagesFormatterType): Type of messages formatter.
            custom_messages_formatter (MessagesFormatter): Optional Custom messages formatter.
            streaming_callback (Callable[[StreamingResponse], None]): Callback function for streaming responses.
            k_last_messages_from_chat_history (int): Number of last messages to consider from chat history.
            system_prompt (str): System prompt for interaction.
            llama_cpp_function_tools (List[LlamaCppFunctionTool]): List of LlamaCppFunctionTool instances.
            basic_file_tools (bool): Flag to add the read_text_file and write_text_file tools.
            allow_parallel_function_calling (bool): Allow parallel function calling (Default=False)
            add_send_message_to_user_function (bool): Flag to add send_message_to_user function.
            send_message_to_user_callback (Callable[[str], None]): Callback for sending a message to the user.
            debug_output (bool): Enable debug output.
        """
        # Work on a copy: the built-in tools appended below must not leak into
        # the list object owned by the caller (which may be reused elsewhere).
        self.llama_cpp_tools = (
            list(llama_cpp_function_tools) if llama_cpp_function_tools else []
        )

        self.send_message_to_user_callback = send_message_to_user_callback
        if add_send_message_to_user_function:
            self.llama_cpp_tools.append(LlamaCppFunctionTool(send_message, agent=self))

        if basic_file_tools:
            self.llama_cpp_tools.append(LlamaCppFunctionTool(read_text_file))
            self.llama_cpp_tools.append(
                LlamaCppFunctionTool(write_text_file, agent=self)
            )

        self.allow_parallel_function_calling = allow_parallel_function_calling

        self.structured_output_settings = (
            LlmStructuredOutputSettings.from_llama_cpp_function_tools(
                self.llama_cpp_tools, self.allow_parallel_function_calling
            )
        )
        self.structured_output_settings.add_thoughts_and_reasoning_field = True
        self.without_grammar_mode = False
        self.without_grammar_mode_function = []
        self.prompt_suffix = ""
        if system_prompt is not None:
            self.system_prompt = system_prompt
        else:
            self.system_prompt = """You are Funky, an AI assistant that calls functions to perform tasks. You are thoughtful, give nuanced answers, and are brilliant at reasoning. Below is a list of functions you can use to interact with the system. Each function has specific parameters and requirements. Make sure to follow the instructions for each function carefully.
Choose the appropriate function based on the task you want to perform. Provide your function calls in JSON format."""
        self.llama_cpp_agent = LlamaCppAgent(
            llama_llm,
            debug_output=debug_output,
            system_prompt=self.system_prompt,
            predefined_messages_formatter_type=messages_formatter_type,
            custom_messages_formatter=custom_messages_formatter,
        )

        self.k_last_messages_from_chat_history = k_last_messages_from_chat_history
        self.streaming_callback = streaming_callback

    @staticmethod
    def load_from_dict(agent_dict: dict) -> "FunctionCallingAgent":
        """
        Load the agent's state from a dictionary.

        The dictionary is expanded into keyword arguments of ``__init__``, so its
        keys must be constructor parameter names.

        NOTE(review): the dictionary returned by ``as_dict`` contains attribute
        names (e.g. ``llama_cpp_tools``) that are not constructor parameters, so
        it cannot be passed here unchanged.

        Args:
            agent_dict (dict): The dictionary containing the agent's state.

        Returns:
            FunctionCallingAgent: The loaded FunctionCallingAgent instance.
        """
        return FunctionCallingAgent(**agent_dict)

    def as_dict(self) -> dict:
        """
        Convert the agent's state to a dictionary.

        Returns:
           dict: The agent's live ``__dict__`` (not a copy); mutating it mutates the agent.
        """
        return self.__dict__

    def generate_response(
        self,
        message: str,
        llm_sampling_settings: LlmSamplingSettings = None,
        structured_output_settings: LlmStructuredOutputSettings = None,
    ):
        """
        Generate a response based on the input message.

        The message is added to the chat as a user message. The model is then
        queried repeatedly: each list of function call results is summarized and
        added to the chat as a tool message, until either the model returns plain
        text or one of the executed functions was ``send_message``.

        Args:
            message (str): The user message.
            llm_sampling_settings (LlmSamplingSettings): Optional sampling settings passed to the LLM.
            structured_output_settings (LlmStructuredOutputSettings): Optional settings used instead of the
                agent's own; their ``add_thoughts_and_reasoning_field`` is set to True.

        Returns:
            The last result obtained from the model: a plain-text string or the
            list of function call results that ended the loop.
        """
        self.llama_cpp_agent.add_message(role=Roles.user, message=message)

        if structured_output_settings is not None:
            structured_output_settings.add_thoughts_and_reasoning_field = True

        result = self.intern_get_response(
            llm_sampling_settings=llm_sampling_settings,
            structured_output_settings=structured_output_settings,
        )

        while True:
            if isinstance(result, str):
                # Plain-text answer: forward it once per distinct handler name.
                called_names = []
                for func in self.without_grammar_mode_function:
                    if func.__name__ not in called_names:
                        func(result.strip())
                        called_names.append(func.__name__)
                break
            function_message = "Function Calling Results:\n\n"
            if result is not None:
                agent_sent_message = False
                for count, res in enumerate(result, start=1):
                    # String entries are reported verbatim. They must be handled
                    # before any key lookup, since subscripting a str with a key
                    # raises TypeError.
                    if isinstance(res, str):
                        function_message += f"{count}. " + res + "\n\n"
                        continue
                    if res["function"] == "send_message":
                        agent_sent_message = True
                    if "params" in res:
                        function_message += f"""{count}. Function: "{res["function"]}"\nArguments: "{res["params"]}"\nReturn Value: {res["return_value"]}\n\n"""
                    else:
                        function_message += f"""{count}. Function: "{res["function"]}"\nReturn Value: {res["return_value"]}\n\n"""
                self.llama_cpp_agent.add_message(
                    role=Roles.tool, message=function_message.strip()
                )
                if agent_sent_message:
                    break
            # A None result adds nothing to the chat; the model is simply queried again.
            result = self.intern_get_response(
                llm_sampling_settings=llm_sampling_settings,
                structured_output_settings=structured_output_settings,
            )
        return result

    def intern_get_response(
        self,
        llm_sampling_settings: LlmSamplingSettings = None,
        structured_output_settings: LlmStructuredOutputSettings = None,
    ):
        """
        Request a single chat response from the wrapped LlamaCppAgent.

        Args:
            llm_sampling_settings (LlmSamplingSettings): Optional sampling settings passed to the LLM.
            structured_output_settings (LlmStructuredOutputSettings): Settings to use for this request;
                the agent's own ``structured_output_settings`` are used when None.

        Returns:
            The value returned by ``LlamaCppAgent.get_chat_response``.
        """
        # The without-grammar flag is one-shot: remember it, then clear it.
        without_grammar_mode = False
        if self.without_grammar_mode:
            without_grammar_mode = True
            self.without_grammar_mode = False
        result = self.llama_cpp_agent.get_chat_response(
            streaming_callback=self.streaming_callback,
            structured_output_settings=self.structured_output_settings
            if structured_output_settings is None
            else structured_output_settings,
            llm_sampling_settings=llm_sampling_settings,
        )
        if without_grammar_mode:
            self.prompt_suffix = ""
        return result

    def send_message_to_user(self, message: str):
        """
        Send a message to the user.

        The message goes to ``send_message_to_user_callback`` when one is set and
        is printed to standard output otherwise.

        Args:
            message: The message to send to the user.
        """
        if self.send_message_to_user_callback:
            self.send_message_to_user_callback(message)
        else:
            print(message)
__init__(llama_llm, messages_formatter_type=MessagesFormatterType.CHATML, custom_messages_formatter=None, streaming_callback=None, k_last_messages_from_chat_history=0, system_prompt=None, llama_cpp_function_tools=None, basic_file_tools=False, allow_parallel_function_calling=False, add_send_message_to_user_function=True, send_message_to_user_callback=None, debug_output=False)

Initialize the FunctionCallingAgent.

Parameters:

  • llama_llm (LlmProvider) –

    The LLM Provider.

  • messages_formatter_type (MessagesFormatterType, default: CHATML ) –

    Type of messages formatter.

  • custom_messages_formatter (MessagesFormatter, default: None ) –

    Optional Custom messages formatter.

  • streaming_callback (Callable[[StreamingResponse], None], default: None ) –

    Callback function for streaming responses.

  • k_last_messages_from_chat_history (int, default: 0 ) –

    Number of last messages to consider from chat history.

  • system_prompt (str, default: None ) –

    System prompt for interaction.

  • llama_cpp_function_tools (List[LlamaCppFunctionTool], default: None ) –

    List of LlamaCppFunctionTool instances.

  • allow_parallel_function_calling (bool, default: False ) –

    Allow parallel function calling (Default=False)

  • add_send_message_to_user_function (bool, default: True ) –

    Flag to add send_message_to_user function.

  • send_message_to_user_callback (Callable[[str], None], default: None ) –

    Callback for sending a message to the user.

  • debug_output (bool, default: False ) –

    Enable debug output.

Source code in llama_cpp_agent/function_calling_agent.py
    def __init__(
        self,
        llama_llm: LlmProvider,
        messages_formatter_type: MessagesFormatterType = MessagesFormatterType.CHATML,
        custom_messages_formatter: MessagesFormatter = None,
        streaming_callback: Callable[[StreamingResponse], None] = None,
        k_last_messages_from_chat_history: int = 0,
        system_prompt: str = None,
        llama_cpp_function_tools: [LlamaCppFunctionTool] = None,
        basic_file_tools: bool = False,
        allow_parallel_function_calling=False,
        add_send_message_to_user_function: bool = True,
        send_message_to_user_callback: Callable[[str], None] = None,
        debug_output: bool = False,
    ):
        """
        Initialize the FunctionCallingAgent.

        Args:
            llama_llm (LlmProvider): The LLM Provider.
            messages_formatter_type (MessagesFormatterType): Type of messages formatter.
            custom_messages_formatter (MessagesFormatter): Optional Custom messages formatter.
            streaming_callback (Callable[[StreamingResponse], None]): Callback function for streaming responses.
            k_last_messages_from_chat_history (int): Number of last messages to consider from chat history.
            system_prompt (str): System prompt for interaction.
            llama_cpp_function_tools(List[LlamaCppFunctionTool]): List of LlamaCppFunctionTool instances.
            allow_parallel_function_calling (bool): Allow parallel function calling (Default=False)
            add_send_message_to_user_function (bool): Flag to add send_message_to_user function.
            send_message_to_user_callback (Callable[[str], None]): Callback for sending a message to the user.
            debug_output (bool): Enable debug output.
        """
        self.llama_cpp_tools = []
        if llama_cpp_function_tools:
            self.llama_cpp_tools = llama_cpp_function_tools

        self.send_message_to_user_callback = send_message_to_user_callback
        if add_send_message_to_user_function:
            self.llama_cpp_tools.append(LlamaCppFunctionTool(send_message, agent=self))

        if basic_file_tools:
            self.llama_cpp_tools.append(LlamaCppFunctionTool(read_text_file))
            self.llama_cpp_tools.append(
                LlamaCppFunctionTool(write_text_file, agent=self)
            )

        self.allow_parallel_function_calling = allow_parallel_function_calling

        self.structured_output_settings = (
            LlmStructuredOutputSettings.from_llama_cpp_function_tools(
                self.llama_cpp_tools, self.allow_parallel_function_calling
            )
        )
        self.structured_output_settings.add_thoughts_and_reasoning_field = True
        self.without_grammar_mode = False
        self.without_grammar_mode_function = []
        self.prompt_suffix = ""
        if system_prompt is not None:
            self.system_prompt = system_prompt
        else:
            self.system_prompt = """You are Funky, an AI assistant that calls functions to perform tasks. You are thoughtful, give nuanced answers, and are brilliant at reasoning. Below is a list of functions you can use to interact with the system. Each function has specific parameters and requirements. Make sure to follow the instructions for each function carefully.
Choose the appropriate function based on the task you want to perform. Provide your function calls in JSON format."""
        self.llama_cpp_agent = LlamaCppAgent(
            llama_llm,
            debug_output=debug_output,
            system_prompt=self.system_prompt,
            predefined_messages_formatter_type=messages_formatter_type,
            custom_messages_formatter=custom_messages_formatter,
        )

        self.k_last_messages_from_chat_history = k_last_messages_from_chat_history
        self.streaming_callback = streaming_callback
load_from_dict(agent_dict) staticmethod

Load the agent's state from a dictionary.

Parameters:

  • agent_dict (dict) –

    The dictionary containing the agent's state.

Returns:

Source code in llama_cpp_agent/function_calling_agent.py
@staticmethod
def load_from_dict(agent_dict: dict) -> "FunctionCallingAgent":
    """
    Build a FunctionCallingAgent from a dictionary of constructor arguments.

    Args:
        agent_dict (dict): The dictionary containing the agent's state; it is
            expanded into keyword arguments of the FunctionCallingAgent constructor.

    Returns:
        FunctionCallingAgent: The loaded FunctionCallingAgent instance.
    """
    loaded_agent = FunctionCallingAgent(**agent_dict)
    return loaded_agent
as_dict()

Convert the agent's state to a dictionary.

Returns:

  • dict ( dict ) –

    The dictionary representation of the agent's state.

Source code in llama_cpp_agent/function_calling_agent.py
def as_dict(self) -> dict:
    """
    Expose the agent's state as a dictionary.

    Returns:
       dict: The instance's own attribute dictionary (the live object, not a copy).
    """
    return vars(self)
send_message_to_user(message)

Send a message to the user.

Parameters:

  • message (str) –

    The message to send to the user.

Source code in llama_cpp_agent/function_calling_agent.py
def send_message_to_user(self, message: str):
    """
    Deliver a message to the user.

    The message is passed to ``self.send_message_to_user_callback`` when one is
    set; otherwise it is printed to standard output.

    Args:
        message: The message to send to the user.
    """
    callback = self.send_message_to_user_callback
    if not callback:
        print(message)
        return
    callback(message)

Structured Output Agent

llama_cpp_agent.structured_output_agent

StructuredOutputAgent

An agent that creates structured output based on pydantic models from unstructured text.

Parameters:

  • llama_llm (Union[Llama, LlamaLLMSettings, LlamaCppEndpointSettings, OpenAIEndpointSettings]) –

    An instance of Llama, LlamaLLMSettings, LlamaCppServerLLMSettings, OpenAIEndpointSettings as LLM.

  • llama_generation_settings (Union[LlamaLLMGenerationSettings, LlamaCppGenerationSettings, OpenAIGenerationSettings]) –

    Generation settings for Llama or LlamaCppServer.

  • messages_formatter_type (MessagesFormatterType, default: CHATML ) –

    Type of messages formatter.

  • custom_messages_formatter (MessagesFormatter, default: None ) –

    Custom messages formatter.

  • streaming_callback (Callable[[StreamingResponse], None], default: None ) –

    Callback function for streaming responses.

  • debug_output (bool, default: False ) –

    Enable debug output.

Attributes:

  • llama_generation_settings (Union[LlamaLLMGenerationSettings, LlamaCppServerGenerationSettings]) –

    Generation settings for Llama or LlamaCppServer.

  • grammar_cache (dict) –

    Cache for generated grammars.

  • system_prompt_template (PromptTemplate) –

    Template for the system prompt.

  • creation_prompt_template (PromptTemplate) –

    Template for the creation prompt.

  • llama_cpp_agent (LlamaCppAgent) –

    LlamaCppAgent instance for interaction.

  • streaming_callback (Callable[[StreamingResponse], None]) –

    Callback function for streaming responses.

Methods:

  • save(file_path: str) –

    Save the agent's state to a file.

  • load_from_file(file_path: str, llama_llm, streaming_callback) -> StructuredOutputAgent –

    Load the agent's state from a file.

  • load_from_dict(agent_dict: dict) -> StructuredOutputAgent –

    Load the agent's state from a dictionary.

  • as_dict() -> dict –

    Convert the agent's state to a dictionary.

  • create_object(model: Type[BaseModel], data: str = "") -> object –

    Create an object of the given model from the given data.

Source code in llama_cpp_agent/structured_output_agent.py
class StructuredOutputAgent:
    """
    An agent that creates structured output based on pydantic models from unstructured text.

    Args:
        llama_llm (LlmProvider): The LLM provider handed to the wrapped LlamaCppAgent.
        messages_formatter_type (MessagesFormatterType): Type of messages formatter.
        custom_messages_formatter (MessagesFormatter): Custom messages formatter.
        streaming_callback (Callable[[StreamingResponse], None]): Callback function for streaming responses.
        debug_output (bool): Enable debug output.

    Attributes:
        grammar_cache (dict): Starts out empty; none of the methods of this class read it.
        system_prompt_template (PromptTemplate): Template for the system prompt.
        creation_prompt_template (PromptTemplate): Template for the creation prompt.
        llama_cpp_agent (LlamaCppAgent): LlamaCppAgent instance for interaction.
        streaming_callback (Callable[[StreamingResponse], None]): Callback function for streaming responses.

    Methods:
        save(file_path: str): Save the agent's state to a file.
        as_dict() -> dict: Return the agent's state as a dictionary.
        create_object(model, data="", llm_sampling_settings=None, returns_streaming_generator=False):
            Ask the LLM for output shaped like the given pydantic model.
    """

    def __init__(
        self,
        llama_llm: LlmProvider,
        messages_formatter_type: MessagesFormatterType = MessagesFormatterType.CHATML,
        custom_messages_formatter: MessagesFormatter = None,
        streaming_callback: Callable[[StreamingResponse], None] = None,
        debug_output: bool = False,
    ):
        """
        Initialize the StructuredOutputAgent.

        Args:
            llama_llm (LlmProvider): The LLM provider handed to the wrapped LlamaCppAgent.
            messages_formatter_type (MessagesFormatterType): Type of messages formatter.
            custom_messages_formatter (MessagesFormatter): Custom messages formatter.
            streaming_callback (Callable[[StreamingResponse], None]): Callback function for streaming responses.
            debug_output (bool): Enable debug output.
        """
        self.grammar_cache = {}
        self.system_prompt_template = PromptTemplate.from_string(
            "You are an advanced AI agent. You are tasked to assist the user by creating structured output in JSON format.\n\n{documentation}"
        )
        self.creation_prompt_template = PromptTemplate.from_string(
            "Create an JSON response based on the following input.\n\nInput:\n\n{user_input}"
        )

        # The system prompt is left empty here; create_object() supplies one per call.
        self.llama_cpp_agent = LlamaCppAgent(
            llama_llm,
            debug_output=debug_output,
            system_prompt="",
            predefined_messages_formatter_type=messages_formatter_type,
            custom_messages_formatter=custom_messages_formatter,
        )
        self.streaming_callback = streaming_callback

    def save(self, file_path: str):
        """
        Save the agent's state to a file as JSON.

        The wrapped agent, the grammar cache, both prompt templates and the
        streaming callback are left out; the debug flag and the messages
        formatter of the wrapped agent are written instead.

        Args:
            file_path (str): The path to the file.
        """
        # Shallow copy so that dropping keys does not touch the live instance dict.
        state = dict(self.as_dict())
        for key in (
            "llama_cpp_agent",
            "grammar_cache",
            "system_prompt_template",
            "creation_prompt_template",
            "streaming_callback",
        ):
            state.pop(key, None)
        state["debug_output"] = self.llama_cpp_agent.debug_output
        # __init__ never sets `llama_generation_settings`; reading it
        # unconditionally made every save() fail with AttributeError.
        generation_settings = getattr(self, "llama_generation_settings", None)
        if generation_settings is not None:
            state["llama_generation_settings"] = generation_settings.as_dict()
        state[
            "custom_messages_formatter"
        ] = self.llama_cpp_agent.messages_formatter.as_dict()
        # Serialize before opening the file so a failure cannot leave an
        # empty or half-written file behind.
        payload = json.dumps(state, indent=4)
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(payload)

    def as_dict(self) -> dict:
        """
        Convert the agent's state to a dictionary.

        Returns:
            dict: The instance's own ``__dict__`` (not a copy).
        """
        return self.__dict__

    def create_object(
        self,
        model: Type[BaseModel],
        data: str = "",
        llm_sampling_settings: LlmSamplingSettings = None,
        returns_streaming_generator: bool = False,
    ) -> object:
        """
        Creates an object of the given model from the given data.

        Args:
            model (Type[BaseModel]): The model to create the object from.
            data (str): The data to create the object from. When it is the
                empty string, a fixed "random JSON response" prompt is used.
            llm_sampling_settings (LlmSamplingSettings): Forwarded unchanged to
                the wrapped agent's get_chat_response().
            returns_streaming_generator (bool): Forwarded unchanged to the
                wrapped agent's get_chat_response().

        Returns:
            object: Whatever the wrapped agent's get_chat_response() returns.
        """
        output_settings = LlmStructuredOutputSettings.from_pydantic_models(
            [model], output_type=LlmStructuredOutputType.object_instance
        )

        system_prompt = self.system_prompt_template.generate_prompt(
            {
                "documentation": output_settings.get_llm_documentation(
                    self.llama_cpp_agent.provider
                ).strip()
            }
        )
        if data == "":
            prompt = "Create a random JSON response based on the response model."
        else:
            prompt = self.creation_prompt_template.generate_prompt({"user_input": data})
        # Neither the prompt nor the response is added to the chat history.
        response = self.llama_cpp_agent.get_chat_response(
            prompt,
            system_prompt=system_prompt,
            returns_streaming_generator=returns_streaming_generator,
            add_response_to_chat_history=False,
            add_message_to_chat_history=False,
            streaming_callback=self.streaming_callback,
            structured_output_settings=output_settings,
            llm_sampling_settings=llm_sampling_settings,
        )
        return response
__init__(llama_llm, messages_formatter_type=MessagesFormatterType.CHATML, custom_messages_formatter=None, streaming_callback=None, debug_output=False)

Initialize the StructuredOutputAgent.

Parameters:

  • llama_llm (Union[Llama, LlamaLLMSettings, LlamaCppEndpointSettings, OpenAIEndpointSettings]) –

    An instance of Llama, LlamaLLMSettings, or LlamaCppServerLLMSettings as LLM.

  • llama_generation_settings (Union[LlamaLLMGenerationSettings, LlamaCppGenerationSettings, OpenAIGenerationSettings]) –

    Generation settings for Llama or LlamaCppServer or OpenAIEndpoint.

  • messages_formatter_type (MessagesFormatterType, default: CHATML ) –

    Type of messages formatter.

  • custom_messages_formatter (MessagesFormatter, default: None ) –

    Custom messages formatter.

  • streaming_callback (Callable[[StreamingResponse], None], default: None ) –

    Callback function for streaming responses.

  • debug_output (bool, default: False ) –

    Enable debug output.

Source code in llama_cpp_agent/structured_output_agent.py
def __init__(
    self,
    llama_llm: LlmProvider,
    messages_formatter_type: MessagesFormatterType = MessagesFormatterType.CHATML,
    custom_messages_formatter: MessagesFormatter = None,
    streaming_callback: Callable[[StreamingResponse], None] = None,
    debug_output: bool = False,
):
    """
    Set up the prompt templates and the wrapped LlamaCppAgent.

    Args:
        llama_llm (LlmProvider): The LLM provider handed to the wrapped LlamaCppAgent.
        messages_formatter_type (MessagesFormatterType): Type of messages formatter.
        custom_messages_formatter (MessagesFormatter): Custom messages formatter.
        streaming_callback (Callable[[StreamingResponse], None]): Callback function for streaming responses.
        debug_output (bool): Enable debug output.
    """
    system_template_text = "You are an advanced AI agent. You are tasked to assist the user by creating structured output in JSON format.\n\n{documentation}"
    creation_template_text = "Create an JSON response based on the following input.\n\nInput:\n\n{user_input}"
    # Keyword options for the wrapped agent; its system prompt stays empty.
    agent_options = {
        "debug_output": debug_output,
        "system_prompt": "",
        "predefined_messages_formatter_type": messages_formatter_type,
        "custom_messages_formatter": custom_messages_formatter,
    }

    self.grammar_cache = {}
    self.system_prompt_template = PromptTemplate.from_string(system_template_text)
    self.creation_prompt_template = PromptTemplate.from_string(creation_template_text)
    self.llama_cpp_agent = LlamaCppAgent(llama_llm, **agent_options)
    self.streaming_callback = streaming_callback
save(file_path)

Save the agent's state to a file.

Parameters:

  • file_path (str) –

    The path to the file.

Source code in llama_cpp_agent/structured_output_agent.py
def save(self, file_path: str):
    """
    Save the agent's state to a file as JSON.

    The wrapped agent, the grammar cache, both prompt templates and the
    streaming callback are left out; the debug flag and the messages
    formatter of the wrapped agent are written instead.

    Args:
        file_path (str): The path to the file.
    """
    # Shallow copy so that dropping keys does not touch the live instance dict.
    state = dict(self.as_dict())
    for key in (
        "llama_cpp_agent",
        "grammar_cache",
        "system_prompt_template",
        "creation_prompt_template",
        "streaming_callback",
    ):
        state.pop(key, None)
    state["debug_output"] = self.llama_cpp_agent.debug_output
    # The constructor never sets `llama_generation_settings`; reading it
    # unconditionally made every save() fail with AttributeError.
    generation_settings = getattr(self, "llama_generation_settings", None)
    if generation_settings is not None:
        state["llama_generation_settings"] = generation_settings.as_dict()
    state[
        "custom_messages_formatter"
    ] = self.llama_cpp_agent.messages_formatter.as_dict()
    # Serialize before opening the file so a failure cannot leave an
    # empty or half-written file behind.
    payload = json.dumps(state, indent=4)
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(payload)
as_dict()

Convert the agent's state to a dictionary.

Returns:

  • dict ( dict ) –

    The dictionary representation of the agent's state.

Source code in llama_cpp_agent/structured_output_agent.py
def as_dict(self) -> dict:
    """
    Expose the agent's attributes as a dictionary.

    Returns:
        dict: The instance's own attribute dictionary (the live object, not a copy).
    """
    return vars(self)
create_object(model, data='', llm_sampling_settings=None, returns_streaming_generator=False)

Creates an object of the given model from the given data.

Parameters:

  • model (Type[BaseModel]) –

    The model to create the object from.

  • data (str, default: '' ) –

    The data to create the object from.

Returns:

  • object ( object ) –

    The created object.

Source code in llama_cpp_agent/structured_output_agent.py
def create_object(
    self,
    model: Type[BaseModel],
    data: str = "",
    llm_sampling_settings: LlmSamplingSettings = None,
    returns_streaming_generator: bool = False,
) -> object:
    """
    Ask the wrapped agent for output shaped like the given model.

    Args:
        model (Type[BaseModel]): The model to create the object from.
        data (str): The data to create the object from. When it is the empty
            string, a fixed "random JSON response" prompt is used.
        llm_sampling_settings (LlmSamplingSettings): Forwarded unchanged to
            the wrapped agent's get_chat_response().
        returns_streaming_generator (bool): Forwarded unchanged to the
            wrapped agent's get_chat_response().

    Returns:
        object: Whatever the wrapped agent's get_chat_response() returns.
    """
    structured_settings = LlmStructuredOutputSettings.from_pydantic_models(
        [model], output_type=LlmStructuredOutputType.object_instance
    )

    # The model documentation becomes the body of the system prompt.
    documentation = structured_settings.get_llm_documentation(
        self.llama_cpp_agent.provider
    ).strip()
    system_prompt = self.system_prompt_template.generate_prompt(
        {"documentation": documentation}
    )

    prompt = (
        "Create a random JSON response based on the response model."
        if data == ""
        else self.creation_prompt_template.generate_prompt({"user_input": data})
    )

    # Neither the prompt nor the response is added to the chat history.
    return self.llama_cpp_agent.get_chat_response(
        prompt,
        system_prompt=system_prompt,
        returns_streaming_generator=returns_streaming_generator,
        add_response_to_chat_history=False,
        add_message_to_chat_history=False,
        streaming_callback=self.streaming_callback,
        structured_output_settings=structured_settings,
        llm_sampling_settings=llm_sampling_settings,
    )

Misc

Messages Formatter

llama_cpp_agent.messages_formatter

MessagesFormatterType

Bases: Enum

Enum representing different types of predefined messages formatters.

Source code in llama_cpp_agent/messages_formatter.py
class MessagesFormatterType(Enum):
    """
    Enum representing different types of predefined messages formatters.

    Every member carries an explicit integer value, numbered consecutively
    from 1 (MISTRAL) to 20 (MISTRAL_SMALL_3).
    """

    # Values are written out by hand rather than generated.
    MISTRAL = 1
    CHATML = 2
    VICUNA = 3
    LLAMA_2 = 4
    SYNTHIA = 5
    NEURAL_CHAT = 6
    SOLAR = 7
    OPEN_CHAT = 8
    ALPACA = 9
    CODE_DS = 10
    B22 = 11
    LLAMA_3 = 12
    PHI_3 = 13
    OPEN_INTERPRETER = 14
    AUTOCODER = 15
    GEMMA_2 = 16
    DEEP_SEEK_CODER_2 = 17
    PHI_4 = 18
    DEEPSEEK_R1_DISTILL_QWEN = 19
    MISTRAL_SMALL_3 = 20

deepseek_r1_distill_qwen_chat_prompt_markers = {Roles.system: PromptMarkers('<|begin▁of▁sentence|>', ''), Roles.user: PromptMarkers('<|User|>', ''), Roles.assistant: PromptMarkers('<|Assistant|>', ''), Roles.tool: PromptMarkers('', '')} module-attribute

Instruction:

{prompt}

Response:

get_predefined_messages_formatter(formatter_type)

Gets a predefined messages formatter based on the formatter type.

Parameters:

  • formatter_type (MessagesFormatterType) –

    The type of messages formatter.

Returns:

  • MessagesFormatter ( MessagesFormatter ) –

    The predefined messages formatter.

Source code in llama_cpp_agent/messages_formatter.py
def get_predefined_messages_formatter(
        formatter_type: MessagesFormatterType,
) -> MessagesFormatter:
    """
    Look up the predefined messages formatter registered for a formatter type.

    Args:
        formatter_type (MessagesFormatterType): The type of messages formatter.

    Returns:
        MessagesFormatter: The entry stored under ``formatter_type`` in
        ``predefined_formatter``.
    """
    # Plain subscript lookup: no fallback is applied for unknown types.
    formatter = predefined_formatter[formatter_type]
    return formatter

Prompt template

llama_cpp_agent.llm_prompt_template

PromptTemplateField dataclass

Data class representing a field in a prompt template.

Attributes:

  • name (str) –

    The name of the template field.

  • value (str) –

    The value associated with the template field.

Source code in llama_cpp_agent/llm_prompt_template.py
@dataclass
class PromptTemplateField:
    """
    Data class representing a field in a prompt template.

    Both attributes are required; the dataclass declares no defaults.

    Attributes:
        name (str): The name of the template field.
        value (str): The value associated with the template field.
    """

    # Name of the template field.
    name: str
    # Value associated with the field.
    value: str

PromptTemplateFields

Class representing a collection of PromptTemplateField objects.

Methods:

  • add_field(name: str, value: str)

    Add a new field to the collection.

  • remove_field(name: str)

    Remove a field by name from the collection.

  • edit_field(name: str, new_value: str)

    Edit the value of an existing field.

  • find_field(name: str) -> PromptTemplateField

    Find and return a field by name.

  • list_fields() -> List[PromptTemplateField]

    Get a list of all fields in the collection.

  • get_fields_dict() -> Dict[str, str]

    Get a dictionary representation of the fields.

  • set_fields_from_dict(field_dict: Dict[str, str])

    Set the fields using a dictionary.

Attributes:

  • fields (List[PromptTemplateField]) –

    List of PromptTemplateField objects.

Source code in llama_cpp_agent/llm_prompt_template.py
class PromptTemplateFields:
    """
    An ordered collection of PromptTemplateField objects.

    Methods:
        add_field(name: str, value: str): Append a new field.
        remove_field(name: str): Drop every field carrying the given name.
        edit_field(name: str, new_value: str): Change the value of an existing field.
        find_field(name: str) -> PromptTemplateField: Return the first field with the given name, or None.
        list_fields() -> List[PromptTemplateField]: Return the stored list of fields.
        get_fields_dict() -> Dict[str, str]: Map field names to field values.
        set_fields_from_dict(field_dict: Dict[str, str]): Replace all fields with those from a dictionary.

    Attributes:
        fields (List[PromptTemplateField]): List of PromptTemplateField objects.
    """

    def __init__(self):
        self.fields: List[PromptTemplateField] = []

    def add_field(self, name: str, value: str):
        """Append a new field; existing fields with the same name are kept."""
        new_field = PromptTemplateField(name, value)
        self.fields.append(new_field)

    def remove_field(self, name: str):
        """Drop every field with the given name by rebinding ``fields`` to a new list."""
        survivors = [entry for entry in self.fields if entry.name != name]
        self.fields = survivors

    def edit_field(self, name: str, new_value: str):
        """Set a new value on the first field with the given name.

        Raises:
            ValueError: If no field with that name is found.
        """
        target = self.find_field(name)
        if not target:
            raise ValueError(f"Field '{name}' not found.")
        target.value = new_value

    def find_field(self, name: str) -> PromptTemplateField:
        """Return the first field with the given name, or None when there is none."""
        matches = (entry for entry in self.fields if entry.name == name)
        return next(matches, None)

    def list_fields(self):
        """Return the stored list of fields (the list itself, not a copy)."""
        current = self.fields
        return current

    def get_fields_dict(self) -> Dict[str, str]:
        """Map each field name to its value; for duplicate names the last one wins."""
        mapping = {}
        for entry in self.fields:
            mapping[entry.name] = entry.value
        return mapping

    def set_fields_from_dict(self, field_dict: Dict[str, str]):
        """Empty the field list in place, then add one field per dictionary item."""
        self.fields.clear()
        for pair in field_dict.items():
            self.add_field(*pair)
add_field(name, value)

Add a new field to the collection.

Source code in llama_cpp_agent/llm_prompt_template.py
def add_field(self, name: str, value: str):
    """Append a new field; existing fields with the same name are kept."""
    new_field = PromptTemplateField(name, value)
    self.fields.append(new_field)
remove_field(name)

Remove a field by name from the collection.

Source code in llama_cpp_agent/llm_prompt_template.py
def remove_field(self, name: str):
    """Drop every field with the given name by rebinding ``fields`` to a new list."""
    survivors = [entry for entry in self.fields if entry.name != name]
    self.fields = survivors
edit_field(name, new_value)

Edit the value of an existing field.

Source code in llama_cpp_agent/llm_prompt_template.py
def edit_field(self, name: str, new_value: str):
    """Set a new value on the field returned by ``find_field(name)``.

    Raises:
        ValueError: If ``find_field`` returns nothing for that name.
    """
    target = self.find_field(name)
    if not target:
        raise ValueError(f"Field '{name}' not found.")
    target.value = new_value
find_field(name)

Find and return a field by name.

Source code in llama_cpp_agent/llm_prompt_template.py
def find_field(self, name: str) -> PromptTemplateField:
    """Return the first field with the given name, or None when there is none."""
    matches = (entry for entry in self.fields if entry.name == name)
    return next(matches, None)
list_fields()

Get a list of all fields in the collection.

Source code in llama_cpp_agent/llm_prompt_template.py
def list_fields(self):
    """Return the stored list of fields (the list itself, not a copy)."""
    current = self.fields
    return current
get_fields_dict()

Get a dictionary representation of the fields.

Source code in llama_cpp_agent/llm_prompt_template.py
def get_fields_dict(self) -> Dict[str, str]:
    """Map each field name to its value; for duplicate names the last one wins."""
    mapping = {}
    for entry in self.fields:
        mapping[entry.name] = entry.value
    return mapping
set_fields_from_dict(field_dict)

Set the fields using a dictionary.

Source code in llama_cpp_agent/llm_prompt_template.py
def set_fields_from_dict(self, field_dict: Dict[str, str]):
    """Empty the field list in place, then add one field per dictionary item."""
    self.fields.clear()
    for pair in field_dict.items():
        self.add_field(*pair)

PromptTemplate

Class representing a prompt template.

Methods:

  • generate_prompt(template_fields: Union[dict, PromptTemplateFields], remove_empty_template_field=True) -> str

    Generate a prompt by replacing placeholders in the template with values.

Class Methods:

  • from_string(template_string: str) -> PromptTemplate

    Create a PromptTemplate from a string.

  • from_file(template_file: str) -> PromptTemplate

    Create a PromptTemplate from a file.

Attributes:

  • template (str) –

    The template string containing placeholders.

Source code in llama_cpp_agent/llm_prompt_template.py
class PromptTemplate:
    """
    Class representing a prompt template.

    Placeholders are written as ``{name}``, where ``name`` consists of word
    characters.

    Methods:
        generate_prompt(template_fields: Union[dict, PromptTemplateFields], remove_empty_template_field=True) -> str:
        Generate a prompt by replacing placeholders in the template with values.

    Class Methods:
        from_string(template_string: str) -> PromptTemplate:
        Create a PromptTemplate from a string.
        from_file(template_file: str) -> PromptTemplate:
        Create a PromptTemplate from a file.

    Attributes:
        template (str): The template string containing placeholders.
    """

    def __init__(self, template_file=None, template_string=None):
        """
        Initialize a PromptTemplate instance.

        Args:
            template_file (str): The path to a file containing the template.
                Takes precedence over ``template_string`` when both are given.
            template_string (str): The template string.

        Raises:
            ValueError: If neither argument is given (or both are empty).
        """
        if template_file:
            with open(template_file, "r") as file:
                self.template = file.read()
        elif template_string:
            self.template = template_string
        else:
            raise ValueError(
                "Either 'template_file' or 'template_string' must be provided"
            )

    @classmethod
    def from_string(cls, template_string):
        """
        Create a PromptTemplate instance from a string.

        Args:
            template_string (str): The template string.

        Returns:
            PromptTemplate: Created PromptTemplate instance.
        """
        return cls(template_string=template_string)

    @classmethod
    def from_file(cls, template_file):
        """
        Create a PromptTemplate instance from a file.

        Args:
            template_file (str): The path to a file containing the template.

        Returns:
            PromptTemplate: Created PromptTemplate instance.
        """
        with open(template_file, "r") as file:
            template_string = file.read()
        return cls(template_string=template_string)

    def _remove_empty_placeholders(self, text):
        """
        Remove lines that contain only the empty placeholder.

        Lines that still have other content after the marker is stripped are
        kept, without the marker.

        Args:
            text (str): The text containing placeholders.

        Returns:
            str: Text with empty placeholders removed.
        """
        # Split text into lines
        lines = text.split('\n')
        # Process each line individually
        processed_lines = []
        for line in lines:
            if '__EMPTY_TEMPLATE_FIELD__' in line:
                new_line = line.replace('__EMPTY_TEMPLATE_FIELD__', '')
                if new_line.strip():
                    processed_lines.append(new_line)
            else:
                processed_lines.append(line)
        # Join the lines back into a single string
        return '\n'.join(processed_lines)

    def generate_prompt(
        self,
        template_fields: Union[dict, PromptTemplateFields],
        remove_empty_template_field=True,
    ) -> str:
        """
        Generate a prompt by replacing placeholders in the template with values.

        Placeholders without a matching field are left in the text unchanged.
        Non-string values are converted with ``str()``.

        Args:
            template_fields (Union[dict, PromptTemplateFields]): The template fields.
            remove_empty_template_field (bool): If True, removes lines with empty placeholders.

        Returns:
            str: The generated prompt.
        """
        # Convert a field collection to a plain dict first. The previous code
        # called .items() before this conversion, so passing a
        # PromptTemplateFields (which has no .items()) raised AttributeError.
        # Detecting the converter method keeps every dict-like input working.
        to_dict = getattr(template_fields, "get_fields_dict", None)
        if callable(to_dict):
            template_fields = to_dict()

        values = {
            key: value if isinstance(value, str) else str(value)
            for key, value in template_fields.items()
        }

        def replace_placeholder(match):
            replacement = values.get(match.group(1), match.group(0))
            if remove_empty_template_field and replacement == "":
                # Marker consumed by _remove_empty_placeholders below.
                return "__EMPTY_TEMPLATE_FIELD__"
            return replacement

        prompt = re.sub(r"\{(\w+)\}", replace_placeholder, self.template)
        if not remove_empty_template_field:
            return prompt
        return self._remove_empty_placeholders(prompt)
__init__(template_file=None, template_string=None)

Initialize a PromptTemplate instance.

Parameters:

  • template_file (str, default: None ) –

    The path to a file containing the template.

  • template_string (str, default: None ) –

    The template string.

Source code in llama_cpp_agent/llm_prompt_template.py
def __init__(self, template_file=None, template_string=None):
    """
    Load the template text from a file or take it from a string.

    Args:
        template_file (str): The path to a file containing the template.
            Takes precedence over ``template_string`` when both are given.
        template_string (str): The template string.

    Raises:
        ValueError: If neither argument is given (or both are empty).
    """
    if not template_file and not template_string:
        raise ValueError(
            "Either 'template_file' or 'template_string' must be provided"
        )
    if template_file:
        with open(template_file, "r") as source:
            self.template = source.read()
        return
    self.template = template_string
from_string(template_string) classmethod

Create a PromptTemplate instance from a string.

Parameters:

  • template_string (str) –

    The template string.

Returns:

  • PromptTemplate

    Created PromptTemplate instance.

Source code in llama_cpp_agent/llm_prompt_template.py
@classmethod
def from_string(cls, template_string):
    """
    Build an instance of the class directly from template text.

    Args:
        template_string (str): The template string.

    Returns:
        PromptTemplate: Created PromptTemplate instance.
    """
    instance = cls(template_string=template_string)
    return instance
from_file(template_file) classmethod

Create a PromptTemplate instance from a file.

Parameters:

  • template_file (str) –

    The path to a file containing the template.

Returns:

  • PromptTemplate

    Created PromptTemplate instance.

Source code in llama_cpp_agent/llm_prompt_template.py
@classmethod
def from_file(cls, template_file):
    """
    Build an instance of the class from the contents of a template file.

    The file is read here and its text is handed to the constructor as
    ``template_string``.

    Args:
        template_file (str): The path to a file containing the template.

    Returns:
        PromptTemplate: Created PromptTemplate instance.
    """
    with open(template_file, "r") as handle:
        contents = handle.read()
    return cls(template_string=contents)
generate_prompt(template_fields, remove_empty_template_field=True)

Generate a prompt by replacing placeholders in the template with values.

Parameters:

  • template_fields (Union[dict, PromptTemplateFields]) –

    The template fields.

  • remove_empty_template_field (bool, default: True ) –

    If True, removes lines with empty placeholders.

Returns:

  • str ( str ) –

    The generated prompt.

Source code in llama_cpp_agent/llm_prompt_template.py
def generate_prompt(
    self,
    template_fields: Union[dict, PromptTemplateFields],
    remove_empty_template_field=True,
) -> str:
    """
    Generate a prompt by replacing placeholders in the template with values.

    Placeholders are written as ``{name}``; those without a matching field
    are left in the text unchanged. Non-string values are converted with
    ``str()``.

    Args:
        template_fields (Union[dict, PromptTemplateFields]): The template fields.
        remove_empty_template_field (bool): If True, removes lines with empty placeholders.

    Returns:
        str: The generated prompt.
    """
    # Convert a field collection to a plain dict first. The previous code
    # called .items() before this conversion, so passing a
    # PromptTemplateFields (which has no .items()) raised AttributeError.
    # Detecting the converter method keeps every dict-like input working.
    to_dict = getattr(template_fields, "get_fields_dict", None)
    if callable(to_dict):
        template_fields = to_dict()

    values = {
        key: value if isinstance(value, str) else str(value)
        for key, value in template_fields.items()
    }

    def replace_placeholder(match):
        replacement = values.get(match.group(1), match.group(0))
        if remove_empty_template_field and replacement == "":
            # Marker consumed by _remove_empty_placeholders below.
            return "__EMPTY_TEMPLATE_FIELD__"
        return replacement

    prompt = re.sub(r"\{(\w+)\}", replace_placeholder, self.template)
    if not remove_empty_template_field:
        return prompt
    return self._remove_empty_placeholders(prompt)