diff --git a/gptel-openai-responses.el b/gptel-openai-responses.el index da78414e..d89ad301 100644 --- a/gptel-openai-responses.el +++ b/gptel-openai-responses.el @@ -228,6 +228,10 @@ Mutate state INFO with response metadata." ;; Temperature (when (and gptel-temperature (not o-model-p)) (plist-put prompts-plist :temperature gptel-temperature)) + ;; Reasoning effort; `format' also serialises the integer values the option allows + (when gptel-reasoning-effort + (plist-put prompts-plist :reasoning + (list :effort (format "%s" gptel-reasoning-effort)))) ;; Max tokens (when gptel-max-tokens (plist-put prompts-plist :max_output_tokens gptel-max-tokens)) diff --git a/gptel-openai.el b/gptel-openai.el index bb8b0818..dc6feac2 100644 --- a/gptel-openai.el +++ b/gptel-openai.el @@ -476,6 +476,7 @@ Media files, if present, are placed in `gptel-context'." '((gpt-5.4-mini :description "Faster, more cost-efficient version of GPT-5.4" :capabilities (media tool-use json url responses-api) + :reasoning-effort (member none low medium high xhigh) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 400 :input-cost 0.75 @@ -484,6 +485,7 @@ Media files, if present, are placed in `gptel-context'." (gpt-5.4-nano :description "Fastest, cheapest version of GPT-5.4" :capabilities (media tool-use json url responses-api) + :reasoning-effort (member none low medium high xhigh) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 400 :input-cost 0.20 @@ -492,6 +494,7 @@ Media files, if present, are placed in `gptel-context'." (gpt-5.4 :description "The best model for coding and agentic tasks" :capabilities (media tool-use json url responses-api) + :reasoning-effort (member none low medium high xhigh) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 1050 :input-cost 2.50 @@ -500,6 +503,7 @@ Media files, if present, are placed in `gptel-context'."
(gpt-5.4-pro :description "Maximum performance model for reasoning tasks" :capabilities (media tool-use json url responses-api) + :reasoning-effort (member none low medium high xhigh) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 1050 :input-cost 30 @@ -508,6 +512,7 @@ Media files, if present, are placed in `gptel-context'." (gpt-5.3-chat-latest :description "Answers right away" :capabilities (media tool-use json url responses-api) + :reasoning-effort (member medium) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 400 :input-cost 1.75 @@ -516,6 +521,7 @@ Media files, if present, are placed in `gptel-context'." (gpt-5.2 :description "The best model for coding and agentic tasks" :capabilities (media tool-use json url responses-api) + :reasoning-effort (member none low medium high) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 400 :input-cost 1.75 @@ -524,6 +530,7 @@ Media files, if present, are placed in `gptel-context'." (gpt-5.1 :description "The best model for coding and agentic tasks" :capabilities (media tool-use json url responses-api) + :reasoning-effort (member none low medium high) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 400 :input-cost 1.25 @@ -532,6 +539,7 @@ Media files, if present, are placed in `gptel-context'." (gpt-5-mini :description "Faster, more cost-efficient version of GPT-5" :capabilities (media tool-use json url responses-api) + :reasoning-effort (member minimal low medium high) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 400 :input-cost 0.25 @@ -540,6 +548,7 @@ Media files, if present, are placed in `gptel-context'." 
(gpt-5-nano :description "Fastest, cheapest version of GPT-5" :capabilities (media tool-use json url responses-api) + :reasoning-effort (member minimal low medium high) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 400 :input-cost 0.05 @@ -548,6 +557,7 @@ Media files, if present, are placed in `gptel-context'." (gpt-5 :description "Flagship model for coding, reasoning, and agentic tasks across domains" :capabilities (media tool-use json url responses-api) + :reasoning-effort (member minimal low medium high) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 400 :input-cost 1.25 @@ -619,6 +629,7 @@ Media files, if present, are placed in `gptel-context'." (o4-mini :description "Fast, effective reasoning with efficient performance in coding and visual tasks" :capabilities (reasoning media tool-use json url responses-api) + :reasoning-effort (member low medium high) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 200 :input-cost 1.10 @@ -626,6 +637,7 @@ Media files, if present, are placed in `gptel-context'." :cutoff-date "2024-05") (o3-mini :description "High intelligence at the same cost and latency targets of o1-mini" + :reasoning-effort (member low medium high) :context-window 200 :input-cost 1.10 :output-cost 4.40 @@ -634,6 +646,7 @@ Media files, if present, are placed in `gptel-context'." (o3 :description "Well-rounded and powerful model across domains" :capabilities (reasoning media tool-use json url responses-api) + :reasoning-effort (member low medium high) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 200 :input-cost 2 @@ -642,6 +655,7 @@ Media files, if present, are placed in `gptel-context'." 
(o3-pro :description "Maximum performance model for reasoning tasks" :capabilities (reasoning media tool-use json url responses-api) + :reasoning-effort (member low medium high) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 200 :input-cost 20 @@ -649,6 +663,7 @@ Media files, if present, are placed in `gptel-context'." :cutoff-date "2024-05") (o1-mini :description "Faster and cheaper reasoning model good at coding, math, and science" + :reasoning-effort (member low medium high) :context-window 128 :input-cost 1.10 :output-cost 4.40 @@ -657,6 +672,7 @@ Media files, if present, are placed in `gptel-context'." (o1 :description "Reasoning model designed to solve hard problems across domains" :capabilities (media reasoning responses-api) + :reasoning-effort (member low medium high) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 200 :input-cost 15 diff --git a/gptel-org.el b/gptel-org.el index 32bafd09..21a71787 100644 --- a/gptel-org.el +++ b/gptel-org.el @@ -37,6 +37,7 @@ (defvar gptel--system-message) (defvar gptel-model) (defvar gptel-temperature) +(defvar gptel-reasoning-effort) (defvar gptel-max-tokens) (defvar gptel--link-type-cache) (defvar gptel--preset) @@ -501,12 +502,13 @@ parameters. ARGS are the original function call arguments." 
(if (derived-mode-p 'org-mode) (pcase-let ((`( ,gptel--preset ,gptel--system-message ,gptel-backend - ,gptel-model ,gptel-temperature ,gptel-max-tokens - ,gptel--num-messages-to-send ,gptel-tools) + ,gptel-model ,gptel-temperature ,gptel-reasoning-effort + ,gptel-max-tokens ,gptel--num-messages-to-send ,gptel-tools) (seq-mapn (lambda (a b) (or a b)) (gptel-org--entry-properties) (list gptel--preset gptel--system-message gptel-backend - gptel-model gptel-temperature gptel-max-tokens + gptel-model gptel-temperature + gptel-reasoning-effort gptel-max-tokens gptel--num-messages-to-send gptel-tools)))) (apply send-fun args)) (apply send-fun args))) @@ -528,8 +530,8 @@ ARGS are the original function call arguments." (mapcar (lambda (prop) (org-entry-get (or pt (point)) prop 'selective)) '("GPTEL_PRESET" "GPTEL_SYSTEM" "GPTEL_BACKEND" - "GPTEL_MODEL" "GPTEL_TEMPERATURE" "GPTEL_MAX_TOKENS" - "GPTEL_NUM_MESSAGES_TO_SEND" "GPTEL_TOOLS")))) + "GPTEL_MODEL" "GPTEL_TEMPERATURE" "GPTEL_REASONING_EFFORT" + "GPTEL_MAX_TOKENS" "GPTEL_NUM_MESSAGES_TO_SEND" "GPTEL_TOOLS")))) (when preset (setq preset (gptel--intern preset))) (when system (setq system (string-replace "\\n" "\n" system))) @@ -560,7 +562,7 @@ ARGS are the original function call arguments." (progn (when-let* ((bounds (org-entry-get (point-min) "GPTEL_BOUNDS"))) (gptel--restore-props (read bounds))) - (pcase-let ((`(,preset ,system ,backend ,model ,temperature ,tokens ,num ,tools) + (pcase-let ((`(,preset ,system ,backend ,model ,temperature ,effort ,tokens ,num ,tools) (gptel-org--entry-properties (point-min)))) (when preset (if (gptel-get-preset preset) @@ -582,6 +584,7 @@ ARGS are the original function call arguments." 
backend)) (when model (setq-local gptel-model model)) (when temperature (setq-local gptel-temperature temperature)) + (when effort (setq-local gptel-reasoning-effort effort)) (when tokens (setq-local gptel-max-tokens tokens)) (when num (setq-local gptel--num-messages-to-send num)) (when tools (setq-local gptel-tools tools)))) @@ -630,11 +633,17 @@ send in queries. (See `gptel--num-messages-to-send' for the last one.)" (if (gptel--preset-mismatch-value preset-spec :tools tool-names) (org-entry-put pt "GPTEL_TOOLS" (string-join tool-names " ")) (org-entry-delete pt "GPTEL_TOOLS"))) - ;; Temperature, max tokens and cutoff + ;; Temperature, reasoning effort, max tokens and cutoff (if (and (gptel--preset-mismatch-value preset-spec :temperature gptel-temperature) (not (equal (default-value 'gptel-temperature) gptel-temperature))) (org-entry-put pt "GPTEL_TEMPERATURE" (number-to-string gptel-temperature)) (org-entry-delete pt "GPTEL_TEMPERATURE")) + (if (and (gptel--preset-mismatch-value preset-spec :reasoning-effort + gptel-reasoning-effort) + (not (equal (default-value 'gptel-reasoning-effort) + gptel-reasoning-effort))) + (org-entry-put pt "GPTEL_REASONING_EFFORT" (prin1-to-string gptel-reasoning-effort)) + (org-entry-delete pt "GPTEL_REASONING_EFFORT")) (if (and (gptel--preset-mismatch-value preset-spec :max-tokens gptel-max-tokens) gptel-max-tokens) (org-entry-put pt "GPTEL_MAX_TOKENS" (number-to-string gptel-max-tokens)) diff --git a/gptel-request.el b/gptel-request.el index 2f6230a3..9906e998 100644 --- a/gptel-request.el +++ b/gptel-request.el @@ -288,6 +288,24 @@ To set the temperature for a chat session interactively call :type '(choice (number :tag "Temperature value") (const :tag "Use default" nil))) +(defcustom gptel-reasoning-effort nil + "Reasoning effort of the LLM response. + +This controls how hard the LLM will \"think\" before generating +the final response. Not all models support reasoning effort. 
When +this value is nil, the model's default reasoning effort will be +used. The valid values vary depending on the model and the LLM +provider. + +Symbols as well as non-negative integers are supported but may +generate errors if the specified value isn't allowed." + :safe (lambda (v) (or (null v) + (symbolp v) + (and (integerp v) (>= v 0)))) + :type '(choice (const :tag "Use default" nil) + (symbol :tag "Reasoning effort level") + (integer :tag "Max thinking tokens"))) + (defcustom gptel-cache nil "Whether the LLM should cache request content. @@ -1020,7 +1038,8 @@ For BUF, START, END and BODY-THUNK see `gptel--with-buffer-copy'." gptel-use-tools gptel-tools gptel-use-curl gptel--schema gptel-use-context gptel-context gptel--num-messages-to-send gptel-stream gptel-include-reasoning gptel--request-params - gptel-temperature gptel-max-tokens gptel-cache)) + gptel-temperature gptel-reasoning-effort gptel-max-tokens + gptel-cache)) (set (make-local-variable sym) (buffer-local-value sym buf))) (when (and start end) (insert-buffer-substring buf start end)) (setq major-mode (buffer-local-value 'major-mode buf)) diff --git a/gptel-transient.el b/gptel-transient.el index 10209f90..a19a2d17 100644 --- a/gptel-transient.el +++ b/gptel-transient.el @@ -850,6 +850,7 @@ Also format the value of OBJ in the transient menu." :if (lambda () (and gptel-expert-commands (or gptel-mode gptel-track-response)))) (gptel--infix-temperature :if (lambda () gptel-expert-commands)) + (gptel--infix-reasoning-effort :if (lambda () gptel-expert-commands)) (gptel--infix-use-context) (gptel--infix-include-reasoning) (gptel--infix-use-tools) @@ -1333,6 +1334,60 @@ responses." :prompt "Temperature controls the response randomness (0.0-2.0, leave empty for API default): " :reader 'gptel--transient-read-number) +(defun gptel--transient-read-reasoning-effort (prompt _initial-input history) + "Read the reasoning effort from the minibuffer.
+ +PROMPT, _INITIAL-INPUT and HISTORY are as in the transient reader +documentation. Return nil on \"default\" or empty input (API default)." + ;; Workaround for buggy transient behaviour when dealing with + ;; non-string values. See: https://github.com/magit/transient/issues/172 + (when-let* ((history-symbol (or (car-safe history) history)) + (val (and (symbolp history-symbol) (symbol-value history-symbol)))) + (unless (stringp (car val)) + (setcar val (prin1-to-string (car val))))) + (if-let* ((effort-type (get gptel-model :reasoning-effort))) + (cond + ((eq (car effort-type) 'member) + (let* ((table (let ((effort-choices (cons 'default (cdr effort-type)))) + ;; Display the completion candidates in the order listed + ;; instead of allowing the completion framework to sort + ;; them. This is cleaner since they are listed in + ;; increasing order of reasoning effort. + (lambda (string predicate action) + (if (eq action 'metadata) + (let ((current-metadata (cdr (completion-metadata + (minibuffer-contents) + effort-choices + minibuffer-completion-predicate)))) + `(metadata + ,@(map-merge 'alist + current-metadata + '((display-sort-function . identity) + (cycle-sort-function . identity))))) + (complete-with-action action effort-choices string predicate))))) + (effort (completing-read prompt table nil t))) + ;; "default" or empty input (returned as "") restores the value to nil. + (unless (member effort '("" "default")) + (intern effort)))) + ((eq (car effort-type) 'integer) + (let* ((minibuffer-default-prompt-format "") + (num (read-number prompt -1 history))) + (if (= num -1) nil num))) + (t + (user-error "Unknown reasoning effort type: %S" effort-type))) + (user-error "Reasoning effort is not supported for this model"))) + +(transient-define-infix gptel--infix-reasoning-effort () + "Reasoning effort of request."
+ :description "Reasoning effort" + :display-nil "default" + :class 'gptel-lisp-variable + :variable 'gptel-reasoning-effort + :set-value #'gptel--set-with-scope + :key "-r" + :prompt "Reasoning effort controls how hard the LLM \"thinks\" (leave empty for API default): " + :reader 'gptel--transient-read-reasoning-effort) + (transient-define-infix gptel--infix-track-response () "Distinguish between user messages and LLM responses.