google · copybara-service · Nov 21, 2025
diff --git a/gemma/flash_attention.cc b/gemma/flash_attention.cc
@@ -17,7 +17,9 @@
 #include <stdint.h>
 
 #include <algorithm>
+#include <array>
 #include <cmath>
+#include <cstdlib>
 #include <limits>
 
 #include "compression/types.h"  // GEMMA_DISABLED_TARGETS

diff --git a/gemma/flash_attention.h b/gemma/flash_attention.h
@@ -60,6 +60,7 @@ namespace gcpp {
                       size_t layer_idx, const MatPtr& query_norm_scale,       \
                       AttentionActivationsPtrs& activations, QBatch& qbatch,  \
                       ThreadingContext& ctx);                                 \
+                                                                              \
   /* NOLINTNEXTLINE(google-readability-namespace-comments) */                 \
   }  // namespace NAMESPACE
 

diff --git a/gemma/kv_cache.cc b/gemma/kv_cache.cc
@@ -51,15 +51,16 @@ KVCache KVCache::Copy() {
   KVCache copy(kv_cache.Extents(), allocator_);
 
   CopyMat(kv_cache, copy.kv_cache);
-
   return copy;
 }
 
 std::vector<KVCachePtr> ToKVCachePtrs(const hwy::Span<KVCache>& kv_caches) {
   std::vector<KVCachePtr> ptrs;
   ptrs.reserve(kv_caches.size());
   for (size_t i = 0; i < kv_caches.size(); ++i) {
-    ptrs.push_back(KVCachePtr{.kv_cache = kv_caches[i].kv_cache});
+    // KVCachePtr is the non-owning view type declared in kv_cache.h; build one
+    // per cache from that cache's kv_cache matrix.
+    ptrs.push_back(KVCachePtr{.kv_cache = kv_caches[i].kv_cache});
   }
   return ptrs;
 }

diff --git a/gemma/kv_cache.h b/gemma/kv_cache.h
@@ -17,9 +17,11 @@
 #define THIRD_PARTY_GEMMA_CPP_GEMMA_KV_CACHE_H_
 
 #include <stddef.h>
+
+#include <optional>
 #include <vector>
 
-#include "gemma/configs.h"  // ModelConfig
+#include "gemma/configs.h"     // ModelConfig
 #include "gemma/gemma_args.h"  // InferenceArgs
 #include "util/basics.h"       // BF16
 #include "util/mat.h"
@@ -31,12 +33,13 @@ using KV_t = float;
 struct KVCache {
   KVCache(const ModelConfig& config, const InferenceArgs& inference_args,
           const Allocator& allocator);
-
   // Returns a deep copy of the KVCache. Use explicit function instead of
   // copy ctor to make the cost explicit.
   KVCache Copy();
 
-  size_t SeqLen() const { return kv_cache.Rows(); }
+  // Sequence length: the row count of kv_cache, whose first dimension is
+  // seq_len (see the layout comment on the member below).
+  size_t SeqLen() const { return kv_cache.Rows(); }
 
   MatStorageT<KV_t> kv_cache;  // [seq_len, layers * kv_heads * qkv_dim * 2]
 
@@ -49,7 +52,9 @@ struct KVCache {
 
 // A non-owning view of a KVCache.
 struct KVCachePtr {
-  size_t SeqLen() const { return kv_cache.Rows(); }
+  // Number of rows in the viewed matrix; simply forwards to
+  // kv_cache.Rows().
+  size_t SeqLen() const { return kv_cache.Rows(); }
   MatPtrT<KV_t> kv_cache;
 };
 

diff --git a/ops/ops-inl.h b/ops/ops-inl.h
@@ -25,6 +25,7 @@
 #include <cstdint>
 #include <random>
 #include <type_traits>  // std::enable_if_t
+#include <utility>
 #include <vector>
 
 #include "ops/matmul.h"