diff --git a/.env.sample b/.env.sample index ef0cfc3da9b..1f6b6adb0e1 100644 --- a/.env.sample +++ b/.env.sample @@ -72,6 +72,10 @@ export GIT_REPO_URL=file://$PWD/tmp/index-bare export GH_CLIENT_ID= export GH_CLIENT_SECRET= +# Key for encrypting/decrypting GitHub tokens. Must be exactly 64 hex characters. +# Used for secure storage of GitHub tokens in the database. +export GITHUB_TOKEN_ENCRYPTION_KEY=0af877502cf11413eaa64af985fe1f8ed250ac9168a3b2db7da52cd5cc6116a9 + # Credentials for configuring Mailgun. You can leave these commented out # if you are not interested in actually sending emails. If left empty, # a mock email will be sent to a file in your local '/tmp/' directory. diff --git a/Cargo.lock b/Cargo.lock index 7c14e46e4e4..3e402239ab7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1348,6 +1348,7 @@ dependencies = [ name = "crates_io" version = "0.0.0" dependencies = [ + "aes-gcm", "anyhow", "astral-tokio-tar", "async-compression", diff --git a/Cargo.toml b/Cargo.toml index d843fa4bf85..8a4b5f8c891 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,7 @@ name = "crates_io" doctest = true [dependencies] +aes-gcm = { version = "=0.10.3", features = ["std"] } anyhow = "=1.0.98" astral-tokio-tar = "=0.5.2" async-compression = { version = "=0.4.27", default-features = false, features = ["gzip", "tokio"] } diff --git a/crates/crates_io_database/src/models/user.rs b/crates/crates_io_database/src/models/user.rs index f4b232abb6f..0d2b3fac54c 100644 --- a/crates/crates_io_database/src/models/user.rs +++ b/crates/crates_io_database/src/models/user.rs @@ -27,6 +27,7 @@ pub struct User { pub account_lock_until: Option>, pub is_admin: bool, pub publish_notifications: bool, + pub gh_encrypted_token: Option>, } impl User { @@ -89,6 +90,7 @@ pub struct NewUser<'a> { pub name: Option<&'a str>, pub gh_avatar: Option<&'a str>, pub gh_access_token: &'a str, + pub gh_encrypted_token: Option<&'a [u8]>, } impl NewUser<'_> { @@ -119,6 +121,7 @@ impl NewUser<'_> { users::name.eq(excluded(users::name)), users::gh_avatar.eq(excluded(users::gh_avatar)), users::gh_access_token.eq(excluded(users::gh_access_token)), + users::gh_encrypted_token.eq(excluded(users::gh_encrypted_token)), )) .get_result(conn) .await diff --git a/crates/crates_io_database/src/schema.rs b/crates/crates_io_database/src/schema.rs index e2f2f3cbea0..31dd2772e86 100644 --- a/crates/crates_io_database/src/schema.rs +++ b/crates/crates_io_database/src/schema.rs @@ -880,6 +880,8 @@ diesel::table! { is_admin -> Bool, /// Whether or not the user wants to receive notifications when a package they own is published publish_notifications -> Bool, + /// Encrypted GitHub access token + gh_encrypted_token -> Nullable, } } diff --git a/crates/crates_io_database_dump/src/dump-db.toml b/crates/crates_io_database_dump/src/dump-db.toml index 9394701a49c..f4e2c7c42f4 100644 --- a/crates/crates_io_database_dump/src/dump-db.toml +++ b/crates/crates_io_database_dump/src/dump-db.toml @@ -232,6 +232,7 @@ account_lock_reason = "private" account_lock_until = "private" is_admin = "private" publish_notifications = "private" +gh_encrypted_token = "private" [users.column_defaults] gh_access_token = "''" diff --git a/migrations/2025-07-16-123330_add_gh_encrypted_token/down.sql b/migrations/2025-07-16-123330_add_gh_encrypted_token/down.sql new file mode 100644 index 00000000000..644583f8e90 --- /dev/null +++ b/migrations/2025-07-16-123330_add_gh_encrypted_token/down.sql @@ -0,0 +1 @@ +alter table users drop column gh_encrypted_token; diff --git a/migrations/2025-07-16-123330_add_gh_encrypted_token/up.sql b/migrations/2025-07-16-123330_add_gh_encrypted_token/up.sql new file mode 100644 index 00000000000..efb6b491e7e --- /dev/null +++ b/migrations/2025-07-16-123330_add_gh_encrypted_token/up.sql @@ -0,0 +1,4 @@ +alter table users + add column gh_encrypted_token bytea; + +comment on column users.gh_encrypted_token is 'Encrypted GitHub access token'; diff --git a/src/bin/crates-admin/encrypt_github_tokens.rs b/src/bin/crates-admin/encrypt_github_tokens.rs new file mode 100644 index 00000000000..9898c63e3fd --- /dev/null +++ b/src/bin/crates-admin/encrypt_github_tokens.rs @@ -0,0 +1,108 @@ +use anyhow::{Context, Result}; +use crates_io::util::gh_token_encryption::GitHubTokenEncryption; +use crates_io::{db, models::User}; +use crates_io_database::schema::users; +use diesel::prelude::*; +use diesel_async::RunQueryDsl; +use indicatif::{ProgressBar, ProgressIterator, ProgressStyle}; +use secrecy::ExposeSecret; + +#[derive(clap::Parser, Debug)] +#[command( + name = "encrypt-github-tokens", + about = "Encrypt existing plaintext GitHub tokens in the database.", + long_about = "Backfill operation to encrypt existing plaintext GitHub tokens using AES-256-GCM. \ + This reads users with plaintext tokens but no encrypted tokens, encrypts them, and \ + updates the database with the encrypted versions." +)] +pub struct Opts {} + +pub async fn run(_opts: Opts) -> Result<()> { + println!("Starting GitHub token encryption backfill…"); + + // Load encryption configuration + let encryption = GitHubTokenEncryption::from_environment() + .context("Failed to load encryption configuration")?; + + // Get database connection + let mut conn = db::oneoff_connection() + .await + .context("Failed to establish database connection")?; + + // Query users with no encrypted tokens + let users_to_encrypt = users::table + .filter(users::gh_encrypted_token.is_null()) + .select(User::as_select()) + .load(&mut conn) + .await + .context("Failed to query users with plaintext tokens")?; + + let total_users = users_to_encrypt.len(); + if total_users == 0 { + println!("Found no users that need token encryption. Exiting."); + return Ok(()); + } + + println!("Found {total_users} users with plaintext tokens to encrypt"); + + let pb = ProgressBar::new(total_users as u64); + pb.set_style(ProgressStyle::with_template( + "{bar:60} ({pos}/{len}, ETA {eta}) {msg}", + )?); + + let mut encrypted_count = 0; + let mut failed_count = 0; + + for user in users_to_encrypt.into_iter().progress_with(pb.clone()) { + let user_id = user.id; + let plaintext_token = user.gh_access_token.expose_secret(); + + let encrypted_token = match encryption.encrypt(plaintext_token) { + Ok(encrypted_token) => encrypted_token, + Err(e) => { + pb.suspend(|| eprintln!("Failed to encrypt token for user {user_id}: {e}")); + failed_count += 1; + continue; + } + }; + + // Update the user with the encrypted token + if let Err(e) = diesel::update(users::table.find(user_id)) + .set(users::gh_encrypted_token.eq(Some(encrypted_token))) + .execute(&mut conn) + .await + { + pb.suspend(|| eprintln!("Failed to update user {user_id}: {e}")); + failed_count += 1; + continue; + } + + encrypted_count += 1; + } + + pb.finish_with_message("Backfill completed!"); + println!("Successfully encrypted: {encrypted_count} tokens"); + + if failed_count > 0 { + eprintln!( + "WARNING: {failed_count} tokens failed to encrypt. Please review the errors above." + ); + std::process::exit(1); + } + + // Verify the backfill by checking for any remaining unencrypted tokens + let remaining_unencrypted = users::table + .filter(users::gh_encrypted_token.is_null()) + .count() + .get_result::(&mut conn) + .await + .context("Failed to count remaining unencrypted tokens")?; + + if remaining_unencrypted > 0 { + eprintln!("WARNING: {remaining_unencrypted} users still have unencrypted tokens"); + std::process::exit(1); + } + + println!("Verification successful: All non-empty tokens have been encrypted!"); + Ok(()) +} diff --git a/src/bin/crates-admin/main.rs b/src/bin/crates-admin/main.rs index 0e3b8cc1e1c..cabc4dda62c 100644 --- a/src/bin/crates-admin/main.rs +++ b/src/bin/crates-admin/main.rs @@ -6,6 +6,7 @@ mod default_versions; mod delete_crate; mod delete_version; mod dialoguer; +mod encrypt_github_tokens; mod enqueue_job; mod migrate; mod populate; @@ -21,6 +22,7 @@ enum Command { BackfillOgImages(backfill_og_images::Opts), DeleteCrate(delete_crate::Opts), DeleteVersion(delete_version::Opts), + EncryptGithubTokens(encrypt_github_tokens::Opts), Populate(populate::Opts), RenderReadmes(render_readmes::Opts), TransferCrates(transfer_crates::Opts), @@ -51,6 +53,7 @@ async fn main() -> anyhow::Result<()> { Command::BackfillOgImages(opts) => backfill_og_images::run(opts).await, Command::DeleteCrate(opts) => delete_crate::run(opts).await, Command::DeleteVersion(opts) => delete_version::run(opts).await, + Command::EncryptGithubTokens(opts) => encrypt_github_tokens::run(opts).await, Command::Populate(opts) => populate::run(opts).await, Command::RenderReadmes(opts) => render_readmes::run(opts).await, Command::TransferCrates(opts) => transfer_crates::run(opts).await, diff --git a/src/config/server.rs b/src/config/server.rs index 32cb6f10f73..9a3b507b6b5 100644 --- a/src/config/server.rs +++ b/src/config/server.rs @@ -5,6 +5,7 @@ use url::Url; use crate::Env; use crate::rate_limiter::{LimitedAction, RateLimiterConfig}; +use crate::util::gh_token_encryption::GitHubTokenEncryption; use super::base::Base; use super::database_pools::DatabasePools; @@ -42,6 +43,7 @@ pub struct Server { pub session_key: cookie::Key, pub gh_client_id: ClientId, pub gh_client_secret: ClientSecret, + pub gh_token_encryption: GitHubTokenEncryption, pub max_upload_size: u32, pub max_unpack_size: u64, pub max_dependencies: usize, @@ -106,6 +108,7 @@ impl Server { /// - `SESSION_KEY`: The key used to sign and encrypt session cookies. /// - `GH_CLIENT_ID`: The client ID of the associated GitHub application. /// - `GH_CLIENT_SECRET`: The client secret of the associated GitHub application. + /// - `GITHUB_TOKEN_ENCRYPTION_KEY`: Key for encrypting GitHub access tokens (64 hex characters). /// - `BLOCKED_TRAFFIC`: A list of headers and environment variables to use for blocking /// traffic. See the `block_traffic` module for more documentation. /// - `DOWNLOADS_PERSIST_INTERVAL_MS`: how frequent to persist download counts (in ms). @@ -205,6 +208,7 @@ impl Server { session_key: cookie::Key::derive_from(required_var("SESSION_KEY")?.as_bytes()), gh_client_id: ClientId::new(required_var("GH_CLIENT_ID")?), gh_client_secret: ClientSecret::new(required_var("GH_CLIENT_SECRET")?), + gh_token_encryption: GitHubTokenEncryption::from_environment()?, max_upload_size: 10 * 1024 * 1024, // 10 MB default file upload size limit max_unpack_size: 512 * 1024 * 1024, // 512 MB max when decompressed max_dependencies: DEFAULT_MAX_DEPENDENCIES, diff --git a/src/controllers/session.rs b/src/controllers/session.rs index 7ca76afbb2b..fba84696d1e 100644 --- a/src/controllers/session.rs +++ b/src/controllers/session.rs @@ -19,7 +19,7 @@ use minijinja::context; use oauth2::{AuthorizationCode, CsrfToken, Scope, TokenResponse}; use secrecy::ExposeSecret; use serde::{Deserialize, Serialize}; -use tracing::warn; +use tracing::{error, warn}; #[derive(Debug, Serialize, utoipa::ToSchema)] pub struct BeginResponse { @@ -114,11 +114,25 @@ pub async fn authorize_session( let token = token.access_token(); + // Encrypt the GitHub access token + let encryption = &app.config.gh_token_encryption; + let encrypted_token = encryption.encrypt(token.secret()).map_err(|error| { + error!("Failed to encrypt GitHub token: {error}"); + server_error("Internal server error") + })?; + // Fetch the user info from GitHub using the access token we just got and create a user record let ghuser = app.github.current_user(token).await?; let mut conn = app.db_write().await?; - let user = save_user_to_database(&ghuser, token.secret(), &app.emails, &mut conn).await?; + let user = save_user_to_database( + &ghuser, + token.secret(), + &encrypted_token, + &app.emails, + &mut conn, + ) + .await?; // Log in by setting a cookie and the middleware authentication session.insert("user_id".to_string(), user.id.to_string()); @@ -129,6 +143,7 @@ pub async fn authorize_session( pub async fn save_user_to_database( user: &GitHubUser, access_token: &str, + encrypted_token: &[u8], emails: &Emails, conn: &mut AsyncPgConnection, ) -> QueryResult { @@ -138,6 +153,7 @@ pub async fn save_user_to_database( .maybe_name(user.name.as_deref()) .maybe_gh_avatar(user.avatar_url.as_deref()) .gh_access_token(access_token) + .gh_encrypted_token(encrypted_token) .build(); match create_or_update_user(&new_user, user.email.as_deref(), emails, conn).await { @@ -241,7 +257,9 @@ mod tests { id: -1, avatar_url: None, }; - let result = save_user_to_database(&gh_user, "arbitrary_token", &emails, &mut conn).await; + + let result = + save_user_to_database(&gh_user, "arbitrary_token", &[], &emails, &mut conn).await; assert!( result.is_ok(), diff --git a/src/tests/user.rs b/src/tests/user.rs index ac44af2038d..e8e0756e0e2 100644 --- a/src/tests/user.rs +++ b/src/tests/user.rs @@ -38,7 +38,7 @@ async fn updating_existing_user_doesnt_change_api_token() -> anyhow::Result<()> email: None, avatar_url: None, }; - assert_ok!(session::save_user_to_database(&gh_user, "bar_token", emails, &mut conn).await); + assert_ok!(session::save_user_to_database(&gh_user, "bar_token", &[], emails, &mut conn).await); // Use the original API token to find the now updated user let hashed_token = assert_ok!(HashedToken::parse(token)); @@ -79,8 +79,8 @@ async fn github_without_email_does_not_overwrite_email() -> anyhow::Result<()> { avatar_url: None, }; - let u = - session::save_user_to_database(&gh_user, "some random token", emails, &mut conn).await?; + let u = session::save_user_to_database(&gh_user, "some random token", &[], emails, &mut conn) + .await?; let user_without_github_email = MockCookieUser::new(&app, u); @@ -103,8 +103,8 @@ async fn github_without_email_does_not_overwrite_email() -> anyhow::Result<()> { avatar_url: None, }; - let u = - session::save_user_to_database(&gh_user, "some random token", emails, &mut conn).await?; + let u = session::save_user_to_database(&gh_user, "some random token", &[], emails, &mut conn) + .await?; let again_user_without_github_email = MockCookieUser::new(&app, u); @@ -145,8 +145,8 @@ async fn github_with_email_does_not_overwrite_email() -> anyhow::Result<()> { avatar_url: None, }; - let u = - session::save_user_to_database(&gh_user, "some random token", &emails, &mut conn).await?; + let u = session::save_user_to_database(&gh_user, "some random token", &[], &emails, &mut conn) + .await?; let user_with_different_email_in_github = MockCookieUser::new(&app, u); @@ -202,8 +202,8 @@ async fn test_confirm_user_email() -> anyhow::Result<()> { avatar_url: None, }; - let u = - session::save_user_to_database(&gh_user, "some random token", emails, &mut conn).await?; + let u = session::save_user_to_database(&gh_user, "some random token", &[], emails, &mut conn) + .await?; let user = MockCookieUser::new(&app, u); let user_model = user.as_model(); @@ -248,8 +248,8 @@ async fn test_existing_user_email() -> anyhow::Result<()> { avatar_url: None, }; - let u = - session::save_user_to_database(&gh_user, "some random token", emails, &mut conn).await?; + let u = session::save_user_to_database(&gh_user, "some random token", &[], emails, &mut conn) + .await?; update(Email::belonging_to(&u)) // Users created before we added verification will have diff --git a/src/tests/util/test_app.rs b/src/tests/util/test_app.rs index 4f2370f7c95..f3da1ac4131 100644 --- a/src/tests/util/test_app.rs +++ b/src/tests/util/test_app.rs @@ -9,6 +9,7 @@ use crate::rate_limiter::{LimitedAction, RateLimiterConfig}; use crate::storage::StorageConfig; use crate::tests::util::chaosproxy::ChaosProxy; use crate::tests::util::github::MOCK_GITHUB_DATA; +use crate::util::gh_token_encryption::GitHubTokenEncryption; use crate::worker::{Environment, RunnerExt}; use crate::{App, Emails, Env}; use claims::assert_some; @@ -489,6 +490,7 @@ fn simple_config() -> config::Server { session_key: cookie::Key::derive_from("test this has to be over 32 bytes long".as_bytes()), gh_client_id: ClientId::new(dotenvy::var("GH_CLIENT_ID").unwrap_or_default()), gh_client_secret: ClientSecret::new(dotenvy::var("GH_CLIENT_SECRET").unwrap_or_default()), + gh_token_encryption: GitHubTokenEncryption::for_testing(), max_upload_size: 128 * 1024, // 128 kB should be enough for most testing purposes max_unpack_size: 128 * 1024, // 128 kB should be enough for most testing purposes max_features: 10, diff --git a/src/util.rs b/src/util.rs index 66d93e79bf5..405ea9c596c 100644 --- a/src/util.rs +++ b/src/util.rs @@ -4,6 +4,7 @@ pub use crates_io_database::utils::token; pub mod diesel; pub mod errors; +pub mod gh_token_encryption; mod io_util; mod request_helpers; pub mod string_excl_null; diff --git a/src/util/gh_token_encryption.rs b/src/util/gh_token_encryption.rs new file mode 100644 index 00000000000..1efdbcaf1ab --- /dev/null +++ b/src/util/gh_token_encryption.rs @@ -0,0 +1,176 @@ +use aes_gcm::aead::{Aead, AeadCore, OsRng}; +use aes_gcm::{Aes256Gcm, Key, KeyInit, Nonce}; +use anyhow::{Context, Result}; +use oauth2::AccessToken; + +/// A struct that encapsulates GitHub token encryption and decryption +/// using AES-256-GCM. +pub struct GitHubTokenEncryption { + cipher: Aes256Gcm, +} + +impl GitHubTokenEncryption { + /// Creates a new [GitHubTokenEncryption] instance with the provided cipher + pub fn new(cipher: Aes256Gcm) -> Self { + Self { cipher } + } + + /// Creates a new [GitHubTokenEncryption] instance with a cipher for testing + /// purposes. + #[cfg(test)] + pub fn for_testing() -> Self { + let test_key = b"test_key_32_bytes_long_for_tests"; + Self::new(Aes256Gcm::new(Key::::from_slice(test_key))) + } + + /// Creates a new [GitHubTokenEncryption] instance from the environment + /// + /// Reads the `GITHUB_TOKEN_ENCRYPTION_KEY` environment variable, which + /// should be a 64-character hex string (32 bytes when decoded). + pub fn from_environment() -> Result { + let gh_token_key = std::env::var("GITHUB_TOKEN_ENCRYPTION_KEY") + .context("GITHUB_TOKEN_ENCRYPTION_KEY environment variable not set")?; + + if gh_token_key.len() != 64 { + anyhow::bail!("GITHUB_TOKEN_ENCRYPTION_KEY must be exactly 64 hex characters"); + } + + let gh_token_key = hex::decode(gh_token_key.as_bytes()) + .context("GITHUB_TOKEN_ENCRYPTION_KEY must be exactly 64 hex characters")?; + + let cipher = Aes256Gcm::new(Key::::from_slice(&gh_token_key)); + + Ok(Self::new(cipher)) + } + + /// Encrypts a GitHub access token using AES-256-GCM + /// + /// The encrypted data format is: `[12-byte nonce][encrypted data]` + /// The nonce is randomly generated for each encryption to ensure uniqueness. + pub fn encrypt(&self, plaintext: &str) -> Result> { + // Generate a random nonce for this encryption + let nonce = Aes256Gcm::generate_nonce(&mut OsRng); + + // Encrypt the token + let encrypted = self + .cipher + .encrypt(&nonce, plaintext.as_bytes()) + .map_err(|error| anyhow::anyhow!("Failed to encrypt token: {error}"))?; + + // Combine nonce + ciphertext (which includes the auth tag) + let mut result = Vec::with_capacity(nonce.len() + encrypted.len()); + result.extend_from_slice(&nonce); + result.extend_from_slice(&encrypted); + + Ok(result) + } + + /// Decrypts a GitHub access token using AES-256-GCM + /// + /// Expects the data format: `[12-byte nonce][encrypted data]` + pub fn decrypt(&self, encrypted: &[u8]) -> Result { + if encrypted.len() < 12 { + anyhow::bail!("Invalid encrypted token: too short"); + } + + // Extract nonce and ciphertext + let (nonce_bytes, ciphertext) = encrypted.split_at(12); + let nonce = Nonce::from_slice(nonce_bytes); + + // Decrypt the token + let plaintext = self + .cipher + .decrypt(nonce, ciphertext) + .context("Failed to decrypt token")?; + + let plaintext = + String::from_utf8(plaintext).context("Decrypted token is not valid UTF-8")?; + + Ok(AccessToken::new(plaintext)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use aes_gcm::{Key, KeyInit}; + use claims::{assert_err, assert_ok}; + use insta::assert_snapshot; + + fn create_test_encryption() -> GitHubTokenEncryption { + let key = Key::::from_slice(b"test_master_key_32_bytes_long!!!"); + let cipher = Aes256Gcm::new(key); + GitHubTokenEncryption { cipher } + } + + #[test] + fn test_encrypt_decrypt_roundtrip() { + let encryption = create_test_encryption(); + let original_token = "ghs_test_token_123456789"; + + // Encrypt the token + let encrypted = assert_ok!(encryption.encrypt(original_token)); + + // Decrypt it back + let decrypted = assert_ok!(encryption.decrypt(&encrypted)); + + assert_eq!(original_token, decrypted.secret()); + } + + #[test] + fn test_encrypt_produces_different_ciphertext() { + let encryption = create_test_encryption(); + let token = "ghs_test_token_123456789"; + + // Encrypt the same token twice + let encrypted1 = assert_ok!(encryption.encrypt(token)); + let encrypted2 = assert_ok!(encryption.encrypt(token)); + + // Should produce different ciphertext due to random nonce + assert_ne!(encrypted1, encrypted2); + + // But both should decrypt to the same plaintext + let decrypted1 = assert_ok!(encryption.decrypt(&encrypted1)); + let decrypted2 = assert_ok!(encryption.decrypt(&encrypted2)); + + assert_eq!(decrypted1.secret(), decrypted2.secret()); + assert_eq!(decrypted1.secret(), token); + } + + #[test] + fn test_invalid_encrypted_data() { + let encryption = create_test_encryption(); + + // Too short + let err = assert_err!(encryption.decrypt(&[1, 2, 3])); + assert_snapshot!(err, @"Invalid encrypted token: too short"); + + // Invalid data + let invalid_data = vec![0u8; 50]; + let err = assert_err!(encryption.decrypt(&invalid_data)); + assert_snapshot!(err, @"Failed to decrypt token"); + } + + #[test] + fn test_different_keys() { + let encryption1 = create_test_encryption(); + + // Create a different encryption with a different key + let key2 = Key::::from_slice(b"different_key_32_bytes_long!!!!!"); + let cipher2 = Aes256Gcm::new(key2); + let encryption2 = GitHubTokenEncryption { cipher: cipher2 }; + + let token = "ghs_test_token_123456789"; + + // Encrypt with encryption1 + let encrypted = assert_ok!(encryption1.encrypt(token)); + + // Try to decrypt with encryption2 (should fail) + let err = assert_err!(encryption2.decrypt(&encrypted)); + assert_snapshot!(err, @"Failed to decrypt token"); + + // But encryption1 should still work + let decrypted = assert_ok!(encryption1.decrypt(&encrypted)); + assert_eq!(decrypted.secret(), token); + } +}