diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 7f963231..00000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.gitignore b/.gitignore index 59d8ed2c..f08785ff 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,10 @@ -validators/state.json -bittensor_subnet_template.egg-info/ -bittensor_subnet_template.egg-info -template/__pycache__/ -__pycache__/ +*.egg-info +__pycache__ *.pyc state.json wandb/ .vscode .envrc -.idea/ \ No newline at end of file +.idea/ +.DS_Store +venv diff --git a/Cortex.t.egg-info/PKG-INFO b/Cortex.t.egg-info/PKG-INFO deleted file mode 100644 index bd0205ce..00000000 --- a/Cortex.t.egg-info/PKG-INFO +++ /dev/null @@ -1,136 +0,0 @@ -Metadata-Version: 2.1 -Name: Cortex.t -Version: 3.1.6 -Summary: Decentralized APIs for synthetic data generation -Home-page: https://github.com/corcel-api/cortex.t -Author: Fish -License: MIT -Classifier: Development Status :: 3 - Alpha -Classifier: Intended Audience :: Developers -Classifier: License :: OSI Approved :: MIT License -Classifier: Programming Language :: Python :: 3 -Classifier: Topic :: Software Development -Requires-Python: >=3.8 -Description-Content-Type: text/markdown -License-File: LICENSE -Requires-Dist: aiohttp==3.* -Requires-Dist: bittensor==6.* -Requires-Dist: datasets==2.* -Requires-Dist: envparse==0.2.0 -Requires-Dist: openai==1.*,>=1.3.2 -Requires-Dist: Pillow==10.* -Requires-Dist: requests==2.* -Requires-Dist: scikit-learn==1.* -Requires-Dist: torch==2.* -Requires-Dist: transformers==4.* -Requires-Dist: wandb -Requires-Dist: anthropic -Requires-Dist: stability-sdk -Requires-Dist: boto3 -Requires-Dist: anthropic_bedrock -Requires-Dist: pyOpenSSL - -
- -# **Cortex.t Subnet** -[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) ---- - ---- -- [Introduction](#introduction) -- [Setup](#setup) -- [Mining](#mining) -- [Validating](#validating) -- [License](#license) - - -## Introduction - -**IMPORTANT**: If you are new to Bittensor, please checkout the [Bittensor Website](https://bittensor.com/) before proceeding to the [Setup](#setup) section. - -Introducing Bittensor Subnet 18 (Cortex.t): A Pioneering Platform for AI Development and Synthetic Data Generation. - -Cortex.t stands at the forefront of artificial intelligence, offering a dual-purpose solution that caters to the needs of app developers and innovators in the AI space. This platform is meticulously designed to deliver reliable, high-quality text and image responses through API usage, utilising the decentralised Bittensor network. It serves as a cornerstone for creating a fair, transparent, and manipulation-free environment for the incentivised production of intelligence (mining) and generation and fulfilment of diverse user prompts. - -Our initiative is a leap forward in redefining the reward system for text and image prompting with a commitment to providing stability and reassurance to developers. By focusing on the value delivered to clients, we alleviate the concerns of data inconsistencies that often plague app development. The quality of Cortex.t is seamlessly integrated within the Bittensor network, allowing developers to harness the power of multiple subnets and modalities by building directly onto an existing validator, or through an API key from [Corcel](https://corcel.io). - -Cortex.t is also a transformative platform leveraging advanced AI models to generate synthetic prompt-response pairs. This novel method yields a comprehensive dataset of interactions, archived in wandb [wandb.ai/cortex-t/synthetic-QA](https://wandb.ai/cortex-t/synthetic-QA). The process involves recycling model outputs back into the system, using a prompt evolution and data augmentation strategy similar to Microsoft's approach in developing WizardLM. This enables the distillation of sophisticated AI models into smaller, yet efficient counterparts, mirroring the performance of their larger predecessors. Ultimately, Cortex.t democratizes access to high-end AI technology, encouraging innovation and customization. - -By leveraging synthetic data, Cortex.t circumvents the traditional challenges of data collection and curation, accelerating the development of AI models that are both robust and adaptable. This platform is your gateway to AI mastery, offering the unique opportunity to train your models with data that reflects the depth and versatility of the parent model. With SynthPairPro, you're not just collecting data; you're capturing intelligence, providing a path to creating AI models that mirror the advanced understanding and response capabilities of their predecessors. - -Join us at Cortex.t, your bridge to AI excellence, and democratise access to top-level AI capabilities. Be part of the AI revolution and stay at the forefront of innovation with SynthPairPro – Synthesizing Intelligence, Empowering the Future! - - -## Development - -### Testing - -install `nox` (`pip install nox`) and run `nox -s test`. - -## Setup - -### Before you proceed -Before you proceed with the installation of the subnet, note the following: - -**IMPORTANT**: We **strongly recommend** before proceeding that you test both subtensor and OpenAI API keys. 
Ensure you are running Subtensor locally to minimize chances of outages and improve the latency/connection. - -After exporting your OpenAI API key to your bash profile, test the streaming service for both the gpt-3.5-turbo and gpt-4 engines using ```./neurons/test_openai.py```. Neither the miner or the validator will function without a valid and working [OpenAI API key](https://platform.openai.com/). - -**IMPORTANT:** Make sure you are aware of the minimum compute requirements for cortex.t. See the [Minimum compute YAML configuration](./min_compute.yml). -Note that this subnet requires very little compute. The main functionality is api calls, so we outsource the compute to openai. The cost for mining and validating on this subnet comes from api calls, not from compute. Please be aware of your API costs and monitor accordingly. - -A high tier key is required for both mining and validations so it is important if you do not have one to work your way up slowly by running a single miner or small numbers of miners whilst payiing attention to your usage and limits. - - -### Installation - -Download the repository, navigate to the folder and then install the necessary requirements with the following chained command. - -```git clone https://github.com/corcel-api/cortex.t.git && cd cortex.t && pip install -e .``` - -Prior to proceeding, ensure you have a registered hotkey on subnet 18 mainnet. If not, run the command `btcli s register --netuid 18 --wallet.name [wallet_name] --wallet.hotkey [wallet.hotkey]`. - -We recommend using [direnv](https://direnv.net). After installing it, copy `envrc.example` to `.envrc` and substitute -all env vars with values appropriate for your accounts. After making changes to `.envrc` run `direnv allow` and start a -new terminal tab. - -## Mining - -You can launch your miners via pm2 using the following command. - -`pm2 start ./miner/miner.py --interpreter python3 -- --netuid 18 --subtensor.network --wallet.name --wallet.hotkey --axon.port ` - - -## Validating - -You can launch your validator via pm2 using the following command. - -`pm2 start ./validators/validator.py --interpreter python3 -- --netuid 18 --subtensor.network --wallet.name --wallet.hotkey ` - - -## Logging - -As cortex.t supports streaming natively, you do not (and should not) enable `logging.trace` or `logging.debug` as all of the important information is already output to `logging.info` which is set as default. - ---- - -## License -This repository is licensed under the MIT License. -```text -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. -``` diff --git a/Cortex.t.egg-info/SOURCES.txt b/Cortex.t.egg-info/SOURCES.txt deleted file mode 100644 index 8e060877..00000000 --- a/Cortex.t.egg-info/SOURCES.txt +++ /dev/null @@ -1,14 +0,0 @@ -LICENSE -README.md -setup.py -Cortex.t.egg-info/PKG-INFO -Cortex.t.egg-info/SOURCES.txt -Cortex.t.egg-info/dependency_links.txt -Cortex.t.egg-info/requires.txt -Cortex.t.egg-info/top_level.txt -base/__init__.py -template/__init__.py -template/protocol.py -template/reward.py -template/utils.py -test_base/__init__.py \ No newline at end of file diff --git a/Cortex.t.egg-info/dependency_links.txt b/Cortex.t.egg-info/dependency_links.txt deleted file mode 100644 index 8b137891..00000000 --- a/Cortex.t.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/Cortex.t.egg-info/requires.txt b/Cortex.t.egg-info/requires.txt deleted file mode 100644 index 7435c4fa..00000000 --- a/Cortex.t.egg-info/requires.txt +++ /dev/null @@ -1,16 +0,0 @@ -aiohttp==3.* -bittensor==6.* -datasets==2.* -envparse==0.2.0 -openai==1.*,>=1.3.2 -Pillow==10.* -requests==2.* -scikit-learn==1.* -torch==2.* -transformers==4.* -wandb -anthropic -stability-sdk -boto3 -anthropic_bedrock -pyOpenSSL diff --git a/Cortex.t.egg-info/top_level.txt b/Cortex.t.egg-info/top_level.txt deleted file mode 100644 index 6976312d..00000000 --- a/Cortex.t.egg-info/top_level.txt +++ /dev/null @@ -1,3 +0,0 @@ -base -template -test_base diff --git a/README.md b/README.md index 7401e999..3fe8f475 100644 --- a/README.md +++ b/README.md @@ -18,9 +18,9 @@ Introducing Bittensor Subnet 18 (Cortex.t): A Pioneering Platform for AI Development and Synthetic Data Generation. -Cortex.t stands at the forefront of artificial intelligence, offering a dual-purpose solution that caters to the needs of app developers and innovators in the AI space. This platform is meticulously designed to deliver reliable, high-quality text and image responses through API usage, utilising the decentralised Bittensor network. It serves as a cornerstone for creating a fair, transparent, and manipulation-free environment for the incentivised production of intelligence (mining) and generation and fulfilment of diverse user prompts. +Cortex.t stands at the forefront of artificial intelligence, offering a dual-purpose solution that caters to the needs of app developers and innovators in the AI space. This platform is meticulously designed to deliver reliable, high-quality text, image, and TTS responses through API usage, utilising the decentralised Bittensor network. It serves as a cornerstone for creating a fair, transparent, and manipulation-free environment for the incentivised production of intelligence (mining) and generation and fulfilment of diverse user prompts. -Our initiative is a leap forward in redefining the reward system for text and image prompting with a commitment to providing stability and reassurance to developers. By focusing on the value delivered to clients, we alleviate the concerns of data inconsistencies that often plague app development. 
The quality of Cortex.t is seamlessly integrated within the Bittensor network, allowing developers to harness the power of multiple subnets and modalities by building directly onto an existing validator, or through an API key from [Corcel](https://corcel.io). +Our initiative is a leap forward in redefining the reward system for text, image, and TTS prompting with a commitment to providing stability and reassurance to developers. By focusing on the value delivered to clients, we alleviate the concerns of data inconsistencies that often plague app development. The quality of Cortex.t is seamlessly integrated within the Bittensor network, allowing developers to harness the power of multiple subnets and modalities by building directly onto an existing validator, or through an API key from [Corcel](https://corcel.io). Cortex.t is also a transformative platform leveraging advanced AI models to generate synthetic prompt-response pairs. This novel method yields a comprehensive dataset of interactions, archived in wandb [wandb.ai/cortex-t/synthetic-QA](https://wandb.ai/cortex-t/synthetic-QA). The process involves recycling model outputs back into the system, using a prompt evolution and data augmentation strategy similar to Microsoft's approach in developing WizardLM. This enables the distillation of sophisticated AI models into smaller, yet efficient counterparts, mirroring the performance of their larger predecessors. Ultimately, Cortex.t democratizes access to high-end AI technology, encouraging innovation and customization. diff --git a/bittensor_subnet_template.egg-info/SOURCES.txt b/bittensor_subnet_template.egg-info/SOURCES.txt deleted file mode 100644 index 54608449..00000000 --- a/bittensor_subnet_template.egg-info/SOURCES.txt +++ /dev/null @@ -1,14 +0,0 @@ -LICENSE -README.md -setup.py -base/__init__.py -bittensor_subnet_template.egg-info/PKG-INFO -bittensor_subnet_template.egg-info/SOURCES.txt -bittensor_subnet_template.egg-info/dependency_links.txt -bittensor_subnet_template.egg-info/requires.txt -bittensor_subnet_template.egg-info/top_level.txt -template/__init__.py -template/protocol.py -template/reward.py -template/utils.py -test_base/__init__.py \ No newline at end of file diff --git a/bittensor_subnet_template.egg-info/dependency_links.txt b/bittensor_subnet_template.egg-info/dependency_links.txt deleted file mode 100644 index 8b137891..00000000 --- a/bittensor_subnet_template.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/bittensor_subnet_template.egg-info/top_level.txt b/bittensor_subnet_template.egg-info/top_level.txt deleted file mode 100644 index 6976312d..00000000 --- a/bittensor_subnet_template.egg-info/top_level.txt +++ /dev/null @@ -1,3 +0,0 @@ -base -template -test_base diff --git a/cortext/__init__.py b/cortext/__init__.py index 36695b13..1af58bcb 100644 --- a/cortext/__init__.py +++ b/cortext/__init__.py @@ -45,6 +45,7 @@ PROMPT_BLACKLIST_STAKE = 20000 IMAGE_BLACKLIST_STAKE = 20000 EMBEDDING_BLACKLIST_STAKE = 20000 +TTS_BLACKLIST_STAKE = 20000 ISALIVE_BLACKLIST_STAKE = min(PROMPT_BLACKLIST_STAKE, IMAGE_BLACKLIST_STAKE, EMBEDDING_BLACKLIST_STAKE) MIN_REQUEST_PERIOD = 2 MAX_REQUESTS = 12 @@ -3737,6 +3738,16 @@ 'Mystical Landscapes' ] +TTS_THEMES = [ + 'News and weather', + 'Advertising', + 'Health and wellness', + 'Historical events', + 'Sports news', + 'Trivia', + 'Paparazzi', +] + # Import all submodules. from .
import protocol diff --git a/cortext/protocol.py b/cortext/protocol.py index 653c8b7b..2369a2bf 100644 --- a/cortext/protocol.py +++ b/cortext/protocol.py @@ -383,4 +383,49 @@ class TextPrompting(bt.Synapse): default=True, title="streaming", description="whether to stream the output", - ) \ No newline at end of file + ) + + +class TTSResponse(bt.Synapse): + """ A class to represent the TTS request and response. """ + + text: str = pydantic.Field( + title="Text", + description="The input text to generate audio for." + ) + + provider: Literal["ElevenLabs"] = pydantic.Field( + default="ElevenLabs", + title="Provider", + description="The provider to use when calling for your response." + ) + + model: str = pydantic.Field( + default="eleven_multilingual_v2", + title="Model", + description="The model used for TTS." + ) + + voice: str = pydantic.Field( + default="Rachel", + title="Voice", + description="The voice used for TTS." + ) + + audio_b64: Optional[str] = pydantic.Field( + None, + title="Base64-encoded audio", + description="The resulting audio in base64 encoding corresponding to the input text." + ) + + uid: int = pydantic.Field( + default=60, + title="uid", + description="The UID to send the synapse to", + ) + + timeout: int = pydantic.Field( + default=60, + title="timeout", + description="The timeout for the dendrite of the synapse", + ) diff --git a/cortext/reward.py b/cortext/reward.py index 90058d4d..0f4f6ce0 100644 --- a/cortext/reward.py +++ b/cortext/reward.py @@ -26,21 +26,26 @@ import torch import openai import typing +import string import difflib import asyncio import logging import aiohttp import requests import traceback +import jiwer import numpy as np from numpy.linalg import norm import bittensor as bt from cortext import utils from PIL import Image from scipy.spatial.distance import cosine +import scipy.stats from sklearn.metrics.pairwise import cosine_similarity from sklearn.feature_extraction.text import TfidfVectorizer from transformers import CLIPProcessor, CLIPModel +from speechmos import dnsmos +from faster_whisper import WhisperModel # ==== TEXT ==== @@ -264,3 +269,41 @@ async def embeddings_score_dot(openai_answer: list, response: list, weight: floa bt.logging.info(f"Average embeddings cosine similarity does not exceed threshold: {avg_cosine_similarity}") return 0 + +# ==== TTS ===== +def get_whisper_model( + model_type: typing.Literal["tiny", "base", "small", "medium", "large", "large-v2", "large-v3"], + device: str = "cpu", + compute_type: typing.Literal["float16", "float32", "bfloat16", "int8", "int8_float16"] = "int8", + max_length: int = 2048, +) -> WhisperModel: + model = WhisperModel(model_type, device=device, compute_type=compute_type) + model.max_length = max_length + return model + +def calculate_odds(mean_observed: float, n: int, mean_y: float, var_y: float) -> np.ndarray: + std_y = var_y ** 0.5 + Z = (mean_observed - mean_y) / (std_y / (n**0.5)) + + # Calculate the probability using the CDF + probability = scipy.stats.norm.cdf(Z) + odds = probability / (1 - probability) + + return odds + +def lower_remove_punctuation(text: str) -> str: + return text.lower().translate(str.maketrans('', '', string.punctuation)).strip() + +def calculate_wer(audio, model: WhisperModel, text: str) -> float: + with torch.inference_mode(): + segments, _ = model.transcribe(audio, beam_size=5, max_new_tokens=4000, without_timestamps=True, language=None) + output = " ".join(x.text for x in segments) + + output = lower_remove_punctuation(output) + text = 
lower_remove_punctuation(text) + wer = jiwer.wer(text, output) # jiwer expects (reference, hypothesis) + + return wer + +def dnsmos_score(audio, sr: int) -> float: + return dnsmos.run(audio, sr=sr, return_df=True, verbose=False)['ovrl_mos'] diff --git a/cortext/utils.py b/cortext/utils.py index 5c9ef82a..8d188537 100644 --- a/cortext/utils.py +++ b/cortext/utils.py @@ -78,7 +78,8 @@ def load_state_from_file(filename: str): # Return the default state structure return { "text": {"themes": None, "questions": None, "theme_counter": 0, "question_counter": 0}, - "images": {"themes": None, "questions": None, "theme_counter": 0, "question_counter": 0} + "images": {"themes": None, "questions": None, "theme_counter": 0, "question_counter": 0}, + "tts": {"themes": None, "questions": None, "theme_counter": 0, "question_counter": 0}, } state = None @@ -113,7 +114,7 @@ def get_validators_with_runs_in_all_projects(): return common_validators async def get_list(list_type, num_questions_needed, theme=None): - prompts_in_question = {'text_questions': 10, 'images_questions': 20} + prompts_in_question = {'text_questions': 10, 'images_questions': 20, 'tts_questions': 20} list_type_mapping = { "text_questions": { "default": cortext.INSTRUCT_DEFAULT_QUESTIONS, @@ -122,6 +123,13 @@ "images_questions": { "default": cortext.IMAGE_DEFAULT_QUESTIONS, "prompt": f"Provide a python-formatted list of {prompts_in_question[list_type]} creative and detailed scenarios for image generation, each inspired by the theme '{theme}'. The scenarios should be diverse, thoughtful, and possibly out-of-the-box interpretations related to '{theme}'. Each element in the list should be a concise, but a vividly descriptive situation designed to inspire visually rich stories. Format these elements as comma-separated, quote-encapsulated strings in a single Python list." + }, + "tts_questions": { + "prompt": ( + f"Provide a python-formatted list of {prompts_in_question[list_type]} sentences for text-to-speech about the theme '{theme}'. " + "The sentences should be around 20-30 words long and should be relevant to the theme but not monotonic statements. Instead, make them engaging and emotionally expressive to inspire a rich and diverse set of audio samples. " + "Format these elements as comma-separated, quote-encapsulated strings in a single Python list." + ) } } @@ -206,6 +214,8 @@ async def get_items(category, item_type, theme=None): if item_type == "themes": if category == "images": return cortext.IMAGE_THEMES + elif category == "tts": + return cortext.TTS_THEMES return cortext.INSTRUCT_DEFAULT_THEMES else: # Never fail here, retry until valid list is found @@ -238,13 +248,12 @@ async def get_random_theme(category): item = items.pop() if items else None if not items: state[category][item_type] = None - return item async def get_question(category, num_questions_needed): - if category not in ["text", "images"]: - raise ValueError("Invalid category. Must be 'text' or 'images'.") + if category not in ["text", "images", "tts"]: + raise ValueError("Invalid category.
Must be 'text', 'images', or 'tts'.") question = await update_counters_and_get_new_list(category, "questions", num_questions_needed) return question diff --git a/miner/miner.py b/miner/miner.py index 0e3c91a0..4dd417b0 100644 --- a/miner/miner.py +++ b/miner/miner.py @@ -13,6 +13,7 @@ from collections import deque from functools import partial from typing import Tuple +from base64 import b64encode import bittensor as bt import google.generativeai as genai @@ -26,11 +27,13 @@ from PIL import Image import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation from anthropic_bedrock import AsyncAnthropicBedrock, HUMAN_PROMPT, AI_PROMPT, AnthropicBedrock +from elevenlabs.client import AsyncElevenLabs import cortext -from cortext.protocol import Embeddings, ImageResponse, IsAlive, StreamPrompting, TextPrompting +from cortext.protocol import Embeddings, ImageResponse, IsAlive, StreamPrompting, TextPrompting, TTSResponse from cortext.utils import get_version import sys +from functools import partial from starlette.types import Send @@ -82,6 +85,15 @@ genai.configure(api_key=google_key) +# ElevenLabs +eleven_key = os.environ.get("ELEVEN_API_KEY") +if not eleven_key: + raise ValueError("Please set the ELEVEN_API_KEY environment variable.") +eleven_client = AsyncElevenLabs( + api_key=eleven_key +) + + # Wandb netrc_path = pathlib.Path.home() / ".netrc" @@ -155,6 +167,9 @@ def __init__(self, config=None, axon=None, wallet=None, subtensor=None): ).attach( forward_fn=self.embeddings, blacklist_fn=self.blacklist_embeddings, +).attach( + forward_fn=self.tts, + blacklist_fn=self.blacklist_tts, ).attach( forward_fn=self.text, ) @@ -249,6 +264,11 @@ def blacklist_embeddings( self, synapse: Embeddings ) -> Tuple[bool, str]: bt.logging.info(blacklist[1]) return blacklist + def blacklist_tts( self, synapse: TTSResponse ) -> Tuple[bool, str]: + blacklist = self.base_blacklist(synapse, cortext.TTS_BLACKLIST_STAKE) + bt.logging.info(blacklist[1]) + return blacklist + def run(self): if not self.subtensor.is_hotkey_registered( netuid=self.config.netuid, @@ -597,6 +617,36 @@ async def get_embeddings_in_batch(texts, model, batch_size=10): except Exception: bt.logging.error(f"Exception in embeddings function: {traceback.format_exc()}") + async def tts(self, synapse: TTSResponse) -> TTSResponse: + bt.logging.info(f"entered tts processing") + + if synapse.provider == "ElevenLabs": + generation_fn = eleven_client.generate + generation_kwargs = { + 'voice': synapse.voice, + 'model': synapse.model, + 'text': synapse.text, + } + else: + raise ValueError(f"Unknown provider: {synapse.provider}") + + try: + tasks = [asyncio.create_task(generation_fn(**generation_kwargs))] + results = await asyncio.gather(*tasks, return_exceptions=True) + for result in results: + if isinstance(result, Exception): + bt.logging.error(f"Error in processing batch: {result}") + elif synapse.provider == "ElevenLabs": + output = b'' + async for value in result: + output += value + synapse.audio_b64 = b64encode(output).decode('ascii') + + bt.logging.info(f"tts completed successfully") + return synapse + except Exception: + bt.logging.error(f"Exception in tts function: {traceback.format_exc()}") + async def is_alive(self, synapse: IsAlive) -> IsAlive: bt.logging.debug("answered to be active") synapse.completion = "True" diff --git a/requirements.txt b/requirements.txt index 9d782f9b..27c161a5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,3 +15,9 @@ boto3 anthropic_bedrock pyOpenSSL==24.* google-generativeai
+elevenlabs +speechmos +librosa +jiwer +faster-whisper +soundfile \ No newline at end of file diff --git a/validators/base_validator.py b/validators/base_validator.py index 0ff2254c..1b102072 100644 --- a/validators/base_validator.py +++ b/validators/base_validator.py @@ -1,6 +1,8 @@ from abc import ABC, abstractmethod +from typing import Any import bittensor as bt +import torch class BaseValidator(ABC): @@ -26,11 +28,20 @@ async def handle_response(self, uid, responses): return uid, responses @abstractmethod - async def start_query(self, available_uids) -> tuple[list, dict]: + async def start_query( + self, + available_uids: list[int], + metagraph: bt.metagraph + ) -> tuple[list, dict]: ... @abstractmethod - async def score_responses(self, responses): + async def score_responses( + self, + query_responses: list[tuple[int, Any]], # [(uid, response)] + uid_to_question: dict[int, str], # uid -> prompt + metagraph: bt.metagraph, + ) -> tuple[torch.Tensor, dict[int, float], dict]: ... async def get_and_score(self, available_uids, metagraph): diff --git a/validators/tts_validator.py b/validators/tts_validator.py new file mode 100644 index 00000000..d8266432 --- /dev/null +++ b/validators/tts_validator.py @@ -0,0 +1,124 @@ +import torch +import wandb +import random +import asyncio +import base64 +import traceback +import librosa +import cortext.reward +import bittensor as bt + +from io import BytesIO +from cortext.utils import get_question +from base_validator import BaseValidator +from cortext.protocol import TTSResponse +from pydantic import BaseModel +class TTSProvider(BaseModel): + name: str + weight: float + models: tuple[str, ...] + voices: tuple[str, ...] +providers = ( + TTSProvider( + name="ElevenLabs", + weight=1.0, + models=("eleven_multilingual_v2",), + voices=("Rachel",) + ), +) +class TTSValidator(BaseValidator): + def __init__(self, dendrite, config, subtensor, wallet): + super().__init__(dendrite, config, subtensor, wallet, timeout=30) + self.asr_model = cortext.reward.get_whisper_model("large-v3", "cpu", "int8", 4096) + self.sr: int = self.asr_model.feature_extractor.sampling_rate + self.provider: TTSProvider = None + self.model: str = None + self.voice: str = None + self.wandb_data: dict = { + "modality": "audio", + "prompts": {}, + "responses": {}, + "audio": {}, + "scores": {}, + "timestamps": {}, + } + + async def start_query(self, available_uids, metagraph): + try: + query_tasks = [] + uid_to_question = {} + + # Randomly choose the provider based on specified probabilities + self.provider = random.choices(providers, weights=[provider.weight for provider in providers], k=1)[0] + self.model = self.provider.models[0] + self.voice = random.choice(self.provider.voices) + + # Query all prompts concurrently + + for uid in available_uids: + messages = await get_question("tts", len(available_uids)) + uid_to_question[uid] = messages # Store messages for each UID + + syn = TTSResponse( + text=messages, + provider=self.provider.name, + model=self.model, + voice=self.voice, + ) + bt.logging.info(f"uid = {uid}, syn = {syn}") + + # bt.logging.info( + # f"Sending a {self.size} {self.quality} {self.style} {self.query_type} request " + # f"to uid: {uid} using {syn.model} with timeout {self.timeout}: {syn.messages}" + # ) + task = self.query_miner(metagraph, uid, syn) + query_tasks.append(task) + self.wandb_data["prompts"][uid] = messages + + # Each query response is (uid,
syn) + query_responses = await asyncio.gather(*query_tasks) + return query_responses, uid_to_question + except: + bt.logging.error(f"error in start_query:\n{traceback.format_exc()}") + + + async def score_responses(self, query_responses, uid_to_question, metagraph): + scores = torch.zeros(len(metagraph.hotkeys)) + uid_scores_dict = {} + rand = random.random() + will_score_all = rand < 1/1 + + for uid, syn in query_responses: + try: + syn = syn[0] + audio_b64 = syn.audio_b64 + if audio_b64 is None: + scores[uid] = uid_scores_dict[uid] = 0 + continue + + bt.logging.info(f"UID {uid} responded with a file") + with BytesIO(base64.b64decode(audio_b64)) as f: + audio, _ = librosa.load(f, sr=self.sr) + audio = audio.clip(-1, 1) + + if will_score_all: + wer = cortext.reward.calculate_wer(audio, self.asr_model, uid_to_question[uid]) + if wer > 0.5: + scores[uid] = uid_scores_dict[uid] = 0 + continue + mos = cortext.reward.dnsmos_score(audio, self.sr) + scores[uid] = uid_scores_dict[uid] = mos + # calculate_odds(sum(scores) / len(scores), len(scores), 3.28, 0.15) + + self.wandb_data["audio"][uid] = wandb.Audio(audio, self.sr, caption=uid_to_question[uid]) + + except: + bt.logging.debug(f"error in score_responses for uid {uid}:\n{traceback.format_exc()}") + + bt.logging.info(f"Final scores: {uid_scores_dict}") + bt.logging.info("score_responses process completed.") + return scores, uid_scores_dict, self.wandb_data + diff --git a/validators/validator.py b/validators/validator.py index 15cd2291..1809d8b2 100644 --- a/validators/validator.py +++ b/validators/validator.py @@ -17,6 +17,7 @@ from image_validator import ImageValidator from embeddings_validator import EmbeddingsValidator from text_validator import TextValidator +from tts_validator import TTSValidator from base_validator import BaseValidator from envparse import env @@ -29,6 +30,7 @@ text_vali = None image_vali = None embed_vali = None +tts_vali = None metagraph = None wandb_runs = {} @@ -101,11 +103,12 @@ def initialize_components(config: bt.config): def initialize_validators(vali_config, test=False): - global text_vali, image_vali, embed_vali + global text_vali, image_vali, embed_vali, tts_vali text_vali = TextValidator(**vali_config) image_vali = ImageValidator(**vali_config) embed_vali = EmbeddingsValidator(**vali_config) + tts_vali = TTSValidator(**vali_config) bt.logging.info("initialized_validators") @@ -121,7 +124,7 @@ def main(test=False) -> None: initialize_validators(validator_config, test) init_wandb(config, my_uid, wallet) loop = asyncio.get_event_loop() - weight_setter = WeightSetter(loop, dendrite, subtensor, config, wallet, text_vali, image_vali, embed_vali) + weight_setter = WeightSetter(loop, dendrite, subtensor, config, wallet, text_vali, image_vali, embed_vali, tts_vali) state_path = os.path.join(config.full_path, "state.json") utils.get_state(state_path) try: diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 65c48e06..917bf568 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -39,7 +39,7 @@ from anthropic_bedrock import AsyncAnthropicBedrock, HUMAN_PROMPT, AI_PROMPT, AnthropicBedrock import cortext -from cortext.protocol import Embeddings, ImageResponse, IsAlive, StreamPrompting, TextPrompting +from cortext.protocol import Embeddings, ImageResponse, IsAlive, StreamPrompting, TextPrompting, TTSResponse from cortext.utils import get_version import sys @@ -55,7 +55,7 @@ class WeightSetter: - def __init__(self, loop: asyncio.AbstractEventLoop, dendrite, subtensor, config, 
wallet, text_vali, image_vali, embed_vali): + def __init__(self, loop: asyncio.AbstractEventLoop, dendrite, subtensor, config, wallet, text_vali, image_vali, embed_vali, tts_vali): bt.logging.info("starting weight setter") self.config = config bt.logging.info(f"config:\n{self.config}") @@ -68,6 +68,7 @@ def __init__(self, loop: asyncio.AbstractEventLoop, dendrite, subtensor, config, self.text_vali = text_vali self.image_vali = image_vali self.embed_vali = embed_vali + self.tts_vali = tts_vali self.moving_average_scores = None self.axon = bt.axon(wallet=self.wallet, port=self.config.axon.port) self.metagraph = self.subtensor.metagraph(config.netuid) @@ -104,6 +105,11 @@ def blacklist_embeddings( self, synapse: Embeddings ) -> Tuple[bool, str]: bt.logging.info(blacklist[1]) return blacklist + def blacklist_tts( self, synapse: TTSResponse ) -> Tuple[bool, str]: + blacklist = self.base_blacklist(synapse, cortext.TTS_BLACKLIST_STAKE) + bt.logging.info(blacklist[1]) + return blacklist + def base_blacklist(self, synapse, blacklist_amt = 20000) -> Tuple[bool, str]: try: hotkey = synapse.dendrite.hotkey @@ -136,6 +142,14 @@ async def embeddings(self, synapse: Embeddings) -> Embeddings: bt.logging.info(f"new synapse = {synapse}") return synapse + async def tts(self, synapse: TTSResponse) -> TTSResponse: + bt.logging.info(f"received {synapse}") + + synapse = self.dendrite.query(self.metagraph.axons[synapse.uid], synapse, deserialize=False, timeout=synapse.timeout) + + bt.logging.info(f"new synapse = {synapse}") + return synapse + async def prompt(self, synapse: StreamPrompting) -> StreamPrompting: bt.logging.info(f"received {synapse}") @@ -190,6 +204,9 @@ async def consume_organic_scoring(self): ).attach( forward_fn=self.embeddings, blacklist_fn=self.blacklist_embeddings, + ).attach( + forward_fn=self.tts, + blacklist_fn=self.blacklist_tts, ).attach( forward_fn=self.text, ) @@ -246,7 +263,13 @@ async def perform_synthetic_scoring_and_update_weights(self): await asyncio.sleep(60) def select_validator(self, steps_passed): - return self.text_vali if steps_passed % 10 in (0, 1, 2, 3, 4, 5, 6, 7, 8) else self.image_vali + step_mod = steps_passed % 14 # 10 text steps, 2 image steps, 2 tts steps per cycle + if step_mod <= 9: + return self.text_vali + elif step_mod <= 11: + return self.image_vali + else: + return self.tts_vali async def get_available_uids(self): """Get a dictionary of available UIDs and their axons asynchronously."""
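
Reviewer note: below is a minimal sketch of exercising the new TTS endpoint end to end, reusing the `dendrite.query` call shape from `WeightSetter.tts` in this diff. The wallet names, the output path, and the assumption that ElevenLabs returns MP3-encoded bytes are placeholders, not part of the diff; the calling hotkey must also clear the miner's `TTS_BLACKLIST_STAKE` threshold.

```python
# Hedged client sketch (not part of the diff): query one miner's TTS endpoint
# and save the decoded audio. Wallet names and output path are placeholders;
# assumes a hotkey that clears the miner's TTS stake blacklist.
import base64

import bittensor as bt

from cortext.protocol import TTSResponse

wallet = bt.wallet(name="validator", hotkey="default")  # placeholder names
dendrite = bt.dendrite(wallet=wallet)
metagraph = bt.subtensor().metagraph(18)  # subnet 18 mainnet

syn = TTSResponse(
    text="A gentle rain swept across the stadium as the underdogs scored.",
    provider="ElevenLabs",
    model="eleven_multilingual_v2",
    voice="Rachel",
)

# Same call shape as WeightSetter.tts above; syn.uid defaults to 60.
resp = dendrite.query(metagraph.axons[syn.uid], syn, deserialize=False, timeout=syn.timeout)

if resp.audio_b64 is not None:
    # Assumption: ElevenLabs' default output format is MP3 bytes.
    with open("tts_sample.mp3", "wb") as f:
        f.write(base64.b64decode(resp.audio_b64))
```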
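And a short sketch of how the new `cortext/reward.py` helpers compose when scoring a single response, mirroring the steps in `TTSValidator.score_responses`: decode the base64 audio at the Whisper feature-extractor rate, gate on word error rate, then take the DNSMOS overall MOS. The `audio_b64` string and prompt `text` are assumed inputs from a miner response.

```python
# Hedged scoring sketch (not part of the diff): score one TTS response with
# the reward helpers introduced above. `audio_b64` and `text` are assumed to
# be a miner's response payload and the prompt it was asked to speak.
import base64
from io import BytesIO

import librosa

import cortext.reward

# Same ASR setup as TTSValidator.__init__.
asr_model = cortext.reward.get_whisper_model("large-v3", device="cpu", compute_type="int8", max_length=4096)
sr = asr_model.feature_extractor.sampling_rate

def score_one(audio_b64: str, text: str) -> float:
    with BytesIO(base64.b64decode(audio_b64)) as f:
        audio, _ = librosa.load(f, sr=sr)  # decode and resample to the ASR rate
    audio = audio.clip(-1, 1)

    # Transcribe and gate on word error rate before computing quality.
    wer = cortext.reward.calculate_wer(audio, asr_model, text)
    if wer > 0.5:
        return 0.0

    # Reference-free perceptual quality (DNSMOS overall MOS, roughly 1-5).
    return cortext.reward.dnsmos_score(audio, sr)
```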