7 changes: 7 additions & 0 deletions done.txt
@@ -0,0 +1,7 @@
Name: Delsy Kinyuy
Exercise: 3 - Sentiment Analysis API
Challenges:
- Model loads fast, but first run needed internet to fetch weights
- Sentiment labels limited to POSITIVE/NEGATIVE (no neutral)
- Sarcasm detection is tricky; outputs can be misleading

48 changes: 48 additions & 0 deletions exercise1.py
@@ -0,0 +1,48 @@
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline

# init FastAPI
app = FastAPI(title="Generative AI Exercises")

# load Hugging Face model (distilgpt2 is small enough to run on CPU)
generator = pipeline("text-generation", model="distilgpt2")

# request schema
class PromptRequest(BaseModel):
    prompt: str
    max_tokens: int = 100

@app.get("/")
def root():
return {"message": "FastAPI + Hugging Face is live 🚀"}

@app.post("/hello-llm")
def hello_llm(request: PromptRequest):
"""
Generate text from a given prompt using distilgpt2.
"""
output = generator(
request.prompt,
max_length=len(request.prompt.split()) + request.max_tokens,
num_return_sequences=1
)
return {"prompt": request.prompt, "generated": output[0]["generated_text"]}
# answer interview questions
# **Interview Questions:**

# 1. What is a language model?
# answer: A language model is a statistical tool that predicts the next word in a sequence based on the words that came before it. It is trained on large datasets of text to learn patterns, grammar, and context, enabling it to generate coherent and contextually relevant text.
# 2. How does GPT-2 differ from GPT-3/4?
# answer: GPT-2 is smaller and less powerful than GPT-3/4, with fewer parameters and less training data. GPT-3/4 can generate more coherent and contextually relevant text, handle more complex tasks, and understand nuanced prompts better than GPT-2.
# 3. Why is `distilgpt2` considered lightweight?
# answer: `distilgpt2` is a distilled version of GPT-2, meaning it has been compressed to reduce its size and computational requirements while retaining much of the original model's performance. This makes it more efficient and faster to run, especially on hardware with limited resources.
# 4. What are tokens, and why do they matter in LLMs?
# answer: Tokens are the basic units of text that a language model processes, which can be words, subwords, or characters. They matter because LLMs have limits on the number of tokens they can handle in a single input or output, affecting the model's ability to understand and generate text effectively.
# 5. How do you handle prompt length limits?
# answer: To handle prompt length limits, you can truncate or summarize the input text to fit within the model's maximum token limit. Additionally, you can use techniques like sliding windows for longer texts or break the input into smaller, manageable chunks (see the truncation sketch after these questions).
# 6. Why expose models through an API instead of CLI?
# answer: Exposing models through an API allows for easier integration with various applications, enabling remote access and scalability. It also provides a more user-friendly interface for developers and users who may not be comfortable with command-line interfaces (CLI).
# 7. What’s the risk of directly exposing LLMs without moderation?
# answer: Directly exposing LLMs without moderation can lead to the generation of harmful, biased, or inappropriate content. LLMs may inadvertently produce offensive language, misinformation, or content that violates ethical guidelines, which can harm users and damage the reputation of the service provider.
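# --- Illustrative sketch (not part of the exercise): handling prompt length limits, as in Q5 ---
# Reuses the pipeline's own tokenizer; `truncate_prompt` is a hypothetical helper, and the
# 512-token budget is an arbitrary example (distilgpt2's context window is 1024 tokens).
def truncate_prompt(prompt: str, max_prompt_tokens: int = 512) -> str:
    tokenizer = generator.tokenizer                      # same tokenizer the pipeline uses
    ids = tokenizer.encode(prompt)[:max_prompt_tokens]   # keep only the first N tokens
    return tokenizer.decode(ids)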

60 changes: 60 additions & 0 deletions exercise2.py
@@ -0,0 +1,60 @@
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline

app = FastAPI(title="Generative AI Exercises")

# ---- Load models once on startup ----
generator = pipeline("text-generation", model="distilgpt2")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# ---- Schemas ----
class PromptRequest(BaseModel):
    prompt: str
    max_tokens: int = 50

class SummarizeRequest(BaseModel):
    text: str
    max_tokens: int = 130
    min_tokens: int = 40

# ---- Routes ----
@app.get("/")
def root():
return {"message": "FastAPI + Hugging Face is live 🚀"}

@app.post("/hello-llm")
def hello_llm(request: PromptRequest):
output = generator(
request.prompt,
max_length=len(request.prompt.split()) + request.max_tokens,
num_return_sequences=1
)
return {"prompt": request.prompt, "generated": output[0]["generated_text"]}

@app.post("/summarize")
def summarize(request: SummarizeRequest):
summary = summarizer(
request.text,
max_length=request.max_tokens,
min_length=request.min_tokens,
do_sample=False
)
return {"summary": summary[0]["summary_text"]}

# **Interview Questions:**

# 1. What is abstractive vs extractive summarization?
# answer: Abstractive summarization generates new phrases and sentences to capture the main ideas of the text, while extractive summarization selects and compiles key sentences or phrases directly from the original text.
# 2. Why is BART good for summarization?
# answer: BART is effective for summarization because it combines a bidirectional encoder (like BERT) with a left-to-right decoder (like GPT), allowing it to understand context and generate coherent summaries. Its pre-training on large text corpora helps it learn language patterns, making it adept at producing fluent and relevant summaries.
# 3. What are encoder-decoder architectures?
# answer: Encoder-decoder architectures consist of two main components: an encoder that processes the input data and encodes it into a fixed-size representation, and a decoder that takes this representation and generates the output sequence. This architecture is commonly used in tasks like machine translation and text summarization.
# 4. How does beam search affect summary quality?
# answer: Beam search improves summary quality by exploring multiple possible output sequences simultaneously, allowing the model to consider various options and select the most probable one. This leads to more coherent and contextually relevant summaries compared to greedy decoding, which only considers the most likely next word at each step.
# 5. What are hallucinations in summarization?
# answer: Hallucinations in summarization refer to instances where the model generates information that is not present in the original text, leading to inaccuracies or misleading content in the summary. This can occur when the model overgeneralizes or misinterprets the input data.
# 6. What evaluation metrics exist (ROUGE, BLEU)?
# answer: Evaluation metrics for summarization include ROUGE (Recall-Oriented Understudy for Gisting Evaluation), which measures the overlap of n-grams, word sequences, and word pairs between the generated summary and reference summaries. BLEU (Bilingual Evaluation Understudy) evaluates quality by comparing the output to one or more reference texts, focusing on n-gram precision (a toy ROUGE-1 recall sketch follows after these questions).
# 7. How would you fine-tune BART on legal documents?
# answer: To fine-tune BART on legal documents, I would first gather a large dataset of legal texts and their corresponding summaries. Then, I would preprocess the data to ensure it is clean and formatted correctly. Next, I would use transfer learning to fine-tune the pre-trained BART model on this dataset, adjusting hyperparameters such as learning rate and batch size to optimize performance. Finally, I would evaluate the model using relevant metrics like ROUGE to ensure it generates accurate and coherent summaries of legal documents.
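# --- Illustrative sketches (not part of the exercise) ---
# Beam search (Q4): pipelines forward generation kwargs, so a wider beam can be requested, e.g.
#   summarizer(text, max_length=130, min_length=30, num_beams=4, do_sample=False)
# ROUGE-1 recall (Q6) boils down to clipped unigram overlap; real evaluations should use a
# dedicated library such as rouge-score, this toy function only shows the idea.
from collections import Counter

def rouge1_recall(candidate: str, reference: str) -> float:
    cand = Counter(candidate.lower().split())
    ref = Counter(reference.lower().split())
    overlap = sum(min(cand[word], ref[word]) for word in ref)  # clipped unigram matches
    return overlap / max(sum(ref.values()), 1)                 # fraction of reference unigrams covered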
71 changes: 71 additions & 0 deletions exercise3.py
@@ -0,0 +1,71 @@
# main.py (extend from Exercises 1 & 2)

from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline

app = FastAPI(title="Generative AI Exercises")

# ---- Load models ----
generator = pipeline("text-generation", model="distilgpt2")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

# ---- Schemas ----
class PromptRequest(BaseModel):
    prompt: str
    max_tokens: int = 50

class SummarizeRequest(BaseModel):
    text: str
    max_tokens: int = 130
    min_tokens: int = 30

class SentimentRequest(BaseModel):
    text: str

# ---- Routes ----
@app.get("/")
def root():
return {"message": "FastAPI + Hugging Face is live 🚀"}

@app.post("/hello-llm")
def hello_llm(request: PromptRequest):
output = generator(
request.prompt,
max_length=len(request.prompt.split()) + request.max_tokens,
num_return_sequences=1
)
return {"prompt": request.prompt, "generated": output[0]["generated_text"]}

@app.post("/summarize")
def summarize(request: SummarizeRequest):
summary = summarizer(
request.text,
max_length=request.max_tokens,
min_length=request.min_tokens,
do_sample=False
)
return {"summary": summary[0]["summary_text"]}

@app.post("/sentiment")
def sentiment(request: SentimentRequest):
result = sentiment_analyzer(request.text)
return {"text": request.text, "sentiment": result[0]}

# **Interview Questions:**

# 1. What is transfer learning in NLP?
# answer: Transfer learning in NLP involves taking a pre-trained model (trained on a large corpus of text) and fine-tuning it on a specific task or dataset. This allows the model to leverage learned language representations, reducing the need for large amounts of task-specific data and improving performance.
# 2. Why use DistilBERT instead of BERT?
# answer: DistilBERT is a smaller, faster, and more efficient version of BERT that retains about 97% of BERT's performance while being 60% faster and having 40% fewer parameters. This makes it more suitable for deployment in resource-constrained environments or applications requiring lower latency.
# 3. What dataset is SST-2?
# answer: SST-2 is the binary (two-class) version of the Stanford Sentiment Treebank, a dataset of movie review sentences labeled as positive or negative. It is widely used for training and evaluating sentiment classification models.
# 4. What are embeddings in classification?
# answer: Embeddings are dense vector representations of words or phrases that capture semantic meaning and relationships. In classification tasks, embeddings serve as input features for machine learning models, allowing them to understand the context and nuances of the text data.
# 5. How do you evaluate classification performance?
# answer: Classification performance can be evaluated using metrics such as accuracy, precision, recall, F1-score, and the confusion matrix. The choice of metric depends on the specific task and the relative cost of false positives vs. false negatives (a small precision/recall/F1 sketch follows after these questions).
# 6. What biases can exist in sentiment models?
# answer: Sentiment models can exhibit biases based on the training data, such as demographic or cultural bias, domain bias (SST-2 consists of movie reviews, so text from other domains may be misclassified), and bias against dialects or phrasings that are underrepresented in the training corpus.
# 7. How would you handle sarcasm in sentiment detection?
# answer: Sarcasm can be challenging for sentiment detection as it often involves saying the opposite of what is meant. To handle sarcasm, one could use more sophisticated models that consider context, tone, and user behavior. Additionally, incorporating datasets specifically labeled for sarcasm can help improve model performance in detecting sarcastic remarks.
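# --- Illustrative sketch (not part of the exercise): the metrics mentioned in Q5 ---
# Computes precision, recall, and F1 from raw confusion-matrix counts; the counts would come
# from comparing model predictions against labelled data (no dataset is included here).
def precision_recall_f1(tp: int, fp: int, fn: int) -> dict:
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    return {"precision": precision, "recall": recall, "f1": f1}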
82 changes: 82 additions & 0 deletions exercise4.py
@@ -0,0 +1,82 @@
# main.py (extend from Exercises 1–3)

from fastapi import FastAPI, UploadFile, File
from pydantic import BaseModel
from transformers import pipeline
from PIL import Image
import io

app = FastAPI(title="Generative AI Exercises")

# ---- Load models ----
generator = pipeline("text-generation", model="distilgpt2")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")

# ---- Schemas ----
class PromptRequest(BaseModel):
    prompt: str
    max_tokens: int = 50

class SummarizeRequest(BaseModel):
    text: str
    max_tokens: int = 130
    min_tokens: int = 30

class SentimentRequest(BaseModel):
    text: str

# ---- Routes ----
@app.get("/")
def root():
return {"message": "FastAPI + Hugging Face is live 🚀"}

@app.post("/hello-llm")
def hello_llm(request: PromptRequest):
output = generator(
request.prompt,
max_length=len(request.prompt.split()) + request.max_tokens,
num_return_sequences=1
)
return {"prompt": request.prompt, "generated": output[0]["generated_text"]}

@app.post("/summarize")
def summarize(request: SummarizeRequest):
summary = summarizer(
request.text,
max_length=request.max_tokens,
min_length=request.min_tokens,
do_sample=False
)
return {"summary": summary[0]["summary_text"]}

@app.post("/sentiment")
def sentiment(request: SentimentRequest):
result = sentiment_analyzer(request.text)
return {"text": request.text, "sentiment": result[0]}

@app.post("/caption-image")
async def caption_image(file: UploadFile = File(...)):
contents = await file.read()
image = Image.open(io.BytesIO(contents)).convert("RGB")
caption = captioner(image)
return {"filename": file.filename, "caption": caption[0]["generated_text"]}


# **Interview Questions:**

# 1. How does ViT process images?
# answer: ViT splits images into patches, processes them as sequences, and uses self-attention to capture relationships.
# 2. What role does GPT-2 play in captioning?
# answer: GPT-2 generates coherent text based on the visual features extracted by the vision encoder.
# 3. Why combine a vision encoder with a language decoder?
# answer: Combining them allows the model to understand visual content and generate relevant textual descriptions.
# 4. What datasets are used for captioning?
# answer: Common datasets include MS COCO, Flickr8k, and Flickr30k.
# 5. What challenges exist in image captioning?
# answer: Challenges include understanding context, handling diverse objects, and generating natural language.
# 6. How do you evaluate captions (BLEU, CIDEr)?
# answer: BLEU measures n-gram overlap, while CIDEr evaluates consensus with multiple references.
# 7. What real-world applications use captioning?
# answer: Real-world applications include accessibility tools, content management, and social media platforms.
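# --- Illustrative sketch (not part of the exercise): calling /caption-image from a client ---
# Assumes the app is running locally via `uvicorn main:app --reload`; "example.jpg" is a
# placeholder path, and the requests package is an extra dependency of this sketch.
if __name__ == "__main__":
    import requests

    with open("example.jpg", "rb") as f:
        resp = requests.post("http://127.0.0.1:8000/caption-image", files={"file": f})
    print(resp.json())  # e.g. {"filename": "example.jpg", "caption": "..."}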