Fundamentals
Transformer Architecture
class Transformer(nn.Module):
    """Encoder-decoder Transformer built from stacked encoder and decoder layers.

    The layer classes are referenced unqualified; this assumes they are the
    ``torch.nn`` implementations, which by default expect sequence-first
    tensors of shape ``(seq_len, batch, d_model)``.

    Args:
        d_model: Width of every token representation.
        nhead: Number of attention heads in each layer.
        num_layers: Number of layers in the encoder stack and in the decoder stack.
    """

    def __init__(self, d_model, nhead, num_layers):
        super().__init__()
        self.encoder = TransformerEncoder(
            TransformerEncoderLayer(d_model, nhead),
            num_layers,
        )
        self.decoder = TransformerDecoder(
            TransformerDecoderLayer(d_model, nhead),
            num_layers,
        )

    def forward(self, src, tgt, src_mask=None, tgt_mask=None, memory_mask=None):
        """Encode ``src`` and decode ``tgt`` against the encoder output.

        Without this method the module could be constructed but not called.

        Args:
            src: Source sequence embeddings fed to the encoder.
            tgt: Target sequence embeddings fed to the decoder.
            src_mask: Optional attention mask for encoder self-attention.
            tgt_mask: Optional attention mask for decoder self-attention
                (typically a causal mask).
            memory_mask: Optional attention mask for decoder cross-attention.

        Returns:
            The decoder output, with the same shape as ``tgt``.
        """
        memory = self.encoder(src, mask=src_mask)
        return self.decoder(tgt, memory, tgt_mask=tgt_mask, memory_mask=memory_mask)
# Self-Attention Mechanism
def scaled_dot_product_attention(Q, K, V, mask=None):
    """Compute softmax(Q K^T / sqrt(d_k)) V.

    Args:
        Q: Query tensor whose last dimension is ``d_k``.
        K: Key tensor; its last two dimensions are transposed before the product.
        V: Value tensor combined using the attention weights.
        mask: Optional tensor broadcastable to the score matrix. Positions
            where ``mask == 0`` are excluded from attention.

    Returns:
        The attention-weighted combination of ``V``.
    """
    d_k = Q.size(-1)
    scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        # Fill with the most negative value the score dtype can hold. A fixed
        # -1e9 literal overflows float16, so half-precision inputs would fail.
        scores = scores.masked_fill(mask == 0, torch.finfo(scores.dtype).min)
    attention = F.softmax(scores, dim=-1)
    return torch.matmul(attention, V)
Attention Mechanisms
class MultiHeadAttention(nn.Module):
    """Multi-head attention with learned query, key, value and output projections.

    Args:
        d_model: Width of the input and output representations.
        num_heads: Number of attention heads; must divide ``d_model`` evenly.

    Raises:
        ValueError: If ``d_model`` is not divisible by ``num_heads``.
    """

    def __init__(self, d_model, num_heads):
        super().__init__()
        # Each head gets d_model // num_heads features. A remainder would make
        # the reshape in forward() fail with an opaque size error, so reject it here.
        if d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )
        self.d_model = d_model
        self.num_heads = num_heads
        self.d_k = d_model // num_heads
        self.W_q = nn.Linear(d_model, d_model)
        self.W_k = nn.Linear(d_model, d_model)
        self.W_v = nn.Linear(d_model, d_model)
        self.W_o = nn.Linear(d_model, d_model)

    def forward(self, Q, K, V, mask=None):
        """Project the inputs, attend per head, and merge the heads.

        Args:
            Q: Query input; its first dimension is the batch size and its last
                dimension is ``d_model``.
            K: Key input, laid out like ``Q``.
            V: Value input, laid out like ``Q``.
            mask: Optional mask forwarded unchanged to
                ``scaled_dot_product_attention``; it must broadcast against the
                per-head score tensor ``(batch, num_heads, q_len, k_len)``.

        Returns:
            Tensor of shape ``(batch, q_len, d_model)``.
        """
        batch_size = Q.size(0)
        # (batch, seq, d_model) -> (batch, num_heads, seq, d_k)
        Q = self.W_q(Q).view(batch_size, -1, self.num_heads, self.d_k).transpose(1, 2)
        K = self.W_k(K).view(batch_size, -1, self.num_heads, self.d_k).transpose(1, 2)
        V = self.W_v(V).view(batch_size, -1, self.num_heads, self.d_k).transpose(1, 2)
        attention = scaled_dot_product_attention(Q, K, V, mask)
        # transpose() leaves the tensor non-contiguous; view() needs contiguous memory.
        attention = attention.transpose(1, 2).contiguous().view(batch_size, -1, self.d_model)
        return self.W_o(attention)
LLM Architectures
GPT Architecture
class GPT(nn.Module):
    """Decoder-only (GPT-style) language model over token ids.

    A GPT block has masked self-attention and a feed-forward layer but no
    cross-attention, so the stack is built from self-attention-only layers and
    made autoregressive by the caller-supplied causal mask. This assumes
    ``TransformerEncoderLayer`` is the ``torch.nn`` implementation.

    Args:
        vocab_size: Number of distinct token ids.
        d_model: Width of the token representations.
        nhead: Number of attention heads per layer.
        num_layers: Number of stacked layers.
        max_len: Maximum supported sequence length (size of the learned
            position table). Defaults to the previous fixed value of 1000.
    """

    def __init__(self, vocab_size, d_model, nhead, num_layers, max_len=1000):
        super().__init__()
        self.token_embedding = nn.Embedding(vocab_size, d_model)
        self.position_embedding = nn.Embedding(max_len, d_model)
        # batch_first=True because forward() produces (batch, seq, d_model) tensors.
        self.decoder_layers = nn.ModuleList([
            TransformerEncoderLayer(d_model, nhead, batch_first=True)
            for _ in range(num_layers)
        ])
        self.fc_out = nn.Linear(d_model, vocab_size)

    def forward(self, src, src_mask=None):
        """Return next-token logits for every position.

        Args:
            src: Integer token ids of shape ``(batch, seq_len)``.
            src_mask: Optional ``(seq_len, seq_len)`` attention mask. Pass a
                causal mask for autoregressive behaviour; with ``None`` every
                position attends to every other position.

        Returns:
            Logits of shape ``(batch, seq_len, vocab_size)``.

        Raises:
            ValueError: If ``seq_len`` exceeds the position table size.
        """
        batch_size, seq_len = src.shape
        max_len = self.position_embedding.num_embeddings
        if seq_len > max_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds maximum supported length {max_len}"
            )
        # Build the position ids directly on the input's device.
        positions = torch.arange(seq_len, device=src.device).expand(batch_size, seq_len)
        x = self.token_embedding(src) + self.position_embedding(positions)
        for layer in self.decoder_layers:
            # Keyword argument: the mask must not land in another positional slot.
            x = layer(x, src_mask=src_mask)
        return self.fc_out(x)
# Causal mask for autoregressive generation
def generate_square_subsequent_mask(sz, device=None, dtype=None):
    """Build an additive causal mask of shape ``(sz, sz)``.

    Entry ``[i, j]`` is ``0.0`` when ``j <= i`` (position ``i`` may attend to
    ``j``) and a large negative value when ``j > i`` (future positions are
    blocked).

    Args:
        sz: Sequence length; the mask is ``sz`` by ``sz``.
        device: Optional device on which to create the mask. Defaults to
            PyTorch's current default device.
        dtype: Optional floating dtype of the mask. Defaults to ``torch.float32``.

    Returns:
        A float tensor with ``-1e9`` strictly above the diagonal and zeros elsewhere.
    """
    if dtype is None:
        dtype = torch.float32
    # -1e9 does not fit in narrow dtypes such as float16; clamp to the dtype's range.
    blocked_value = max(-1e9, torch.finfo(dtype).min)
    blocked = torch.full((sz, sz), blocked_value, dtype=dtype, device=device)
    # Keep the blocked value strictly above the diagonal; triu zeroes the rest.
    return torch.triu(blocked, diagonal=1)
BERT Architecture
class BERT(nn.Module):
    """BERT-style encoder with a classification head on the first token.

    This assumes ``TransformerEncoderLayer`` is the ``torch.nn`` implementation.

    Args:
        vocab_size: Number of distinct token ids.
        d_model: Width of the token representations.
        nhead: Number of attention heads per layer.
        num_layers: Number of stacked encoder layers.
        max_len: Maximum supported sequence length (size of the learned
            position table). Defaults to the previous fixed value of 512.
        num_classes: Number of output classes. Defaults to the previous
            fixed value of 2.
    """

    def __init__(self, vocab_size, d_model, nhead, num_layers, max_len=512, num_classes=2):
        super().__init__()
        self.token_embedding = nn.Embedding(vocab_size, d_model)
        self.position_embedding = nn.Embedding(max_len, d_model)
        self.segment_embedding = nn.Embedding(2, d_model)  # for sentence pairs
        # batch_first=True: forward() builds (batch, seq, d_model) embeddings and
        # reads the first token with embeddings[:, 0, :].
        self.encoder_layers = nn.ModuleList([
            TransformerEncoderLayer(d_model, nhead, batch_first=True)
            for _ in range(num_layers)
        ])
        self.classifier = nn.Linear(d_model, num_classes)  # for classification tasks

    def forward(self, input_ids, token_type_ids=None, attention_mask=None):
        """Classify each sequence from the representation of its first token.

        Args:
            input_ids: Integer token ids of shape ``(batch, seq_len)``.
            token_type_ids: Optional segment ids (0 or 1) shaped like
                ``input_ids``; defaults to all zeros.
            attention_mask: Optional ``(batch, seq_len)`` tensor where non-zero
                marks a real token and ``0`` marks padding. This is the same
                "0 means masked" convention used by
                ``scaled_dot_product_attention`` in this file.

        Returns:
            Logits of shape ``(batch, num_classes)``.
        """
        seq_length = input_ids.size(1)
        position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
        position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
        if token_type_ids is None:
            token_type_ids = torch.zeros_like(input_ids)
        embeddings = (
            self.token_embedding(input_ids)
            + self.position_embedding(position_ids)
            + self.segment_embedding(token_type_ids)
        )
        # The encoder layer wants True at positions to ignore, so invert the
        # "non-zero = keep" mask. It goes in as a key-padding mask, not as the
        # (seq, seq) attention mask that the second positional slot expects.
        padding_mask = None if attention_mask is None else attention_mask == 0
        for layer in self.encoder_layers:
            embeddings = layer(embeddings, src_key_padding_mask=padding_mask)
        return self.classifier(embeddings[:, 0, :])  # use [CLS] token for classification
Prompt Engineering
Basic Prompt Techniques
"Classify the text: 'The movie was fantastic with great acting.' Sentiment:"
"Text: 'This product is amazing!' Sentiment: Positive
Text: 'The service was terrible.' Sentiment: Negative
Text: 'It was okay, nothing special.' Sentiment: Neutral
Text: 'The acting was superb.' Sentiment:"
"Q: Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?
A: Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls. 5 + 6 = 11. The answer is 11.
Q: The cafeteria had 23 apples. They used 20 to make lunch. They bought 6 more. How many apples do they have now?"
Advanced Prompt Techniques
"You are an expert software architect with 20 years of experience. Design a microservices architecture for an e-commerce platform that needs to handle 1 million daily users."
"Create a marketing email for a new productivity app called 'FocusTime'.
Subject: [ catchy subject line ]
Body: [ engaging content about the app's features ]
Call-to-action: [ compelling CTA button text ]"
"If the user is a beginner, explain machine learning in simple terms with everyday examples. If the user is an expert, provide a technical overview with mathematical formulations."
"First, outline the main points about climate change. Then, expand each point with supporting evidence. Finally, create a compelling conclusion that calls for action."
Fine-Tuning Techniques
Full Fine-Tuning
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
# Checkpoint name shared by the model and its tokenizer
model_name = "gpt2"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Fall back to the end-of-sequence token when the tokenizer defines no pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Hyperparameters and output locations for the run
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
)

# train_dataset and eval_dataset are not defined in this snippet; they must
# already exist when this line runs.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

# Run the fine-tuning loop
trainer.train()
Parameter-Efficient Fine-Tuning
from peft import LoraConfig, get_peft_model, TaskType
# Define LoRA configuration
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    # The model loaded in this file is GPT-2, whose attention uses one fused
    # projection named "c_attn"; it has no "q_proj"/"v_proj" modules, so those
    # names would not match anything. Use ["q_proj", "v_proj"] for
    # architectures that expose separate projections.
    target_modules=["c_attn"],
    # GPT-2's Conv1D stores weights as (fan_in, fan_out).
    fan_in_fan_out=True,
)

# Apply LoRA to model and report how many parameters remain trainable
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Prompt tuning: learn virtual prompt tokens initialised from text
# (P-tuning is a different method, configured via PromptEncoderConfig)
from peft import PromptTuningConfig, PromptTuningInit, get_peft_model
peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    num_virtual_tokens=20,
    prompt_tuning_init_text="Classify the sentiment of this text:",
    tokenizer_name_or_path="gpt2",
)
# NOTE(review): `model` is already LoRA-wrapped at this point. LoRA and prompt
# tuning look like alternative recipes -- reload the base model before this
# call unless stacking both adapters is intended.
model = get_peft_model(model, peft_config)
Evaluation & Deployment
Model Evaluation
from datasets import load_metric
# Load metrics
# NOTE(review): datasets.load_metric is deprecated in favour of the separate
# `evaluate` package and is absent from recent `datasets` releases -- confirm
# the pinned version before relying on this.
bleu_metric = load_metric("bleu")
rouge_metric = load_metric("rouge")
perplexity_metric = load_metric("perplexity")

# Raw text: one prediction, with a list of acceptable references per prediction
predictions = ["I like to eat pizza"]
references = [["I enjoy eating pizza"]]

# Calculate BLEU score
# The BLEU metric scores token sequences, so split each sentence on whitespace:
# predictions become lists of tokens, references become lists of token lists.
bleu_score = bleu_metric.compute(
    predictions=[prediction.split() for prediction in predictions],
    references=[[reference.split() for reference in group] for group in references],
)

# Calculate ROUGE score
# The ROUGE metric takes exactly one reference string per prediction, so use
# the first reference of each group (any additional references are not scored).
rouge_score = rouge_metric.compute(
    predictions=predictions,
    references=[group[0] for group in references],
)

# Human evaluation template: one 1-5 rating question per quality dimension
human_eval_template = {
    "fluency": "How fluent and natural is the text? (1-5)",
    "relevance": "How relevant is the response to the prompt? (1-5)",
    "coherence": "How coherent and logically structured is the text? (1-5)",
    "accuracy": "How factually accurate is the content? (1-5)",
}

# Toxicity detection
from transformers import pipeline
toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
toxicity_score = toxicity_classifier("Your generated text here")
Model Deployment
from fastapi import FastAPI
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel, Field
from transformers import pipeline

app = FastAPI()
# Load the model once at import time so every request reuses the same pipeline.
generator = pipeline("text-generation", model="gpt2")


class GenerationRequest(BaseModel):
    """Request body for the /generate endpoint."""

    # Text the model should continue.
    prompt: str
    # Total length budget passed to the pipeline. Bounded because the value
    # comes from the client; 1024 assumes GPT-2's context size.
    max_length: int = Field(100, ge=1, le=1024)
    # Sampling temperature; must be strictly positive because sampling is enabled.
    temperature: float = Field(0.7, gt=0.0)


@app.post("/generate")
async def generate_text(request: GenerationRequest):
    """Generate a continuation of ``request.prompt``.

    Out-of-range ``max_length`` or ``temperature`` values are rejected by
    request validation before this handler runs.

    Returns:
        A dict with a single ``"generated_text"`` key holding the first
        generated sequence.
    """
    # Inference is blocking, CPU/GPU-bound work. Calling it directly inside an
    # async handler would stall the event loop for every other request, so
    # hand it to the worker thread pool instead.
    result = await run_in_threadpool(
        generator,
        request.prompt,
        max_length=request.max_length,
        temperature=request.temperature,
        do_sample=True,
    )
    return {"generated_text": result[0]["generated_text"]}
# Dockerfile for deployment
# Container image for the text-generation API: installs the Python
# dependencies, copies the application in, and starts it with uvicorn.
FROM python:3.9-slim
# All following paths are relative to /app inside the image.
WORKDIR /app
# Copy only the dependency manifest first so the install layer below can be
# reused from cache when just the application source changes.
COPY requirements.txt .
# --no-cache-dir keeps pip's download cache out of the image.
RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the build context (the application source) into /app.
COPY . .
# Serve the `app` object from main.py on all interfaces, port 80.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
Comprehensive Generative AI Cheatsheet Reference
This Generative AI cheatsheet on Nikhil Learn Hub collects syntax, commands, and practical snippets for quick revision. Understand generative AI models, prompts, transformers, LLMs, and AI content generation concepts with practical examples.
Use the reference cards and examples above during coding sessions; return here instead of scattered searches when you need dependable reminders. Follow the Generative AI learning roadmap when you want structured lessons beyond one-page lookups.
Quick lookup coverage
- LLM families and API vocabulary
- Prompting, RAG, and evaluation terms
- Safety, cost, and guardrail reminders
How to study with this sheet
- Product integration spikes
- GenAI roadmap-aligned study
- Responsible AI checklist language
Who Should Use This Cheatsheet
Students, self-taught developers, and professionals who need fast Generative AI lookups during labs, debugging, or interview revision should keep this page bookmarked.
Related Resources on Nikhil Learn Hub
- Generative AI learning roadmap — structured learning path for the same technology
- Cheatsheets hub — browse all quick-reference sheets
- Technology hub — tutorials, roadmaps, and practice hubs