Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions src/adapters/llm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ pub struct Usage {
#[async_trait]
pub trait LLMAdapter: Send + Sync {
async fn complete(&self, request: LLMRequest) -> Result<LLMResponse>;
#[allow(dead_code)]
fn model_name(&self) -> &str;
}

Expand Down Expand Up @@ -99,9 +98,8 @@ pub fn create_adapter(config: &ModelConfig) -> Result<Box<dyn LLMAdapter>> {
}

// Vendor-prefixed model IDs (vendor/model)
if config.model_name.contains('/') {
let (vendor, model) = config.model_name.split_once('/').unwrap();
let model = model.to_string();
if let Some((vendor, model_suffix)) = config.model_name.split_once('/') {
let model = model_suffix.to_string();
match vendor {
"anthropic" => {
// Route anthropic/ prefix directly to the Anthropic adapter
Expand Down
73 changes: 8 additions & 65 deletions src/adapters/ollama.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,57 +43,7 @@ struct OllamaChatResponse {
eval_count: Option<usize>,
}

// -- Legacy generate API types (kept for reference / future use) --

#[allow(dead_code)]
#[derive(Serialize)]
struct OllamaGenerateRequest {
model: String,
prompt: String,
system: String,
temperature: f32,
num_predict: usize,
stream: bool,
}

#[allow(dead_code)]
#[derive(Deserialize)]
struct OllamaGenerateResponse {
response: String,
model: String,
done: bool,
_context: Option<Vec<i32>>,
_total_duration: Option<u64>,
prompt_eval_count: Option<usize>,
eval_count: Option<usize>,
}

// -- /api/show types for context window detection --

#[allow(dead_code)]
#[derive(Serialize)]
struct OllamaShowRequest {
name: String,
}

#[allow(dead_code)]
#[derive(Deserialize)]
struct OllamaShowResponse {
#[serde(default)]
parameters: Option<String>,
#[allow(dead_code)]
#[serde(default)]
details: Option<OllamaShowDetails>,
}

#[allow(dead_code)]
#[derive(Deserialize)]
struct OllamaShowDetails {
family: Option<String>,
parameter_size: Option<String>,
quantization_level: Option<String>,
}

impl OllamaAdapter {
pub fn new(config: ModelConfig) -> Result<Self> {
let base_url = config
Expand Down Expand Up @@ -131,27 +81,20 @@ impl OllamaAdapter {
///
/// Returns `Some(num_ctx)` if the model metadata contains a `num_ctx` parameter,
/// or `None` if the endpoint is unreachable or the parameter is not found.
#[allow(dead_code)]
#[allow(dead_code)] // Public API, used in tests; production uses OfflineModelManager
pub async fn detect_context_window(&self) -> Option<usize> {
let url = format!("{}/api/show", self.base_url);
let show_request = OllamaShowRequest {
name: self.model_name_bare().to_string(),
};
let body = serde_json::json!({"name": self.model_name_bare()});

let response = self
.client
.post(&url)
.json(&show_request)
.send()
.await
.ok()?;
let response = self.client.post(&url).json(&body).send().await.ok()?;

if !response.status().is_success() {
return None;
}

let show_response: OllamaShowResponse = response.json().await.ok()?;
parse_num_ctx(show_response.parameters.as_deref())
let value: serde_json::Value = response.json().await.ok()?;
let parameters = value.get("parameters").and_then(|p| p.as_str());
parse_num_ctx(parameters)
}
}

Expand All @@ -162,8 +105,8 @@ impl OllamaAdapter {
/// num_ctx 4096
/// temperature 0.8
/// ```
#[allow(dead_code)]
fn parse_num_ctx(parameters: Option<&str>) -> Option<usize> {
#[allow(dead_code)] // Called by detect_context_window
pub(crate) fn parse_num_ctx(parameters: Option<&str>) -> Option<usize> {
let params = parameters?;
for line in params.lines() {
let line = line.trim();
Expand Down
48 changes: 1 addition & 47 deletions src/commands/doctor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,7 @@ pub async fn doctor_command(config: Config) -> Result<()> {

// Context window detection (Ollama only)
if endpoint_type == "ollama" {
if let Some(ctx_size) =
detect_model_context_window(&client, &base_url, &recommended.name).await
{
if let Ok(Some(ctx_size)) = manager.detect_context_window(&recommended.name).await {
println!(
" Context window: {} tokens (detected from model)",
ctx_size
Expand Down Expand Up @@ -289,50 +287,6 @@ fn estimate_tokens(text: &str) -> usize {
(text.len() / 4).max(1)
}

/// Query Ollama's `/api/show` to detect the model's context window size.
async fn detect_model_context_window(
client: &Client,
base_url: &str,
model_name: &str,
) -> Option<usize> {
let url = format!("{}/api/show", base_url);
let body = serde_json::json!({"name": model_name});
let resp = client.post(&url).json(&body).send().await.ok()?;
if !resp.status().is_success() {
return None;
}
let text = resp.text().await.ok()?;
let value: serde_json::Value = serde_json::from_str(&text).ok()?;

// The "parameters" field is a newline-delimited string of key-value pairs
if let Some(params) = value.get("parameters").and_then(|p| p.as_str()) {
for line in params.lines() {
let trimmed = line.trim();
if trimmed.starts_with("num_ctx") {
if let Some(val) = trimmed.split_whitespace().nth(1) {
return val.parse().ok();
}
}
}
}

// Also check model_info for context_length
if let Some(info) = value.get("model_info") {
// Try common key patterns
for key in &[
"context_length",
"llama.context_length",
"general.context_length",
] {
if let Some(ctx) = info.get(*key).and_then(|v| v.as_u64()) {
return Some(ctx as usize);
}
}
}

None
}

/// Print system resource information (RAM and GPU if available).
fn check_system_resources() {
println!("System Resources:");
Expand Down
6 changes: 4 additions & 2 deletions src/commands/git.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ async fn suggest_commit_message(config: config::Config) -> Result<()> {
return Ok(());
}

let model_config = config.to_model_config();
// Use Fast model for commit message suggestion (lightweight task)
let model_config = config.to_model_config_for_role(config::ModelRole::Fast);

let adapter = adapters::llm::create_adapter(&model_config)?;

Expand Down Expand Up @@ -114,7 +115,8 @@ async fn suggest_pr_title(config: config::Config) -> Result<()> {
return Ok(());
}

let model_config = config.to_model_config();
// Use Fast model for PR title suggestion (lightweight task)
let model_config = config.to_model_config_for_role(config::ModelRole::Fast);

let adapter = adapters::llm::create_adapter(&model_config)?;

Expand Down
6 changes: 3 additions & 3 deletions src/commands/pr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@ pub async fn pr_command(
let diffs = core::DiffParser::parse_unified_diff(&diff_content)?;
let git = core::GitIntegration::new(".")?;

let model_config = config.to_model_config();

let adapter = adapters::llm::create_adapter(&model_config)?;
// Use Fast model for PR summary generation (lightweight task)
let fast_config = config.to_model_config_for_role(config::ModelRole::Fast);
let adapter = adapters::llm::create_adapter(&fast_config)?;
let options = core::SummaryOptions {
include_diagram: config.smart_review_diagram,
};
Expand Down
21 changes: 19 additions & 2 deletions src/commands/smart_review.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,23 @@ pub async fn smart_review_command(
let model_config = config.to_model_config();

let adapter = adapters::llm::create_adapter(&model_config)?;

// Use Fast model for PR summary and diagram generation (lightweight tasks).
// Only create a separate adapter if model_fast differs from the primary model.
let fast_config = config.to_model_config_for_role(config::ModelRole::Fast);
let separate_fast_adapter: Option<Box<dyn adapters::llm::LLMAdapter>> =
if fast_config.model_name != model_config.model_name {
info!(
"Using fast model '{}' for PR summary/diagram",
fast_config.model_name
);
Some(adapters::llm::create_adapter(&fast_config)?)
} else {
None
};
let summary_adapter: &dyn adapters::llm::LLMAdapter =
separate_fast_adapter.as_deref().unwrap_or(adapter.as_ref());

let mut all_comments = Vec::new();
let mut pr_summary = if config.smart_review_summary {
match core::GitIntegration::new(&repo_root) {
Expand All @@ -72,7 +89,7 @@ pub async fn smart_review_command(
match core::PRSummaryGenerator::generate_summary_with_options(
&diffs,
&git,
adapter.as_ref(),
summary_adapter,
options,
)
.await
Expand All @@ -94,7 +111,7 @@ pub async fn smart_review_command(
};

if config.smart_review_diagram {
match core::PRSummaryGenerator::generate_change_diagram(&diffs, adapter.as_ref()).await {
match core::PRSummaryGenerator::generate_change_diagram(&diffs, summary_adapter).await {
Ok(Some(diagram)) => {
if let Some(summary) = &mut pr_summary {
summary.visual_diff = Some(diagram);
Expand Down
58 changes: 58 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ pub enum ModelRole {
Reasoning,
/// Embedding model for RAG indexing.
Embedding,
/// Fast model for lightweight LLM tasks: PR summaries, commit messages,
/// PR titles, diagram generation. Falls back to Weak, then Primary.
Fast,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
Expand Down Expand Up @@ -51,6 +54,11 @@ pub struct Config {
#[serde(default)]
pub model_weak: Option<String>,

/// Fast model for lightweight LLM tasks: PR summaries, commit messages,
/// PR titles, diagram generation. Falls back to model_weak, then primary.
#[serde(default)]
pub model_fast: Option<String>,

/// Reasoning-capable model for complex analysis and self-reflection.
#[serde(default)]
pub model_reasoning: Option<String>,
Expand Down Expand Up @@ -338,6 +346,7 @@ impl Default for Config {
Self {
model: default_model(),
model_weak: None,
model_fast: None,
model_reasoning: None,
model_embedding: None,
fallback_models: Vec::new(),
Expand Down Expand Up @@ -914,6 +923,11 @@ impl Config {
ModelRole::Weak => self.model_weak.as_deref().unwrap_or(&self.model),
ModelRole::Reasoning => self.model_reasoning.as_deref().unwrap_or(&self.model),
ModelRole::Embedding => self.model_embedding.as_deref().unwrap_or(&self.model),
ModelRole::Fast => self
.model_fast
.as_deref()
.or(self.model_weak.as_deref())
.unwrap_or(&self.model),
Comment on lines +926 to +930

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Fast model role defaults to smaller models, violating CLAUDE.md convention

CLAUDE.md states: "Use frontier models for reviews — never default to smaller models." The new ModelRole::Fast fallback chain at src/config.rs:926-930 is model_fast -> model_weak -> primary. When model_fast is not set but model_weak is configured (e.g., claude-haiku), the Fast role silently defaults to the smaller/cheaper Weak model for PR summaries, commit messages, PR titles, and diagram generation. These are all part of the review pipeline. While the core code analysis still uses the frontier model, this fallback chain means the review pipeline as a whole can default to smaller models without the user explicitly opting in via model_fast.

Suggested change
ModelRole::Fast => self
.model_fast
.as_deref()
.or(self.model_weak.as_deref())
.unwrap_or(&self.model),
ModelRole::Fast => self
.model_fast
.as_deref()
.unwrap_or(&self.model),
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

}
}

Expand Down Expand Up @@ -1650,6 +1664,50 @@ mod tests {
);
}

#[test]
fn test_model_role_fast_fallback_to_primary() {
let config = Config {
model: "claude-sonnet-4-6".to_string(),
model_fast: None,
model_weak: None,
..Config::default()
};
assert_eq!(config.model_for_role(ModelRole::Fast), "claude-sonnet-4-6");
}

#[test]
fn test_model_role_fast_fallback_to_weak() {
let config = Config {
model: "claude-sonnet-4-6".to_string(),
model_fast: None,
model_weak: Some("claude-haiku-4-5".to_string()),
..Config::default()
};
assert_eq!(config.model_for_role(ModelRole::Fast), "claude-haiku-4-5");
}

#[test]
fn test_model_role_fast_explicit() {
let config = Config {
model: "claude-sonnet-4-6".to_string(),
model_fast: Some("gpt-4o-mini".to_string()),
model_weak: Some("claude-haiku-4-5".to_string()),
..Config::default()
};
assert_eq!(config.model_for_role(ModelRole::Fast), "gpt-4o-mini");
}

#[test]
fn test_to_model_config_for_role_fast() {
let config = Config {
model: "claude-sonnet-4-6".to_string(),
model_fast: Some("gpt-4o-mini".to_string()),
..Config::default()
};
let fast_config = config.to_model_config_for_role(ModelRole::Fast);
assert_eq!(fast_config.model_name, "gpt-4o-mini");
}

#[test]
fn test_to_model_config_for_role_uses_correct_model() {
let config = Config {
Expand Down
10 changes: 7 additions & 3 deletions src/core/interactive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@ use anyhow::Result;
use regex::Regex;
use std::collections::HashSet;

#[allow(dead_code)]
pub struct InteractiveCommand {
pub command: CommandType,
pub args: Vec<String>,
#[allow(dead_code)] // Set by webhook handler when PR context is available
pub context: Option<String>,
}

#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq)]
pub enum CommandType {
Review,
Expand All @@ -21,7 +20,6 @@ pub enum CommandType {
Config,
}

#[allow(dead_code)]
impl InteractiveCommand {
pub fn parse(comment: &str) -> Option<Self> {
let command_regex = Regex::new(r"@diffscope\s+(\w+)(?:\s+(.*))?").ok()?;
Expand Down Expand Up @@ -186,6 +184,10 @@ impl InteractiveCommand {
))
}

pub fn help_text() -> String {
Self::get_help_text()
}

fn get_help_text() -> String {
r#"## 🤖 DiffScope Interactive Commands

Expand Down Expand Up @@ -244,6 +246,8 @@ Interactive commands respect these configurations."#
}
}

/// Manages per-session ignore patterns from @diffscope ignore commands.
/// Will be wired into the review pipeline's triage filter.
#[allow(dead_code)]
pub struct InteractiveProcessor {
ignored_patterns: HashSet<String>,
Expand Down
Loading
Loading