Skip to content

Commit 0bda0ad

Browse files
committed
make max tokens configurable
1 parent 75c11ba commit 0bda0ad

File tree

10 files changed

+196
-14
lines changed

10 files changed

+196
-14
lines changed

README.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,9 @@ codewiki config set \
105105
--cluster-model <model-name> \
106106
--fallback-model <model-name>
107107

108+
# Configure max token settings
109+
codewiki config set --max-tokens 32768 --max-token-per-module 36369 --max-token-per-leaf-module 16000
110+
108111
# Show current configuration
109112
codewiki config show
110113

@@ -196,6 +199,33 @@ codewiki config agent --clear
196199
| `--doc-type` | Documentation style | Standalone option | `api`, `architecture`, `user-guide`, `developer` |
197200
| `--instructions` | Custom agent instructions | Standalone option | Free-form text |
198201

202+
### Token Settings
203+
204+
CodeWiki allows you to configure maximum token limits for LLM calls. This is useful for:
205+
- Adapting to different model context windows
206+
- Controlling costs by limiting response sizes
207+
- Optimizing for faster response times
208+
209+
```bash
210+
# Set max tokens for LLM responses (default: 32768)
211+
codewiki config set --max-tokens 16384
212+
213+
# Set max tokens for module clustering (default: 36369)
214+
codewiki config set --max-token-per-module 40000
215+
216+
# Set max tokens for leaf modules (default: 16000)
217+
codewiki config set --max-token-per-leaf-module 20000
218+
219+
# Override at runtime for a single generation
220+
codewiki generate --max-tokens 16384 --max-token-per-module 40000
221+
```
222+
223+
| Option | Description | Default |
224+
|--------|-------------|---------|
225+
| `--max-tokens` | Maximum output tokens per LLM response | 32768 |
226+
| `--max-token-per-module` | Input token threshold for module clustering | 36369 |
227+
| `--max-token-per-leaf-module` | Input token threshold for leaf modules | 16000 |
228+
199229
### Configuration Storage
200230

201231
- **API keys**: Securely stored in system keychain (macOS Keychain, Windows Credential Manager, Linux Secret Service)

codewiki/cli/adapters/doc_generator.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@ def generate(self) -> DocumentationJob:
137137
main_model=self.config.get('main_model'),
138138
cluster_model=self.config.get('cluster_model'),
139139
fallback_model=self.config.get('fallback_model'),
140+
max_tokens=self.config.get('max_tokens', 32768),
141+
max_token_per_module=self.config.get('max_token_per_module', 36369),
142+
max_token_per_leaf_module=self.config.get('max_token_per_leaf_module', 16000),
140143
agent_instructions=self.config.get('agent_instructions')
141144
)
142145

codewiki/cli/commands/config.py

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,30 @@ def config_group():
6363
type=str,
6464
help="Fallback model for documentation generation"
6565
)
66+
@click.option(
67+
"--max-tokens",
68+
type=int,
69+
help="Maximum tokens for LLM response (default: 32768)"
70+
)
71+
@click.option(
72+
"--max-token-per-module",
73+
type=int,
74+
help="Maximum tokens per module for clustering (default: 36369)"
75+
)
76+
@click.option(
77+
"--max-token-per-leaf-module",
78+
type=int,
79+
help="Maximum tokens per leaf module (default: 16000)"
80+
)
6681
def config_set(
6782
api_key: Optional[str],
6883
base_url: Optional[str],
6984
main_model: Optional[str],
7085
cluster_model: Optional[str],
71-
fallback_model: Optional[str]
86+
fallback_model: Optional[str],
87+
max_tokens: Optional[int],
88+
max_token_per_module: Optional[int],
89+
max_token_per_leaf_module: Optional[int]
7290
):
7391
"""
7492
Set configuration values for CodeWiki.
@@ -88,10 +106,18 @@ def config_set(
88106
\b
89107
# Update only API key
90108
$ codewiki config set --api-key sk-new-key
109+
110+
\b
111+
# Set max tokens for LLM response
112+
$ codewiki config set --max-tokens 16384
113+
114+
\b
115+
# Set all max token settings
116+
$ codewiki config set --max-tokens 32768 --max-token-per-module 40000 --max-token-per-leaf-module 20000
91117
"""
92118
try:
93119
# Check if at least one option is provided
94-
if not any([api_key, base_url, main_model, cluster_model, fallback_model]):
120+
if not any([api_key, base_url, main_model, cluster_model, fallback_model, max_tokens, max_token_per_module, max_token_per_leaf_module]):
95121
click.echo("No options provided. Use --help for usage information.")
96122
sys.exit(EXIT_CONFIG_ERROR)
97123

@@ -113,6 +139,21 @@ def config_set(
113139
if fallback_model:
114140
validated_data['fallback_model'] = validate_model_name(fallback_model)
115141

142+
if max_tokens is not None:
143+
if max_tokens < 1:
144+
raise ConfigurationError("max_tokens must be a positive integer")
145+
validated_data['max_tokens'] = max_tokens
146+
147+
if max_token_per_module is not None:
148+
if max_token_per_module < 1:
149+
raise ConfigurationError("max_token_per_module must be a positive integer")
150+
validated_data['max_token_per_module'] = max_token_per_module
151+
152+
if max_token_per_leaf_module is not None:
153+
if max_token_per_leaf_module < 1:
154+
raise ConfigurationError("max_token_per_leaf_module must be a positive integer")
155+
validated_data['max_token_per_leaf_module'] = max_token_per_leaf_module
156+
116157
# Create config manager and save
117158
manager = ConfigManager()
118159
manager.load() # Load existing config if present
@@ -122,7 +163,10 @@ def config_set(
122163
base_url=validated_data.get('base_url'),
123164
main_model=validated_data.get('main_model'),
124165
cluster_model=validated_data.get('cluster_model'),
125-
fallback_model=validated_data.get('fallback_model')
166+
fallback_model=validated_data.get('fallback_model'),
167+
max_tokens=validated_data.get('max_tokens'),
168+
max_token_per_module=validated_data.get('max_token_per_module'),
169+
max_token_per_leaf_module=validated_data.get('max_token_per_leaf_module')
126170
)
127171

128172
# Display success messages
@@ -159,6 +203,15 @@ def config_set(
159203
if fallback_model:
160204
click.secho(f"✓ Fallback model: {fallback_model}", fg="green")
161205

206+
if max_tokens:
207+
click.secho(f"✓ Max tokens: {max_tokens}", fg="green")
208+
209+
if max_token_per_module:
210+
click.secho(f"✓ Max token per module: {max_token_per_module}", fg="green")
211+
212+
if max_token_per_leaf_module:
213+
click.secho(f"✓ Max token per leaf module: {max_token_per_leaf_module}", fg="green")
214+
162215
click.echo("\n" + click.style("Configuration updated successfully.", fg="green", bold=True))
163216

164217
except ConfigurationError as e:
@@ -215,6 +268,9 @@ def config_show(output_json: bool):
215268
"cluster_model": config.cluster_model if config else "",
216269
"fallback_model": config.fallback_model if config else "glm-4p5",
217270
"default_output": config.default_output if config else "docs",
271+
"max_tokens": config.max_tokens if config else 32768,
272+
"max_token_per_module": config.max_token_per_module if config else 36369,
273+
"max_token_per_leaf_module": config.max_token_per_leaf_module if config else 16000,
218274
"agent_instructions": config.agent_instructions.to_dict() if config and config.agent_instructions else {},
219275
"config_file": str(manager.config_file_path)
220276
}
@@ -248,6 +304,13 @@ def config_show(output_json: bool):
248304
if config:
249305
click.echo(f" Default Output: {config.default_output}")
250306

307+
click.echo()
308+
click.secho("Token Settings", fg="cyan", bold=True)
309+
if config:
310+
click.echo(f" Max Tokens: {config.max_tokens}")
311+
click.echo(f" Max Token/Module: {config.max_token_per_module}")
312+
click.echo(f" Max Token/Leaf Module: {config.max_token_per_leaf_module}")
313+
251314
click.echo()
252315
click.secho("Agent Instructions", fg="cyan", bold=True)
253316
if config and config.agent_instructions and not config.agent_instructions.is_empty():

codewiki/cli/commands/generate.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,24 @@ def parse_patterns(patterns_str: str) -> List[str]:
102102
is_flag=True,
103103
help="Show detailed progress and debug information",
104104
)
105+
@click.option(
106+
"--max-tokens",
107+
type=int,
108+
default=None,
109+
help="Maximum tokens for LLM response (overrides config)",
110+
)
111+
@click.option(
112+
"--max-token-per-module",
113+
type=int,
114+
default=None,
115+
help="Maximum tokens per module for clustering (overrides config)",
116+
)
117+
@click.option(
118+
"--max-token-per-leaf-module",
119+
type=int,
120+
default=None,
121+
help="Maximum tokens per leaf module (overrides config)",
122+
)
105123
@click.pass_context
106124
def generate_command(
107125
ctx,
@@ -114,7 +132,10 @@ def generate_command(
114132
focus: Optional[str],
115133
doc_type: Optional[str],
116134
instructions: Optional[str],
117-
verbose: bool
135+
verbose: bool,
136+
max_tokens: Optional[int],
137+
max_token_per_module: Optional[int],
138+
max_token_per_leaf_module: Optional[int]
118139
):
119140
"""
120141
Generate comprehensive documentation for a code repository.
@@ -147,6 +168,14 @@ def generate_command(
147168
\b
148169
# Custom instructions
149170
$ codewiki generate --instructions "Focus on public APIs and include usage examples"
171+
172+
\b
173+
# Override max tokens for this generation
174+
$ codewiki generate --max-tokens 16384
175+
176+
\b
177+
# Set all max token limits
178+
$ codewiki generate --max-tokens 32768 --max-token-per-module 40000 --max-token-per-leaf-module 20000
150179
"""
151180
logger = create_logger(verbose=verbose)
152181
start_time = time.time()
@@ -276,6 +305,15 @@ def generate_command(
276305
if instructions:
277306
logger.debug(f"Custom instructions: {instructions}")
278307

308+
# Log max token settings if verbose
309+
if verbose:
310+
effective_max_tokens = max_tokens if max_tokens is not None else config.max_tokens
311+
effective_max_token_per_module = max_token_per_module if max_token_per_module is not None else config.max_token_per_module
312+
effective_max_token_per_leaf = max_token_per_leaf_module if max_token_per_leaf_module is not None else config.max_token_per_leaf_module
313+
logger.debug(f"Max tokens: {effective_max_tokens}")
314+
logger.debug(f"Max token/module: {effective_max_token_per_module}")
315+
logger.debug(f"Max token/leaf module: {effective_max_token_per_leaf}")
316+
279317
# Get agent instructions (merge runtime with persistent)
280318
agent_instructions_dict = None
281319
if runtime_instructions and not runtime_instructions.is_empty():
@@ -302,6 +340,10 @@ def generate_command(
302340
'base_url': config.base_url,
303341
'api_key': api_key,
304342
'agent_instructions': agent_instructions_dict,
343+
# Max token settings (runtime overrides take precedence)
344+
'max_tokens': max_tokens if max_tokens is not None else config.max_tokens,
345+
'max_token_per_module': max_token_per_module if max_token_per_module is not None else config.max_token_per_module,
346+
'max_token_per_leaf_module': max_token_per_leaf_module if max_token_per_leaf_module is not None else config.max_token_per_leaf_module,
305347
},
306348
verbose=verbose,
307349
generate_html=github_pages

codewiki/cli/config_manager.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,10 @@ def save(
8888
main_model: Optional[str] = None,
8989
cluster_model: Optional[str] = None,
9090
fallback_model: Optional[str] = None,
91-
default_output: Optional[str] = None
91+
default_output: Optional[str] = None,
92+
max_tokens: Optional[int] = None,
93+
max_token_per_module: Optional[int] = None,
94+
max_token_per_leaf_module: Optional[int] = None
9295
):
9396
"""
9497
Save configuration to file and keyring.
@@ -100,6 +103,9 @@ def save(
100103
cluster_model: Clustering model
101104
fallback_model: Fallback model
102105
default_output: Default output directory
106+
max_tokens: Maximum tokens for LLM response
107+
max_token_per_module: Maximum tokens per module for clustering
108+
max_token_per_leaf_module: Maximum tokens per leaf module
103109
"""
104110
# Ensure config directory exists
105111
try:
@@ -133,6 +139,12 @@ def save(
133139
self._config.fallback_model = fallback_model
134140
if default_output is not None:
135141
self._config.default_output = default_output
142+
if max_tokens is not None:
143+
self._config.max_tokens = max_tokens
144+
if max_token_per_module is not None:
145+
self._config.max_token_per_module = max_token_per_module
146+
if max_token_per_leaf_module is not None:
147+
self._config.max_token_per_leaf_module = max_token_per_leaf_module
136148

137149
# Validate configuration (only if base fields are set)
138150
if self._config.base_url and self._config.main_model and self._config.cluster_model:

codewiki/cli/models/config.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,13 +113,19 @@ class Configuration:
113113
cluster_model: Model for module clustering
114114
fallback_model: Fallback model for documentation generation
115115
default_output: Default output directory
116+
max_tokens: Maximum tokens for LLM response (default: 32768)
117+
max_token_per_module: Maximum tokens per module for clustering (default: 36369)
118+
max_token_per_leaf_module: Maximum tokens per leaf module (default: 16000)
116119
agent_instructions: Custom agent instructions for documentation generation
117120
"""
118121
base_url: str
119122
main_model: str
120123
cluster_model: str
121124
fallback_model: str = "glm-4p5"
122125
default_output: str = "docs"
126+
max_tokens: int = 32768
127+
max_token_per_module: int = 36369
128+
max_token_per_leaf_module: int = 16000
123129
agent_instructions: AgentInstructions = field(default_factory=AgentInstructions)
124130

125131
def validate(self):
@@ -141,6 +147,9 @@ def to_dict(self) -> dict:
141147
'main_model': self.main_model,
142148
'cluster_model': self.cluster_model,
143149
'default_output': self.default_output,
150+
'max_tokens': self.max_tokens,
151+
'max_token_per_module': self.max_token_per_module,
152+
'max_token_per_leaf_module': self.max_token_per_leaf_module,
144153
}
145154
if self.agent_instructions and not self.agent_instructions.is_empty():
146155
result['agent_instructions'] = self.agent_instructions.to_dict()
@@ -167,6 +176,9 @@ def from_dict(cls, data: dict) -> 'Configuration':
167176
cluster_model=data.get('cluster_model', ''),
168177
fallback_model=data.get('fallback_model', 'glm-4p5'),
169178
default_output=data.get('default_output', 'docs'),
179+
max_tokens=data.get('max_tokens', 32768),
180+
max_token_per_module=data.get('max_token_per_module', 36369),
181+
max_token_per_leaf_module=data.get('max_token_per_leaf_module', 16000),
170182
agent_instructions=agent_instructions,
171183
)
172184

@@ -217,6 +229,9 @@ def to_backend_config(self, repo_path: str, output_dir: str, api_key: str, runti
217229
main_model=self.main_model,
218230
cluster_model=self.cluster_model,
219231
fallback_model=self.fallback_model,
232+
max_tokens=self.max_tokens,
233+
max_token_per_module=self.max_token_per_module,
234+
max_token_per_leaf_module=self.max_token_per_leaf_module,
220235
agent_instructions=final_instructions.to_dict() if final_instructions else None
221236
)
222237

codewiki/src/be/agent_tools/generate_sub_module_documentations.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from codewiki.src.be.prompt_template import SYSTEM_PROMPT, LEAF_SYSTEM_PROMPT, format_user_prompt
88
from codewiki.src.be.utils import is_complex_module, count_tokens
99
from codewiki.src.be.cluster_modules import format_potential_core_components
10-
from codewiki.src.config import MAX_TOKEN_PER_LEAF_MODULE
1110

1211
import logging
1312
logger = logging.getLogger(__name__)
@@ -47,7 +46,7 @@ async def generate_sub_module_documentation(
4746

4847
num_tokens = count_tokens(format_potential_core_components(core_component_ids, ctx.deps.components)[-1])
4948

50-
if is_complex_module(ctx.deps.components, core_component_ids) and ctx.deps.current_depth < ctx.deps.max_depth and num_tokens >= MAX_TOKEN_PER_LEAF_MODULE:
49+
if is_complex_module(ctx.deps.components, core_component_ids) and ctx.deps.current_depth < ctx.deps.max_depth and num_tokens >= ctx.deps.config.max_token_per_leaf_module:
5150
sub_agent = Agent(
5251
model=fallback_models,
5352
name=sub_module_name,

codewiki/src/be/cluster_modules.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from codewiki.src.be.dependency_analyzer.models.core import Node
88
from codewiki.src.be.llm_services import call_llm
99
from codewiki.src.be.utils import count_tokens
10-
from codewiki.src.config import MAX_TOKEN_PER_MODULE, Config
10+
from codewiki.src.config import Config
1111
from codewiki.src.be.prompt_template import format_cluster_prompt
1212

1313

@@ -54,7 +54,7 @@ def cluster_modules(
5454
"""
5555
potential_core_components, potential_core_components_with_code = format_potential_core_components(leaf_nodes, components)
5656

57-
if count_tokens(potential_core_components_with_code) <= MAX_TOKEN_PER_MODULE:
57+
if count_tokens(potential_core_components_with_code) <= config.max_token_per_module:
5858
logger.debug(f"Skipping clustering for {current_module_name} because the potential core components are too few: {count_tokens(potential_core_components_with_code)} tokens")
5959
return {}
6060

0 commit comments

Comments
 (0)