add tokenizer

musistudio
2025-12-31 12:40:27 +08:00
parent 38cc5feadb
commit b69ebf7ad1
13 changed files with 1116 additions and 13 deletions

View File

@@ -36,6 +36,7 @@
"@anthropic-ai/sdk": "^0.54.0",
"@fastify/cors": "^11.0.1",
"@google/genai": "^1.7.0",
"@huggingface/tokenizers": "^0.0.6",
"dotenv": "^16.5.0",
"fastify": "^5.4.0",
"fastify-plugin": "^5.1.0",

View File

@@ -30,6 +30,7 @@ import { errorHandler } from "./api/middleware";
import { registerApiRoutes } from "./api/routes";
import { ProviderService } from "./services/provider";
import { TransformerService } from "./services/transformer";
import { TokenizerService } from "./services/tokenizer";
import { router, calculateTokenCount, searchProjectBySession } from "./utils/router";
import { sessionUsageCache } from "./utils/cache";
@@ -68,6 +69,7 @@ class Server {
configService: ConfigService;
providerService!: ProviderService;
transformerService: TransformerService;
tokenizerService: TokenizerService;
constructor(options: ServerOptions = {}) {
const { initialConfig, ...fastifyOptions } = options;
@@ -80,6 +82,10 @@ class Server {
this.configService,
this.app.log
);
this.tokenizerService = new TokenizerService(
this.configService,
this.app.log
);
this.transformerService.initialize().finally(() => {
this.providerService = new ProviderService(
this.configService,
@@ -87,6 +93,10 @@ class Server {
this.app.log
);
});
// Initialize tokenizer service
this.tokenizerService.initialize().catch((error) => {
this.app.log.error(`Failed to initialize TokenizerService: ${error}`);
});
}
async register<Options extends FastifyPluginOptions = FastifyPluginOptions>(
@@ -127,12 +137,14 @@ class Server {
fastify.decorate('configService', this.configService);
fastify.decorate('transformerService', this.transformerService);
fastify.decorate('providerService', this.providerService);
fastify.decorate('tokenizerService', this.tokenizerService);
// Add router hook for main namespace
fastify.addHook('preHandler', async (req: any, reply: any) => {
const url = new URL(`http://127.0.0.1${req.url}`);
if (url.pathname.endsWith("/v1/messages")) {
await router(req, reply, {
configService: this.configService,
tokenizerService: this.tokenizerService,
});
}
});
@@ -157,16 +169,23 @@ class Server {
transformerService,
this.app.log
);
const tokenizerService = new TokenizerService(
configService,
this.app.log
);
await tokenizerService.initialize();
await this.app.register(async (fastify) => {
fastify.decorate('configService', configService);
fastify.decorate('transformerService', transformerService);
fastify.decorate('providerService', providerService);
fastify.decorate('tokenizerService', tokenizerService);
// Add router hook for namespace
fastify.addHook('preHandler', async (req: any, reply: any) => {
const url = new URL(`http://127.0.0.1${req.url}`);
if (url.pathname.endsWith("/v1/messages")) {
await router(req, reply, {
configService,
tokenizerService,
});
}
});
@@ -248,5 +267,6 @@ export { searchProjectBySession };
export { ConfigService } from "./services/config";
export { ProviderService } from "./services/provider";
export { TransformerService } from "./services/transformer";
export { TokenizerService } from "./services/tokenizer";
export { pluginManager, tokenSpeedPlugin, CCRPlugin, CCRPluginOptions, PluginMetadata } from "./plugins";
export { SSEParserTransform, SSESerializerTransform, rewriteStream } from "./utils/sse";
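
A minimal usage sketch (not part of this commit; construction details assumed): once a Server is up, the tokenizer service hangs off the instance, and calling countTokens() without a config uses the tiktoken cl100k_base fallback.

import Server from "@musistudio/llms";

const server = new Server();
// the constructor kicks off tokenizerService.initialize(); awaiting it again is harmless
await server.tokenizerService.initialize();
const result = await server.tokenizerService.countTokens({
messages: [{ role: "user", content: "Hello, tokenizer" }],
});
console.log(result.tokenCount, result.tokenizerUsed); // e.g. 4 "tiktoken-cl100k_base"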

View File

@@ -0,0 +1,203 @@
import { ConfigService } from "./config";
import {
ITokenizer,
TokenizeRequest,
TokenizerConfig,
TokenizerResult,
TokenizerOptions,
} from "../types/tokenizer";
import { TiktokenTokenizer } from "../tokenizer/tiktoken-tokenizer";
import { HuggingFaceTokenizer } from "../tokenizer/huggingface-tokenizer";
import { ApiTokenizer } from "../tokenizer/api-tokenizer";
/**
* TokenizerService - Manages tokenization for different model types
*
* Supports three types of tokenizers:
* - tiktoken: Fast, OpenAI-compatible tokenizer (default)
* - huggingface: Local model-based tokenizer for open-source models
* - api: API-based tokenizer for closed-source models
*
* Features:
* - Automatic fallback to tiktoken on errors
* - Config-driven tokenizer selection
* - Per-provider and per-model configuration
*/
export class TokenizerService {
private tokenizers: Map<string, ITokenizer> = new Map();
private configService: ConfigService;
private logger: any;
private options: TokenizerOptions;
// Fallback tokenizer (default to tiktoken)
private fallbackTokenizer?: ITokenizer;
constructor(
configService: ConfigService,
logger: any,
options: TokenizerOptions = {}
) {
this.configService = configService;
this.logger = logger;
this.options = {
timeout: options.timeout ?? 30000,
...options,
};
}
async initialize(): Promise<void> {
try {
// Initialize fallback tokenizer (tiktoken with cl100k_base)
this.fallbackTokenizer = new TiktokenTokenizer("cl100k_base");
await this.fallbackTokenizer.initialize();
this.tokenizers.set("fallback", this.fallbackTokenizer);
this.logger?.info("TokenizerService initialized successfully");
} catch (error: any) {
this.logger?.error(
`TokenizerService initialization error: ${error.message}`
);
throw error;
}
}
/**
* Get or create a tokenizer for specific configuration
*/
async getTokenizer(config: TokenizerConfig): Promise<ITokenizer> {
const cacheKey = this.getCacheKey(config);
// Check cache first
if (this.tokenizers.has(cacheKey)) {
return this.tokenizers.get(cacheKey)!;
}
let tokenizer: ITokenizer;
try {
switch (config.type) {
case "tiktoken":
tokenizer = new TiktokenTokenizer(
config.encoding || "cl100k_base"
);
break;
case "huggingface":
this.logger?.info(`Initializing HuggingFace tokenizer for model: ${config.model}`);
tokenizer = new HuggingFaceTokenizer(
config.model!,
this.logger,
{ timeout: this.options.timeout }
);
break;
case "api":
tokenizer = new ApiTokenizer(
config,
this.logger,
{ timeout: this.options.timeout }
);
break;
default:
throw new Error(`Unknown tokenizer type: ${config.type}`);
}
this.logger?.info(`Calling initialize() on ${config.type} tokenizer...`);
await tokenizer.initialize();
this.tokenizers.set(cacheKey, tokenizer);
this.logger?.info(
`Tokenizer initialized successfully: ${config.type} (${cacheKey})`
);
return tokenizer;
} catch (error: any) {
this.logger?.error(
`Failed to initialize ${config.type} tokenizer: ${error.message}`
);
this.logger?.error(`Error stack: ${error.stack}`);
// Return fallback tokenizer
if (!this.fallbackTokenizer) {
await this.initialize();
}
return this.fallbackTokenizer!;
}
}
/**
* Count tokens for a request using the specified tokenizer configuration
*/
async countTokens(
request: TokenizeRequest,
config?: TokenizerConfig
): Promise<TokenizerResult> {
// Get appropriate tokenizer
const tokenizer = config
? await this.getTokenizer(config)
: this.fallbackTokenizer!;
// Count tokens
const tokenCount = await tokenizer.countTokens(request);
return {
tokenCount,
tokenizerUsed: tokenizer.name,
cached: false,
};
}
/**
* Get tokenizer configuration for a specific model/provider
*/
getTokenizerConfigForModel(
providerName: string,
modelName: string
): TokenizerConfig | undefined {
const providers = this.configService.get<any[]>("providers") || [];
const provider = providers.find((p) => p.name === providerName);
if (!provider?.tokenizer) {
return undefined;
}
// Check model-specific config first
if (provider.tokenizer.models?.[modelName]) {
return provider.tokenizer.models[modelName];
}
// Fall back to default config
return provider.tokenizer.default;
}
/**
* Dispose all tokenizers
*/
dispose(): void {
this.tokenizers.forEach((tokenizer) => {
try {
tokenizer.dispose();
} catch (error) {
this.logger?.error(`Error disposing tokenizer: ${error}`);
}
});
this.tokenizers.clear();
}
/**
* Generate cache key from tokenizer config
*/
private getCacheKey(config: TokenizerConfig): string {
switch (config.type) {
case "tiktoken":
return `tiktoken:${config.encoding || "cl100k_base"}`;
case "huggingface":
return `hf:${config.model}`;
case "api":
return `api:${config.url}`;
default:
return `unknown:${JSON.stringify(config)}`;
}
}
}
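
A sketch of the three config shapes getTokenizer() accepts, with the cache key each maps to (the model ID and URL are the placeholder examples from the type docs, not values this commit ships):

import { TokenizerConfig } from "../types/tokenizer";

const tiktokenCfg: TokenizerConfig = { type: "tiktoken", encoding: "cl100k_base" }; // key "tiktoken:cl100k_base"
const hfCfg: TokenizerConfig = { type: "huggingface", model: "Qwen/Qwen2.5-Coder-32B-Instruct" }; // key "hf:Qwen/Qwen2.5-Coder-32B-Instruct"
const apiCfg: TokenizerConfig = {
type: "api",
url: "https://api.example.com/v1/tokenize", // placeholder URL
apiKey: "sk-...",
responseField: "usage.input_tokens", // dot path into the provider's JSON response
}; // key "api:https://api.example.com/v1/tokenize"

If any of these fails to initialize, getTokenizer() logs the error and returns the tiktoken fallback instead of throwing.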

View File

@@ -0,0 +1,271 @@
import {
ITokenizer,
TokenizeRequest,
TokenizerConfig,
} from "../types/tokenizer";
/**
* Options for API tokenizer
*/
interface ApiTokenizerOptions {
timeout?: number;
}
/**
* API-based tokenizer implementation
* Calls provider's tokenization API to get token counts
* Supports flexible configuration for different API formats
*/
export class ApiTokenizer implements ITokenizer {
readonly type = "api";
readonly name: string;
private config: Required<Pick<TokenizerConfig, 'url' | 'apiKey' | 'requestFormat' | 'responseField'>> & {
headers: Record<string, string>;
};
private logger: any;
private options: ApiTokenizerOptions;
constructor(
config: TokenizerConfig,
logger: any,
options: ApiTokenizerOptions = {}
) {
if (!config.url || !config.apiKey) {
throw new Error("API tokenizer requires url and apiKey");
}
this.config = {
url: config.url,
apiKey: config.apiKey,
requestFormat: config.requestFormat || "standard",
responseField: config.responseField || "token_count",
headers: config.headers || {},
};
this.logger = logger;
this.options = options;
try {
const url = new URL(config.url);
this.name = `api-${url.hostname}`;
} catch {
this.name = `api-${config.url}`;
}
}
async initialize(): Promise<void> {
// API tokenizers don't need initialization
// Just verify the URL is valid
try {
new URL(this.config.url);
} catch (error) {
throw new Error(`Invalid API URL: ${this.config.url}`);
}
}
async countTokens(request: TokenizeRequest): Promise<number> {
try {
// Prepare request body based on format
const requestBody = this.formatRequestBody(request);
// Prepare headers
const headers = {
"Content-Type": "application/json",
"Authorization": `Bearer ${this.config.apiKey}`,
...this.config.headers,
};
// Make API call
const controller = new AbortController();
const timeoutId = setTimeout(
() => controller.abort(),
this.options.timeout || 30000
);
const response = await fetch(this.config.url, {
method: "POST",
headers,
body: JSON.stringify(requestBody),
signal: controller.signal,
});
clearTimeout(timeoutId);
if (!response.ok) {
throw new Error(
`API tokenizer request failed: ${response.status} ${response.statusText}`
);
}
const data = await response.json();
// Extract token count from response
const tokenCount = this.extractTokenCount(data);
return tokenCount;
} catch (error: any) {
if (error.name === "AbortError") {
throw new Error("API tokenizer request timed out");
}
throw error;
}
}
isInitialized(): boolean {
return true;
}
dispose(): void {
// Nothing to dispose for API tokenizer
}
/**
* Format request body based on requestFormat
*/
private formatRequestBody(request: TokenizeRequest): any {
switch (this.config.requestFormat) {
case "standard":
// Standard format: send the entire request
return request;
case "openai":
// OpenAI format: extract text content and format as OpenAI request
return {
model: "gpt-3.5-turbo", // Placeholder, some APIs require this
messages: this.extractMessagesAsOpenAIFormat(request),
};
case "anthropic":
// Anthropic format: extract messages and tools
return {
messages: request.messages || [],
system: request.system,
tools: request.tools,
};
case "custom":
// Custom format: send concatenated text
return {
text: this.extractConcatenatedText(request),
};
default:
return request;
}
}
/**
* Extract messages in OpenAI format
*/
private extractMessagesAsOpenAIFormat(request: TokenizeRequest): any[] {
if (!request.messages) return [];
return request.messages.map((msg) => ({
role: msg.role,
content: this.extractTextFromMessage(msg),
}));
}
/**
* Extract text from a message
*/
private extractTextFromMessage(message: any): string {
if (typeof message.content === "string") {
return message.content;
}
if (Array.isArray(message.content)) {
return message.content
.map((part: any) => {
if (part.type === "text" && part.text) {
return part.text;
} else if (part.type === "tool_use" && part.input) {
return JSON.stringify(part.input);
} else if (part.type === "tool_result") {
return typeof part.content === "string"
? part.content
: JSON.stringify(part.content);
}
return "";
})
.join(" ");
}
return "";
}
/**
* Extract all text from request
*/
private extractConcatenatedText(request: TokenizeRequest): string {
const parts: string[] = [];
// Extract messages
if (request.messages) {
request.messages.forEach((msg) => {
parts.push(this.extractTextFromMessage(msg));
});
}
// Extract system
if (typeof request.system === "string") {
parts.push(request.system);
} else if (Array.isArray(request.system)) {
request.system.forEach((item: any) => {
if (item.type === "text") {
if (typeof item.text === "string") {
parts.push(item.text);
} else if (Array.isArray(item.text)) {
item.text.forEach((textPart: any) => {
if (textPart) parts.push(textPart);
});
}
}
});
}
// Extract tools
if (request.tools) {
request.tools.forEach((tool) => {
if (tool.name) parts.push(tool.name);
if (tool.description) parts.push(tool.description);
if (tool.input_schema) parts.push(JSON.stringify(tool.input_schema));
});
}
return parts.join(" ");
}
/**
* Extract token count from response using the configured field path
*/
private extractTokenCount(data: any): number {
try {
const fieldPath = this.config.responseField;
const parts = fieldPath.split(".");
let value: any = data;
for (const part of parts) {
if (value === undefined || value === null) {
throw new Error(`Field path '${fieldPath}' not found in response`);
}
value = value[part];
}
if (typeof value !== "number") {
throw new Error(
`Expected number at field path '${fieldPath}', got ${typeof value}`
);
}
return value;
} catch (error: any) {
this.logger?.error(
`Failed to extract token count from API response: ${error.message}. Response: ${JSON.stringify(data)}`
);
throw new Error(
`Invalid response from API tokenizer: ${error.message}`
);
}
}
}
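
The responseField dot-path walk in extractTokenCount() behaves like this self-contained sketch (illustrative only, not code from the commit):

function resolvePath(data: any, fieldPath: string): unknown {
let value: any = data;
for (const part of fieldPath.split(".")) {
if (value === undefined || value === null) return undefined;
value = value[part];
}
return value;
}

resolvePath({ usage: { input_tokens: 42 } }, "usage.input_tokens"); // 42
resolvePath({ token_count: 7 }, "token_count"); // 7 (the default field)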

View File

@@ -0,0 +1,229 @@
import { join } from "path";
import { homedir } from "os";
import { existsSync, mkdirSync } from "fs";
import { promises as fs } from "fs";
import {
ITokenizer,
TokenizeRequest,
} from "../types/tokenizer";
import { Tokenizer } from "@huggingface/tokenizers";
/**
* Options for HuggingFace tokenizer
*/
interface HFTokenizerOptions {
timeout?: number;
cacheDir?: string;
}
/**
* HuggingFace tokenizer implementation
* Uses @huggingface/tokenizers library for lightweight tokenization
*/
export class HuggingFaceTokenizer implements ITokenizer {
readonly type = "huggingface";
readonly name: string;
private readonly modelId: string;
private readonly logger: any;
private readonly options: HFTokenizerOptions;
private tokenizer: any = null;
private readonly cacheDir: string;
private readonly safeModelName: string;
constructor(modelId: string, logger: any, options: HFTokenizerOptions = {}) {
this.modelId = modelId;
this.logger = logger;
this.options = options;
this.cacheDir = options.cacheDir || join(homedir(), ".claude-code-router", ".huggingface");
// Cache safe model name to avoid repeated regex operations
this.safeModelName = modelId.replace(/\//g, "_").replace(/[^a-zA-Z0-9_-]/g, "_");
this.name = `huggingface-${modelId.split("/").pop()}`;
}
/**
* Get cache file paths for tokenizer files
*/
private getCachePaths() {
const modelDir = join(this.cacheDir, this.safeModelName);
return {
modelDir,
tokenizerJson: join(modelDir, "tokenizer.json"),
tokenizerConfig: join(modelDir, "tokenizer_config.json"),
};
}
/**
* Ensure directory exists
*/
private ensureDir(dir: string): void {
if (!existsSync(dir)) {
mkdirSync(dir, { recursive: true });
}
}
/**
* Load tokenizer files from local cache
*/
private async loadFromCache(): Promise<{ tokenizerJson: any; tokenizerConfig: any } | null> {
try {
const paths = this.getCachePaths();
if (!existsSync(paths.tokenizerJson) || !existsSync(paths.tokenizerConfig)) {
return null;
}
const [tokenizerJsonContent, tokenizerConfigContent] = await Promise.all([
fs.readFile(paths.tokenizerJson, "utf-8"),
fs.readFile(paths.tokenizerConfig, "utf-8"),
]);
return {
tokenizerJson: JSON.parse(tokenizerJsonContent),
tokenizerConfig: JSON.parse(tokenizerConfigContent),
};
} catch (error: any) {
this.logger?.warn(`Failed to load from cache: ${error.message}`);
return null;
}
}
/**
* Download tokenizer files from Hugging Face Hub and save to cache
*/
private async downloadAndCache(): Promise<{ tokenizerJson: any; tokenizerConfig: any }> {
const paths = this.getCachePaths();
const urls = {
json: `https://huggingface.co/${this.modelId}/resolve/main/tokenizer.json`,
config: `https://huggingface.co/${this.modelId}/resolve/main/tokenizer_config.json`,
};
this.logger?.info(`Downloading tokenizer files for ${this.modelId}`);
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), this.options.timeout || 30000);
try {
const [jsonRes, configRes] = await Promise.all([
fetch(urls.json, { signal: controller.signal }),
fetch(urls.config, { signal: controller.signal }),
]);
if (!jsonRes.ok) {
throw new Error(`Failed to fetch tokenizer.json: ${jsonRes.statusText}`);
}
const [tokenizerJson, tokenizerConfig] = await Promise.all([
jsonRes.json(),
configRes.ok ? configRes.json() : Promise.resolve({}),
]);
this.ensureDir(paths.modelDir);
await Promise.all([
fs.writeFile(paths.tokenizerJson, JSON.stringify(tokenizerJson, null, 2)),
fs.writeFile(paths.tokenizerConfig, JSON.stringify(tokenizerConfig, null, 2)),
]);
return { tokenizerJson, tokenizerConfig };
} finally {
clearTimeout(timeoutId);
}
}
async initialize(): Promise<void> {
try {
this.logger?.info(`Initializing HuggingFace tokenizer: ${this.modelId}`);
this.ensureDir(this.cacheDir);
const tokenizerData = await this.loadFromCache() || await this.downloadAndCache();
this.tokenizer = new Tokenizer(tokenizerData.tokenizerJson, tokenizerData.tokenizerConfig);
this.logger?.info(`Tokenizer initialized: ${this.name}`);
} catch (error: any) {
this.logger?.error(`Failed to initialize tokenizer: ${error.message}`);
throw new Error(`Failed to initialize HuggingFace tokenizer for ${this.modelId}: ${error.message}`);
}
}
async countTokens(request: TokenizeRequest): Promise<number> {
if (!this.tokenizer) {
throw new Error("Tokenizer not initialized");
}
try {
const text = this.extractTextFromRequest(request);
return this.tokenizer.encode(text).ids.length;
} catch (error: any) {
this.logger?.error(`Error counting tokens: ${error.message}`);
throw error;
}
}
isInitialized(): boolean {
return this.tokenizer !== null;
}
dispose(): void {
this.tokenizer = null;
}
/**
* Extract text from tokenize request
*/
private extractTextFromRequest(request: TokenizeRequest): string {
const parts: string[] = [];
const { messages, system, tools } = request;
// Extract messages
if (Array.isArray(messages)) {
for (const message of messages) {
if (typeof message.content === "string") {
parts.push(message.content);
} else if (Array.isArray(message.content)) {
for (const contentPart of message.content) {
if (contentPart.type === "text" && contentPart.text) {
parts.push(contentPart.text);
} else if (contentPart.type === "tool_use" && contentPart.input) {
parts.push(JSON.stringify(contentPart.input));
} else if (contentPart.type === "tool_result") {
parts.push(
typeof contentPart.content === "string"
? contentPart.content
: JSON.stringify(contentPart.content)
);
}
}
}
}
}
// Extract system
if (typeof system === "string") {
parts.push(system);
} else if (Array.isArray(system)) {
for (const item of system) {
if (item.type === "text") {
if (typeof item.text === "string") {
parts.push(item.text);
} else if (Array.isArray(item.text)) {
for (const textPart of item.text) {
if (textPart) parts.push(textPart);
}
}
}
}
}
// Extract tools
if (tools) {
for (const tool of tools) {
if (tool.name) parts.push(tool.name);
if (tool.description) parts.push(tool.description);
if (tool.input_schema) parts.push(JSON.stringify(tool.input_schema));
}
}
return parts.join(" ");
}
}
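
Cache layout, as a sketch using the placeholder model ID from the type docs: safeModelName rewrites "/" and "." to "_", so "Qwen/Qwen2.5-Coder-32B-Instruct" is cached under ~/.claude-code-router/.huggingface/Qwen_Qwen2_5-Coder-32B-Instruct/ as tokenizer.json and tokenizer_config.json. Typical use:

const tok = new HuggingFaceTokenizer("Qwen/Qwen2.5-Coder-32B-Instruct", console);
await tok.initialize(); // first run downloads from huggingface.co, later runs hit the cache
const count = await tok.countTokens({ messages: [{ role: "user", content: "hello" }] });
tok.dispose();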

View File

@@ -0,0 +1,112 @@
import { get_encoding, Tiktoken } from "tiktoken";
import {
ITokenizer,
TokenizeRequest,
} from "../types/tokenizer";
/**
* Tiktoken-based tokenizer implementation
* Uses tiktoken library for fast token counting (OpenAI compatible)
*/
export class TiktokenTokenizer implements ITokenizer {
readonly type = "tiktoken";
readonly name: string;
private encoding?: Tiktoken;
private encodingName: string;
constructor(encodingName: string = "cl100k_base") {
this.encodingName = encodingName;
this.name = `tiktoken-${encodingName}`;
try {
this.encoding = get_encoding(encodingName);
} catch (error) {
throw new Error(`Failed to initialize tiktoken encoding: ${encodingName}`);
}
}
async initialize(): Promise<void> {
// Encoding is already initialized in constructor
if (!this.encoding) {
throw new Error("Tiktoken encoding not initialized");
}
}
async countTokens(request: TokenizeRequest): Promise<number> {
if (!this.encoding) {
throw new Error("Encoding not initialized");
}
let tokenCount = 0;
const { messages, system, tools } = request;
// Count messages
if (Array.isArray(messages)) {
messages.forEach((message) => {
if (typeof message.content === "string") {
tokenCount += this.encoding!.encode(message.content).length;
} else if (Array.isArray(message.content)) {
message.content.forEach((contentPart: any) => {
if (contentPart.type === "text") {
tokenCount += this.encoding!.encode(contentPart.text).length;
} else if (contentPart.type === "tool_use") {
tokenCount += this.encoding!.encode(
JSON.stringify(contentPart.input)
).length;
} else if (contentPart.type === "tool_result") {
const content =
typeof contentPart.content === "string"
? contentPart.content
: JSON.stringify(contentPart.content);
tokenCount += this.encoding!.encode(content).length;
}
});
}
});
}
// Count system
if (typeof system === "string") {
tokenCount += this.encoding.encode(system).length;
} else if (Array.isArray(system)) {
system.forEach((item: any) => {
if (item.type !== "text") return;
if (typeof item.text === "string") {
tokenCount += this.encoding!.encode(item.text).length;
} else if (Array.isArray(item.text)) {
item.text.forEach((textPart: any) => {
tokenCount += this.encoding!.encode(textPart || "").length;
});
}
});
}
// Count tools
if (tools) {
tools.forEach((tool: any) => {
if (tool.description) {
tokenCount += this.encoding!.encode(
tool.name + tool.description
).length;
}
if (tool.input_schema) {
tokenCount += this.encoding!.encode(
JSON.stringify(tool.input_schema)
).length;
}
});
}
return tokenCount;
}
isInitialized(): boolean {
return this.encoding !== undefined;
}
dispose(): void {
if (this.encoding) {
this.encoding.free();
this.encoding = undefined;
}
}
}
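
A short usage sketch: the encoding is loaded synchronously in the constructor, so counting works immediately after construction, and dispose() frees the underlying WASM encoding.

const tok = new TiktokenTokenizer(); // defaults to cl100k_base
const count = await tok.countTokens({
messages: [{ role: "user", content: "How many tokens is this?" }],
system: "You are concise.",
});
tok.dispose();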

View File

@@ -11,6 +11,7 @@ import type {
import type { ChatCompletionTool } from "openai/resources/chat/completions";
import type { Tool as AnthropicTool } from "@anthropic-ai/sdk/resources/messages";
import { Transformer } from "./transformer";
import type { ProviderTokenizerConfig } from "./tokenizer";
export interface UrlCitation {
url: string;
@@ -236,4 +237,5 @@ export interface ConfigProvider {
use?: string[] | Array<any>[];
};
};
tokenizer?: ProviderTokenizerConfig;
}
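
A hedged example of the new field (the model names below are placeholders, not values from this commit); getTokenizerConfigForModel() checks the models map first and falls back to default:

import type { ProviderTokenizerConfig } from "./tokenizer";

const tokenizer: ProviderTokenizerConfig = {
default: { type: "tiktoken", encoding: "cl100k_base" },
models: {
"qwen/qwen2.5-coder-32b-instruct": {
type: "huggingface",
model: "Qwen/Qwen2.5-Coder-32B-Instruct",
},
},
};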

packages/core/src/types/tokenizer.d.ts (vendored, new file)
View File

@@ -0,0 +1,131 @@
/**
* Tokenizer configuration types
*/
/**
* Tokenizer type enum
*/
export type TokenizerType = 'tiktoken' | 'huggingface' | 'api';
/**
* API request format type
*/
export type ApiRequestFormat = 'standard' | 'openai' | 'anthropic' | 'custom';
/**
* Configuration for a specific tokenizer
*/
export interface TokenizerConfig {
/** Type of tokenizer to use */
type: TokenizerType;
/** For tiktoken: encoding name (e.g., 'cl100k_base', 'p50k_base') */
encoding?: string;
/** For huggingface: model ID (e.g., 'Qwen/Qwen2.5-Coder-32B-Instruct') */
model?: string;
/** For API-based tokenizers: complete API URL (e.g., 'https://api.example.com/v1/tokenize') */
url?: string;
/** For API-based tokenizers: API key */
apiKey?: string;
/** For API-based tokenizers: request format (default: 'standard') */
requestFormat?: ApiRequestFormat;
/** For API-based tokenizers: response field path to extract token count (default: 'token_count') */
responseField?: string;
/** For API-based tokenizers: custom request headers */
headers?: Record<string, string>;
/** Fallback tokenizer type if this one fails */
fallback?: TokenizerType;
}
/**
* Options for TokenizerService
*/
export interface TokenizerOptions {
/** Enable token count caching */
cacheEnabled?: boolean;
/** Maximum cache size */
cacheSize?: number;
/** Timeout for API calls (in milliseconds) */
timeout?: number;
}
/**
* Token count request structure (matches existing calculateTokenCount interface)
*/
export interface TokenizeRequest {
messages: Array<{
role: string;
content: string | Array<{
type: string;
text?: string;
input?: any;
content?: string | any;
}>;
}>;
system?: string | Array<{
type: string;
text?: string | string[];
}>;
tools?: Array<{
name: string;
description?: string;
input_schema: object;
}>;
}
/**
* Result from token counting operation
*/
export interface TokenizerResult {
/** Total token count */
tokenCount: number;
/** Name/type of tokenizer used */
tokenizerUsed: string;
/** Whether the result was from cache */
cached: boolean;
}
/**
* Abstract interface for all tokenizers
*/
export interface ITokenizer {
/** Tokenizer type identifier */
readonly type: string;
/** Human-readable tokenizer name */
readonly name: string;
/** Initialize the tokenizer (async for loading models, etc.) */
initialize(): Promise<void>;
/** Count tokens for a given request */
countTokens(request: TokenizeRequest): Promise<number>;
/** Check if tokenizer is initialized */
isInitialized(): boolean;
/** Clean up resources */
dispose(): void;
}
/**
* Provider-specific tokenizer configuration
*/
export interface ProviderTokenizerConfig {
/** Default tokenizer for all models in this provider */
default?: TokenizerConfig;
/** Model-specific tokenizer configurations */
models?: Record<string, TokenizerConfig>;
}
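
A sketch of a TokenizeRequest exercising each content shape the counters handle (all values illustrative):

const req: TokenizeRequest = {
messages: [
{ role: "user", content: "plain string content" },
{
role: "assistant",
content: [
{ type: "text", text: "calling a tool" },
{ type: "tool_use", input: { path: "src/index.ts" } },
],
},
{ role: "user", content: [{ type: "tool_result", content: "file contents..." }] },
],
system: [{ type: "text", text: "You are a helpful assistant." }],
tools: [
{ name: "read_file", description: "Read a file from disk", input_schema: { type: "object" } },
],
};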

View File

@@ -6,6 +6,7 @@ import { join } from "path";
import { CLAUDE_PROJECTS_DIR, HOME_DIR } from "@CCR/shared";
import { LRUCache } from "lru-cache";
import { ConfigService } from "../services/config";
import { TokenizerService } from "../services/tokenizer";
// Types from @anthropic-ai/sdk
interface Tool {
@@ -200,6 +201,7 @@ const getUseModel = async (
export interface RouterContext {
configService: ConfigService;
tokenizerService?: TokenizerService;
event?: any;
}
@@ -225,12 +227,38 @@ export const router = async (req: any, _res: any, context: RouterContext) => {
}
try {
const tokenCount = calculateTokenCount(
messages as MessageParam[],
system,
tools as Tool[]
// Try to get tokenizer config for the current model
const [providerName, modelName] = req.body.model.split(",");
const tokenizerConfig = context.tokenizerService?.getTokenizerConfigForModel(
providerName,
modelName
);
// Use TokenizerService if available, otherwise fall back to legacy method
let tokenCount: number;
if (context.tokenizerService) {
const result = await context.tokenizerService.countTokens(
{
messages: messages as MessageParam[],
system,
tools: tools as Tool[],
},
tokenizerConfig
);
tokenCount = result.tokenCount;
req.log.debug(
`Token count: ${tokenCount} (tokenizer: ${result.tokenizerUsed}, cached: ${result.cached})`
);
} else {
// Legacy fallback
tokenCount = calculateTokenCount(
messages as MessageParam[],
system,
tools as Tool[]
);
}
let model;
const customRouterPath = configService.get("CUSTOM_ROUTER_PATH");
if (customRouterPath) {
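
For context: route values in claude-code-router use the "providerName,modelName" string convention, which the split above relies on (values below are placeholders):

const [providerName, modelName] = "openrouter,qwen/qwen2.5-coder-32b-instruct".split(",");
// providerName === "openrouter", modelName === "qwen/qwen2.5-coder-32b-instruct"
// Without a comma, modelName is undefined, no tokenizer config is found,
// and countTokens() silently uses the tiktoken fallback.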

View File

@@ -1,4 +1,4 @@
import Server, { calculateTokenCount } from "@musistudio/llms";
import Server, { calculateTokenCount, TokenizerService } from "@musistudio/llms";
import { readConfigFile, writeConfigFile, backupConfigFile } from "./utils";
import { join } from "path";
import fastifyStatic from "@fastify/static";
@@ -34,7 +34,48 @@ export const createServer = async (config: any): Promise<any> => {
});
app.post("/v1/messages/count_tokens", async (req: any, reply: any) => {
const {messages, tools, system} = req.body;
const {messages, tools, system, model} = req.body;
const tokenizerService = (app as any)._server!.tokenizerService as TokenizerService;
// If model is specified in "providerName,modelName" format, use the configured tokenizer
if (model && model.includes(",") && tokenizerService) {
try {
const [provider, modelName] = model.split(",");
req.log?.info(`Looking up tokenizer for provider: ${provider}, model: ${modelName}`);
const tokenizerConfig = tokenizerService.getTokenizerConfigForModel(provider, modelName);
if (!tokenizerConfig) {
req.log?.warn(`No tokenizer config found for ${provider},${modelName}, using default tiktoken`);
} else {
req.log?.info(`Using tokenizer config: ${JSON.stringify(tokenizerConfig)}`);
}
const result = await tokenizerService.countTokens(
{ messages, system, tools },
tokenizerConfig
);
return {
"input_tokens": result.tokenCount,
"tokenizer": result.tokenizerUsed,
};
} catch (error: any) {
req.log?.error(`Error using configured tokenizer: ${error.message}`);
req.log?.error(error.stack);
// Fall back to default calculation
}
} else {
if (!model) {
req.log?.info(`No model specified, using default tiktoken`);
} else if (!model.includes(",")) {
req.log?.info(`Model "${model}" does not contain comma, using default tiktoken`);
} else if (!tokenizerService) {
req.log?.warn(`TokenizerService not available, using default tiktoken`);
}
}
// Default to tiktoken calculation
const tokenCount = calculateTokenCount(messages, system, tools);
return { "input_tokens": tokenCount }
});
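
Exercising the endpoint, as a sketch (port 3456 is assumed here as the router's usual default; adjust to your setup). The "tokenizer" field only appears when a configured tokenizer was used; the fallback path returns input_tokens alone:

const res = await fetch("http://127.0.0.1:3456/v1/messages/count_tokens", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
model: "openrouter,qwen/qwen2.5-coder-32b-instruct", // "provider,model" picks the configured tokenizer
messages: [{ role: "user", content: "Hello" }],
}),
});
console.log(await res.json());
// e.g. { "input_tokens": 3, "tokenizer": "huggingface-Qwen2.5-Coder-32B-Instruct" }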

View File

@@ -68,4 +68,54 @@ declare module "@musistudio/llms" {
constructor(configService: any, logger: any);
initialize(): Promise<void>;
}
// Tokenizer types
export type TokenizerType = 'tiktoken' | 'huggingface' | 'api';
export type ApiRequestFormat = 'standard' | 'openai' | 'anthropic' | 'custom';
export interface TokenizerConfig {
type: TokenizerType;
encoding?: string;
model?: string;
url?: string;
apiKey?: string;
requestFormat?: ApiRequestFormat;
responseField?: string;
headers?: Record<string, string>;
fallback?: TokenizerType;
}
export interface TokenizeRequest {
messages: Array<{
role: string;
content: string | Array<{
type: string;
text?: string;
input?: any;
content?: string | any;
}>;
}>;
system?: string | Array<{
type: string;
text?: string | string[];
}>;
tools?: Array<{
name: string;
description?: string;
input_schema: object;
}>;
}
export interface TokenizerResult {
tokenCount: number;
tokenizerUsed: string;
cached: boolean;
}
export class TokenizerService {
countTokens(request: TokenizeRequest, config?: TokenizerConfig): Promise<TokenizerResult>;
getTokenizerConfigForModel(providerName: string, modelName: string): TokenizerConfig | undefined;
clearCache(): void;
dispose(): void;
}
}

pnpm-lock.yaml (generated)
View File

@@ -118,6 +118,9 @@ importers:
'@google/genai':
specifier: ^1.7.0
version: 1.24.0
'@huggingface/tokenizers':
specifier: ^0.0.6
version: 0.0.6
dotenv:
specifier: ^16.5.0
version: 16.6.1
@@ -2004,6 +2007,9 @@ packages:
'@hapi/topo@5.1.0':
resolution: {integrity: sha512-foQZKJig7Ob0BMAYBfcJk8d77QtOe7Wo4ox7ff1lQYoNNAb6jwcY1ncdoy2e9wQZzvNy7ODZCYJkK8kzmcAnAg==}
'@huggingface/tokenizers@0.0.6':
resolution: {integrity: sha512-bnn3VWPiOwk613kFGHCCTc2TOVB8QCCsHTJGVlfyV5tQIDCn83dWrFkLB670Pr2xVs1e5ziSwlysX+tga+R3nQ==}
'@humanfs/core@0.19.1':
resolution: {integrity: sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==}
engines: {node: '>=18.18.0'}
@@ -6917,6 +6923,11 @@ packages:
engines: {node: '>=10'}
hasBin: true
semver@7.7.3:
resolution: {integrity: sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==}
engines: {node: '>=10'}
hasBin: true
send@0.19.2:
resolution: {integrity: sha512-VMbMxbDeehAxpOtWJXlcUS5E8iXh6QmN+BkRX1GARS3wRaXEEgzCcB10gTQazO42tpNIya8xIyNx8fll1OFPrg==}
engines: {node: '>= 0.8.0'}
@@ -10066,6 +10077,8 @@ snapshots:
dependencies:
'@hapi/hoek': 9.3.0
'@huggingface/tokenizers@0.0.6': {}
'@humanfs/core@0.19.1': {}
'@humanfs/node@0.16.7':
@@ -12084,7 +12097,7 @@ snapshots:
postcss-modules-scope: 3.2.1(postcss@8.5.6)
postcss-modules-values: 4.0.0(postcss@8.5.6)
postcss-value-parser: 4.2.0
semver: 7.7.2
semver: 7.7.3
optionalDependencies:
webpack: 5.104.1(esbuild@0.25.10)
@@ -14513,7 +14526,7 @@ snapshots:
got: 12.6.1
registry-auth-token: 5.1.0
registry-url: 6.0.1
semver: 7.7.2
semver: 7.7.3
param-case@3.0.4:
dependencies:
@@ -14792,7 +14805,7 @@ snapshots:
cosmiconfig: 8.3.6(typescript@5.9.3)
jiti: 1.21.7
postcss: 8.5.6
semver: 7.7.2
semver: 7.7.3
webpack: 5.104.1(esbuild@0.25.10)
transitivePeerDependencies:
- typescript
@@ -15639,7 +15652,7 @@ snapshots:
semver-diff@4.0.0:
dependencies:
semver: 7.7.2
semver: 7.7.3
semver@5.7.2: {}
@@ -15647,6 +15660,8 @@ snapshots:
semver@7.7.2: {}
semver@7.7.3: {}
send@0.19.2:
dependencies:
debug: 2.6.9