add tokenizer

musistudio
2025-12-31 12:40:27 +08:00
parent 38cc5feadb
commit b69ebf7ad1
13 changed files with 1116 additions and 13 deletions

View File

@@ -36,6 +36,7 @@
"@anthropic-ai/sdk": "^0.54.0",
"@fastify/cors": "^11.0.1",
"@google/genai": "^1.7.0",
"@huggingface/tokenizers": "^0.0.6",
"dotenv": "^16.5.0",
"fastify": "^5.4.0",
"fastify-plugin": "^5.1.0",

View File

@@ -30,6 +30,7 @@ import { errorHandler } from "./api/middleware";
import { registerApiRoutes } from "./api/routes";
import { ProviderService } from "./services/provider";
import { TransformerService } from "./services/transformer";
import { TokenizerService } from "./services/tokenizer";
import { router, calculateTokenCount, searchProjectBySession } from "./utils/router";
import { sessionUsageCache } from "./utils/cache";
@@ -68,6 +69,7 @@ class Server {
configService: ConfigService;
providerService!: ProviderService;
transformerService: TransformerService;
tokenizerService: TokenizerService;
constructor(options: ServerOptions = {}) {
const { initialConfig, ...fastifyOptions } = options;
@@ -80,6 +82,10 @@ class Server {
this.configService,
this.app.log
);
this.tokenizerService = new TokenizerService(
this.configService,
this.app.log
);
this.transformerService.initialize().finally(() => {
this.providerService = new ProviderService(
this.configService,
@@ -87,6 +93,10 @@ class Server {
this.app.log
);
});
// Initialize tokenizer service
this.tokenizerService.initialize().catch((error) => {
this.app.log.error(`Failed to initialize TokenizerService: ${error}`);
});
}
async register<Options extends FastifyPluginOptions = FastifyPluginOptions>(
@@ -127,12 +137,14 @@ class Server {
fastify.decorate('configService', this.configService);
fastify.decorate('transformerService', this.transformerService);
fastify.decorate('providerService', this.providerService);
fastify.decorate('tokenizerService', this.tokenizerService);
// Add router hook for main namespace
fastify.addHook('preHandler', async (req: any, reply: any) => {
const url = new URL(`http://127.0.0.1${req.url}`);
if (url.pathname.endsWith("/v1/messages")) {
await router(req, reply, {
configService: this.configService,
tokenizerService: this.tokenizerService,
});
}
});
@@ -157,16 +169,23 @@ class Server {
transformerService,
this.app.log
);
const tokenizerService = new TokenizerService(
configService,
this.app.log
);
await tokenizerService.initialize();
await this.app.register(async (fastify) => {
fastify.decorate('configService', configService);
fastify.decorate('transformerService', transformerService);
fastify.decorate('providerService', providerService);
fastify.decorate('tokenizerService', tokenizerService);
// Add router hook for namespace
fastify.addHook('preHandler', async (req: any, reply: any) => {
const url = new URL(`http://127.0.0.1${req.url}`);
if (url.pathname.endsWith("/v1/messages")) {
await router(req, reply, {
configService,
tokenizerService,
});
}
});
@@ -248,5 +267,6 @@ export { searchProjectBySession };
export { ConfigService } from "./services/config";
export { ProviderService } from "./services/provider";
export { TransformerService } from "./services/transformer";
export { TokenizerService } from "./services/tokenizer";
export { pluginManager, tokenSpeedPlugin, CCRPlugin, CCRPluginOptions, PluginMetadata } from "./plugins";
export { SSEParserTransform, SSESerializerTransform, rewriteStream } from "./utils/sse";
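
A minimal usage sketch (not part of this commit; construction details assumed): once a Server is up, the tokenizer service hangs off the instance, and calling countTokens() without a config uses the tiktoken cl100k_base fallback.

import Server from "@musistudio/llms";

const server = new Server();
// the constructor kicks off tokenizerService.initialize(); awaiting it again is harmless
await server.tokenizerService.initialize();
const result = await server.tokenizerService.countTokens({
messages: [{ role: "user", content: "Hello, tokenizer" }],
});
console.log(result.tokenCount, result.tokenizerUsed); // e.g. 4 "tiktoken-cl100k_base"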

View File

@@ -0,0 +1,203 @@
import { ConfigService } from "./config";
import {
ITokenizer,
TokenizeRequest,
TokenizerConfig,
TokenizerResult,
TokenizerOptions,
} from "../types/tokenizer";
import { TiktokenTokenizer } from "../tokenizer/tiktoken-tokenizer";
import { HuggingFaceTokenizer } from "../tokenizer/huggingface-tokenizer";
import { ApiTokenizer } from "../tokenizer/api-tokenizer";
/**
* TokenizerService - Manages tokenization for different model types
*
* Supports three types of tokenizers:
* - tiktoken: Fast, OpenAI-compatible tokenizer (default)
* - huggingface: Local model-based tokenizer for open-source models
* - api: API-based tokenizer for closed-source models
*
* Features:
* - Automatic fallback to tiktoken on errors
* - Config-driven tokenizer selection
* - Per-provider and per-model configuration
*/
export class TokenizerService {
private tokenizers: Map<string, ITokenizer> = new Map();
private configService: ConfigService;
private logger: any;
private options: TokenizerOptions;
// Fallback tokenizer (default to tiktoken)
private fallbackTokenizer?: ITokenizer;
constructor(
configService: ConfigService,
logger: any,
options: TokenizerOptions = {}
) {
this.configService = configService;
this.logger = logger;
this.options = {
timeout: options.timeout ?? 30000,
...options,
};
}
async initialize(): Promise<void> {
try {
// Initialize fallback tokenizer (tiktoken with cl100k_base)
this.fallbackTokenizer = new TiktokenTokenizer("cl100k_base");
await this.fallbackTokenizer.initialize();
this.tokenizers.set("fallback", this.fallbackTokenizer);
this.logger?.info("TokenizerService initialized successfully");
} catch (error: any) {
this.logger?.error(
`TokenizerService initialization error: ${error.message}`
);
throw error;
}
}
/**
* Get or create a tokenizer for specific configuration
*/
async getTokenizer(config: TokenizerConfig): Promise<ITokenizer> {
const cacheKey = this.getCacheKey(config);
// Check cache first
if (this.tokenizers.has(cacheKey)) {
return this.tokenizers.get(cacheKey)!;
}
let tokenizer: ITokenizer;
try {
switch (config.type) {
case "tiktoken":
tokenizer = new TiktokenTokenizer(
config.encoding || "cl100k_base"
);
break;
case "huggingface":
this.logger?.info(`Initializing HuggingFace tokenizer for model: ${config.model}`);
tokenizer = new HuggingFaceTokenizer(
config.model!,
this.logger,
{ timeout: this.options.timeout }
);
break;
case "api":
tokenizer = new ApiTokenizer(
config,
this.logger,
{ timeout: this.options.timeout }
);
break;
default:
throw new Error(`Unknown tokenizer type: ${config.type}`);
}
this.logger?.info(`Calling initialize() on ${config.type} tokenizer...`);
await tokenizer.initialize();
this.tokenizers.set(cacheKey, tokenizer);
this.logger?.info(
`Tokenizer initialized successfully: ${config.type} (${cacheKey})`
);
return tokenizer;
} catch (error: any) {
this.logger?.error(
`Failed to initialize ${config.type} tokenizer: ${error.message}`
);
this.logger?.error(`Error stack: ${error.stack}`);
// Return fallback tokenizer
if (!this.fallbackTokenizer) {
await this.initialize();
}
return this.fallbackTokenizer!;
}
}
/**
* Count tokens for a request using the specified tokenizer configuration
*/
async countTokens(
request: TokenizeRequest,
config?: TokenizerConfig
): Promise<TokenizerResult> {
// Get appropriate tokenizer
const tokenizer = config
? await this.getTokenizer(config)
: this.fallbackTokenizer!;
// Count tokens
const tokenCount = await tokenizer.countTokens(request);
return {
tokenCount,
tokenizerUsed: tokenizer.name,
cached: false,
};
}
/**
* Get tokenizer configuration for a specific model/provider
*/
getTokenizerConfigForModel(
providerName: string,
modelName: string
): TokenizerConfig | undefined {
const providers = this.configService.get<any[]>("providers") || [];
const provider = providers.find((p) => p.name === providerName);
if (!provider?.tokenizer) {
return undefined;
}
// Check model-specific config first
if (provider.tokenizer.models?.[modelName]) {
return provider.tokenizer.models[modelName];
}
// Fall back to default config
return provider.tokenizer.default;
}
/**
* Dispose all tokenizers
*/
dispose(): void {
this.tokenizers.forEach((tokenizer) => {
try {
tokenizer.dispose();
} catch (error) {
this.logger?.error(`Error disposing tokenizer: ${error}`);
}
});
this.tokenizers.clear();
}
/**
* Generate cache key from tokenizer config
*/
private getCacheKey(config: TokenizerConfig): string {
switch (config.type) {
case "tiktoken":
return `tiktoken:${config.encoding || "cl100k_base"}`;
case "huggingface":
return `hf:${config.model}`;
case "api":
return `api:${config.url}`;
default:
return `unknown:${JSON.stringify(config)}`;
}
}
}
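
A sketch of the three config shapes getTokenizer() accepts, with the cache key each maps to (the model ID and URL are the placeholder examples from the type docs, not values this commit ships):

import { TokenizerConfig } from "../types/tokenizer";

const tiktokenCfg: TokenizerConfig = { type: "tiktoken", encoding: "cl100k_base" }; // key "tiktoken:cl100k_base"
const hfCfg: TokenizerConfig = { type: "huggingface", model: "Qwen/Qwen2.5-Coder-32B-Instruct" }; // key "hf:Qwen/Qwen2.5-Coder-32B-Instruct"
const apiCfg: TokenizerConfig = {
type: "api",
url: "https://api.example.com/v1/tokenize", // placeholder URL
apiKey: "sk-...",
responseField: "usage.input_tokens", // dot path into the provider's JSON response
}; // key "api:https://api.example.com/v1/tokenize"

If any of these fails to initialize, getTokenizer() logs the error and returns the tiktoken fallback instead of throwing.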

View File

@@ -0,0 +1,271 @@
import {
ITokenizer,
TokenizeRequest,
TokenizerConfig,
} from "../types/tokenizer";
/**
* Options for API tokenizer
*/
interface ApiTokenizerOptions {
timeout?: number;
}
/**
* API-based tokenizer implementation
* Calls provider's tokenization API to get token counts
* Supports flexible configuration for different API formats
*/
export class ApiTokenizer implements ITokenizer {
readonly type = "api";
readonly name: string;
private config: Required<Pick<TokenizerConfig, 'url' | 'apiKey' | 'requestFormat' | 'responseField'>> & {
headers: Record<string, string>;
};
private logger: any;
private options: ApiTokenizerOptions;
constructor(
config: TokenizerConfig,
logger: any,
options: ApiTokenizerOptions = {}
) {
if (!config.url || !config.apiKey) {
throw new Error("API tokenizer requires url and apiKey");
}
this.config = {
url: config.url,
apiKey: config.apiKey,
requestFormat: config.requestFormat || "standard",
responseField: config.responseField || "token_count",
headers: config.headers || {},
};
this.logger = logger;
this.options = options;
try {
const url = new URL(config.url);
this.name = `api-${url.hostname}`;
} catch {
this.name = `api-${config.url}`;
}
}
async initialize(): Promise<void> {
// API tokenizers don't need initialization
// Just verify the URL is valid
try {
new URL(this.config.url);
} catch (error) {
throw new Error(`Invalid API URL: ${this.config.url}`);
}
}
async countTokens(request: TokenizeRequest): Promise<number> {
try {
// Prepare request body based on format
const requestBody = this.formatRequestBody(request);
// Prepare headers
const headers = {
"Content-Type": "application/json",
"Authorization": `Bearer ${this.config.apiKey}`,
...this.config.headers,
};
// Make API call
const controller = new AbortController();
const timeoutId = setTimeout(
() => controller.abort(),
this.options.timeout || 30000
);
const response = await fetch(this.config.url, {
method: "POST",
headers,
body: JSON.stringify(requestBody),
signal: controller.signal,
});
clearTimeout(timeoutId);
if (!response.ok) {
throw new Error(
`API tokenizer request failed: ${response.status} ${response.statusText}`
);
}
const data = await response.json();
// Extract token count from response
const tokenCount = this.extractTokenCount(data);
return tokenCount;
} catch (error: any) {
if (error.name === "AbortError") {
throw new Error("API tokenizer request timed out");
}
throw error;
}
}
isInitialized(): boolean {
return true;
}
dispose(): void {
// Nothing to dispose for API tokenizer
}
/**
* Format request body based on requestFormat
*/
private formatRequestBody(request: TokenizeRequest): any {
switch (this.config.requestFormat) {
case "standard":
// Standard format: send the entire request
return request;
case "openai":
// OpenAI format: extract text content and format as OpenAI request
return {
model: "gpt-3.5-turbo", // Placeholder, some APIs require this
messages: this.extractMessagesAsOpenAIFormat(request),
};
case "anthropic":
// Anthropic format: extract messages and tools
return {
messages: request.messages || [],
system: request.system,
tools: request.tools,
};
case "custom":
// Custom format: send concatenated text
return {
text: this.extractConcatenatedText(request),
};
default:
return request;
}
}
/**
* Extract messages in OpenAI format
*/
private extractMessagesAsOpenAIFormat(request: TokenizeRequest): any[] {
if (!request.messages) return [];
return request.messages.map((msg) => ({
role: msg.role,
content: this.extractTextFromMessage(msg),
}));
}
/**
* Extract text from a message
*/
private extractTextFromMessage(message: any): string {
if (typeof message.content === "string") {
return message.content;
}
if (Array.isArray(message.content)) {
return message.content
.map((part: any) => {
if (part.type === "text" && part.text) {
return part.text;
} else if (part.type === "tool_use" && part.input) {
return JSON.stringify(part.input);
} else if (part.type === "tool_result") {
return typeof part.content === "string"
? part.content
: JSON.stringify(part.content);
}
return "";
})
.join(" ");
}
return "";
}
/**
* Extract all text from request
*/
private extractConcatenatedText(request: TokenizeRequest): string {
const parts: string[] = [];
// Extract messages
if (request.messages) {
request.messages.forEach((msg) => {
parts.push(this.extractTextFromMessage(msg));
});
}
// Extract system
if (typeof request.system === "string") {
parts.push(request.system);
} else if (Array.isArray(request.system)) {
request.system.forEach((item: any) => {
if (item.type === "text") {
if (typeof item.text === "string") {
parts.push(item.text);
} else if (Array.isArray(item.text)) {
item.text.forEach((textPart: any) => {
if (textPart) parts.push(textPart);
});
}
}
});
}
// Extract tools
if (request.tools) {
request.tools.forEach((tool) => {
if (tool.name) parts.push(tool.name);
if (tool.description) parts.push(tool.description);
if (tool.input_schema) parts.push(JSON.stringify(tool.input_schema));
});
}
return parts.join(" ");
}
/**
* Extract token count from response using the configured field path
*/
private extractTokenCount(data: any): number {
try {
const fieldPath = this.config.responseField;
const parts = fieldPath.split(".");
let value: any = data;
for (const part of parts) {
if (value === undefined || value === null) {
throw new Error(`Field path '${fieldPath}' not found in response`);
}
value = value[part];
}
if (typeof value !== "number") {
throw new Error(
`Expected number at field path '${fieldPath}', got ${typeof value}`
);
}
return value;
} catch (error: any) {
this.logger?.error(
`Failed to extract token count from API response: ${error.message}. Response: ${JSON.stringify(data)}`
);
throw new Error(
`Invalid response from API tokenizer: ${error.message}`
);
}
}
}
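
The responseField dot-path walk in extractTokenCount() behaves like this self-contained sketch (illustrative only, not code from the commit):

function resolvePath(data: any, fieldPath: string): unknown {
let value: any = data;
for (const part of fieldPath.split(".")) {
if (value === undefined || value === null) return undefined;
value = value[part];
}
return value;
}

resolvePath({ usage: { input_tokens: 42 } }, "usage.input_tokens"); // 42
resolvePath({ token_count: 7 }, "token_count"); // 7 (the default field)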

View File

@@ -0,0 +1,229 @@
import { join } from "path";
import { homedir } from "os";
import { existsSync, mkdirSync } from "fs";
import { promises as fs } from "fs";
import {
ITokenizer,
TokenizeRequest,
} from "../types/tokenizer";
import { Tokenizer } from "@huggingface/tokenizers";
/**
* Options for HuggingFace tokenizer
*/
interface HFTokenizerOptions {
timeout?: number;
cacheDir?: string;
}
/**
* HuggingFace tokenizer implementation
* Uses @huggingface/tokenizers library for lightweight tokenization
*/
export class HuggingFaceTokenizer implements ITokenizer {
readonly type = "huggingface";
readonly name: string;
private readonly modelId: string;
private readonly logger: any;
private readonly options: HFTokenizerOptions;
private tokenizer: any = null;
private readonly cacheDir: string;
private readonly safeModelName: string;
constructor(modelId: string, logger: any, options: HFTokenizerOptions = {}) {
this.modelId = modelId;
this.logger = logger;
this.options = options;
this.cacheDir = options.cacheDir || join(homedir(), ".claude-code-router", ".huggingface");
// Cache safe model name to avoid repeated regex operations
this.safeModelName = modelId.replace(/\//g, "_").replace(/[^a-zA-Z0-9_-]/g, "_");
this.name = `huggingface-${modelId.split("/").pop()}`;
}
/**
* Get cache file paths for tokenizer files
*/
private getCachePaths() {
const modelDir = join(this.cacheDir, this.safeModelName);
return {
modelDir,
tokenizerJson: join(modelDir, "tokenizer.json"),
tokenizerConfig: join(modelDir, "tokenizer_config.json"),
};
}
/**
* Ensure directory exists
*/
private ensureDir(dir: string): void {
if (!existsSync(dir)) {
mkdirSync(dir, { recursive: true });
}
}
/**
* Load tokenizer files from local cache
*/
private async loadFromCache(): Promise<{ tokenizerJson: any; tokenizerConfig: any } | null> {
try {
const paths = this.getCachePaths();
if (!existsSync(paths.tokenizerJson) || !existsSync(paths.tokenizerConfig)) {
return null;
}
const [tokenizerJsonContent, tokenizerConfigContent] = await Promise.all([
fs.readFile(paths.tokenizerJson, "utf-8"),
fs.readFile(paths.tokenizerConfig, "utf-8"),
]);
return {
tokenizerJson: JSON.parse(tokenizerJsonContent),
tokenizerConfig: JSON.parse(tokenizerConfigContent),
};
} catch (error: any) {
this.logger?.warn(`Failed to load from cache: ${error.message}`);
return null;
}
}
/**
* Download tokenizer files from Hugging Face Hub and save to cache
*/
private async downloadAndCache(): Promise<{ tokenizerJson: any; tokenizerConfig: any }> {
const paths = this.getCachePaths();
const urls = {
json: `https://huggingface.co/${this.modelId}/resolve/main/tokenizer.json`,
config: `https://huggingface.co/${this.modelId}/resolve/main/tokenizer_config.json`,
};
this.logger?.info(`Downloading tokenizer files for ${this.modelId}`);
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), this.options.timeout || 30000);
try {
const [jsonRes, configRes] = await Promise.all([
fetch(urls.json, { signal: controller.signal }),
fetch(urls.config, { signal: controller.signal }),
]);
if (!jsonRes.ok) {
throw new Error(`Failed to fetch tokenizer.json: ${jsonRes.statusText}`);
}
const [tokenizerJson, tokenizerConfig] = await Promise.all([
jsonRes.json(),
configRes.ok ? configRes.json() : Promise.resolve({}),
]);
this.ensureDir(paths.modelDir);
await Promise.all([
fs.writeFile(paths.tokenizerJson, JSON.stringify(tokenizerJson, null, 2)),
fs.writeFile(paths.tokenizerConfig, JSON.stringify(tokenizerConfig, null, 2)),
]);
return { tokenizerJson, tokenizerConfig };
} finally {
clearTimeout(timeoutId);
}
}
async initialize(): Promise<void> {
try {
this.logger?.info(`Initializing HuggingFace tokenizer: ${this.modelId}`);
this.ensureDir(this.cacheDir);
const tokenizerData = await this.loadFromCache() || await this.downloadAndCache();
this.tokenizer = new Tokenizer(tokenizerData.tokenizerJson, tokenizerData.tokenizerConfig);
this.logger?.info(`Tokenizer initialized: ${this.name}`);
} catch (error: any) {
this.logger?.error(`Failed to initialize tokenizer: ${error.message}`);
throw new Error(`Failed to initialize HuggingFace tokenizer for ${this.modelId}: ${error.message}`);
}
}
async countTokens(request: TokenizeRequest): Promise<number> {
if (!this.tokenizer) {
throw new Error("Tokenizer not initialized");
}
try {
const text = this.extractTextFromRequest(request);
return this.tokenizer.encode(text).ids.length;
} catch (error: any) {
this.logger?.error(`Error counting tokens: ${error.message}`);
throw error;
}
}
isInitialized(): boolean {
return this.tokenizer !== null;
}
dispose(): void {
this.tokenizer = null;
}
/**
* Extract text from tokenize request
*/
private extractTextFromRequest(request: TokenizeRequest): string {
const parts: string[] = [];
const { messages, system, tools } = request;
// Extract messages
if (Array.isArray(messages)) {
for (const message of messages) {
if (typeof message.content === "string") {
parts.push(message.content);
} else if (Array.isArray(message.content)) {
for (const contentPart of message.content) {
if (contentPart.type === "text" && contentPart.text) {
parts.push(contentPart.text);
} else if (contentPart.type === "tool_use" && contentPart.input) {
parts.push(JSON.stringify(contentPart.input));
} else if (contentPart.type === "tool_result") {
parts.push(
typeof contentPart.content === "string"
? contentPart.content
: JSON.stringify(contentPart.content)
);
}
}
}
}
}
// Extract system
if (typeof system === "string") {
parts.push(system);
} else if (Array.isArray(system)) {
for (const item of system) {
if (item.type === "text") {
if (typeof item.text === "string") {
parts.push(item.text);
} else if (Array.isArray(item.text)) {
for (const textPart of item.text) {
if (textPart) parts.push(textPart);
}
}
}
}
}
// Extract tools
if (tools) {
for (const tool of tools) {
if (tool.name) parts.push(tool.name);
if (tool.description) parts.push(tool.description);
if (tool.input_schema) parts.push(JSON.stringify(tool.input_schema));
}
}
return parts.join(" ");
}
}
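
Cache layout, as a sketch using the placeholder model ID from the type docs: safeModelName rewrites "/" and "." to "_", so "Qwen/Qwen2.5-Coder-32B-Instruct" is cached under ~/.claude-code-router/.huggingface/Qwen_Qwen2_5-Coder-32B-Instruct/ as tokenizer.json and tokenizer_config.json. Typical use:

const tok = new HuggingFaceTokenizer("Qwen/Qwen2.5-Coder-32B-Instruct", console);
await tok.initialize(); // first run downloads from huggingface.co, later runs hit the cache
const count = await tok.countTokens({ messages: [{ role: "user", content: "hello" }] });
tok.dispose();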

View File

@@ -0,0 +1,112 @@
import { get_encoding, Tiktoken } from "tiktoken";
import {
ITokenizer,
TokenizeRequest,
} from "../types/tokenizer";
/**
* Tiktoken-based tokenizer implementation
* Uses tiktoken library for fast token counting (OpenAI compatible)
*/
export class TiktokenTokenizer implements ITokenizer {
readonly type = "tiktoken";
readonly name: string;
private encoding?: Tiktoken;
private encodingName: string;
constructor(encodingName: string = "cl100k_base") {
this.encodingName = encodingName;
this.name = `tiktoken-${encodingName}`;
try {
this.encoding = get_encoding(encodingName);
} catch (error) {
throw new Error(`Failed to initialize tiktoken encoding: ${encodingName}`);
}
}
async initialize(): Promise<void> {
// Encoding is already initialized in constructor
if (!this.encoding) {
throw new Error("Tiktoken encoding not initialized");
}
}
async countTokens(request: TokenizeRequest): Promise<number> {
if (!this.encoding) {
throw new Error("Encoding not initialized");
}
let tokenCount = 0;
const { messages, system, tools } = request;
// Count messages
if (Array.isArray(messages)) {
messages.forEach((message) => {
if (typeof message.content === "string") {
tokenCount += this.encoding!.encode(message.content).length;
} else if (Array.isArray(message.content)) {
message.content.forEach((contentPart: any) => {
if (contentPart.type === "text") {
tokenCount += this.encoding!.encode(contentPart.text).length;
} else if (contentPart.type === "tool_use") {
tokenCount += this.encoding!.encode(
JSON.stringify(contentPart.input)
).length;
} else if (contentPart.type === "tool_result") {
const content =
typeof contentPart.content === "string"
? contentPart.content
: JSON.stringify(contentPart.content);
tokenCount += this.encoding!.encode(content).length;
}
});
}
});
}
// Count system
if (typeof system === "string") {
tokenCount += this.encoding.encode(system).length;
} else if (Array.isArray(system)) {
system.forEach((item: any) => {
if (item.type !== "text") return;
if (typeof item.text === "string") {
tokenCount += this.encoding!.encode(item.text).length;
} else if (Array.isArray(item.text)) {
item.text.forEach((textPart: any) => {
tokenCount += this.encoding!.encode(textPart || "").length;
});
}
});
}
// Count tools
if (tools) {
tools.forEach((tool: any) => {
if (tool.description) {
tokenCount += this.encoding!.encode(
tool.name + tool.description
).length;
}
if (tool.input_schema) {
tokenCount += this.encoding!.encode(
JSON.stringify(tool.input_schema)
).length;
}
});
}
return tokenCount;
}
isInitialized(): boolean {
return this.encoding !== undefined;
}
dispose(): void {
if (this.encoding) {
this.encoding.free();
this.encoding = undefined;
}
}
}
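
A short usage sketch: the encoding is loaded synchronously in the constructor, so counting works immediately after construction, and dispose() frees the underlying WASM encoding.

const tok = new TiktokenTokenizer(); // defaults to cl100k_base
const count = await tok.countTokens({
messages: [{ role: "user", content: "How many tokens is this?" }],
system: "You are concise.",
});
tok.dispose();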

View File

@@ -11,6 +11,7 @@ import type {
import type { ChatCompletionTool } from "openai/resources/chat/completions";
import type { Tool as AnthropicTool } from "@anthropic-ai/sdk/resources/messages";
import { Transformer } from "./transformer";
import type { ProviderTokenizerConfig } from "./tokenizer";
export interface UrlCitation {
url: string;
@@ -236,4 +237,5 @@ export interface ConfigProvider {
use?: string[] | Array<any>[];
};
};
tokenizer?: ProviderTokenizerConfig;
}
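
A hedged example of the new field (the model names below are placeholders, not values from this commit); getTokenizerConfigForModel() checks the models map first and falls back to default:

import type { ProviderTokenizerConfig } from "./tokenizer";

const tokenizer: ProviderTokenizerConfig = {
default: { type: "tiktoken", encoding: "cl100k_base" },
models: {
"qwen/qwen2.5-coder-32b-instruct": {
type: "huggingface",
model: "Qwen/Qwen2.5-Coder-32B-Instruct",
},
},
};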

packages/core/src/types/tokenizer.d.ts (vendored, new file)
View File

@@ -0,0 +1,131 @@
/**
* Tokenizer configuration types
*/
/**
* Tokenizer type enum
*/
export type TokenizerType = 'tiktoken' | 'huggingface' | 'api';
/**
* API request format type
*/
export type ApiRequestFormat = 'standard' | 'openai' | 'anthropic' | 'custom';
/**
* Configuration for a specific tokenizer
*/
export interface TokenizerConfig {
/** Type of tokenizer to use */
type: TokenizerType;
/** For tiktoken: encoding name (e.g., 'cl100k_base', 'p50k_base') */
encoding?: string;
/** For huggingface: model ID (e.g., 'Qwen/Qwen2.5-Coder-32B-Instruct') */
model?: string;
/** For API-based tokenizers: complete API URL (e.g., 'https://api.example.com/v1/tokenize') */
url?: string;
/** For API-based tokenizers: API key */
apiKey?: string;
/** For API-based tokenizers: request format (default: 'standard') */
requestFormat?: ApiRequestFormat;
/** For API-based tokenizers: response field path to extract token count (default: 'token_count') */
responseField?: string;
/** For API-based tokenizers: custom request headers */
headers?: Record<string, string>;
/** Fallback tokenizer type if this one fails */
fallback?: TokenizerType;
}
/**
* Options for TokenizerService
*/
export interface TokenizerOptions {
/** Enable token count caching */
cacheEnabled?: boolean;
/** Maximum cache size */
cacheSize?: number;
/** Timeout for API calls (in milliseconds) */
timeout?: number;
}
/**
* Token count request structure (matches existing calculateTokenCount interface)
*/
export interface TokenizeRequest {
messages: Array<{
role: string;
content: string | Array<{
type: string;
text?: string;
input?: any;
content?: string | any;
}>;
}>;
system?: string | Array<{
type: string;
text?: string | string[];
}>;
tools?: Array<{
name: string;
description?: string;
input_schema: object;
}>;
}
/**
* Result from token counting operation
*/
export interface TokenizerResult {
/** Total token count */
tokenCount: number;
/** Name/type of tokenizer used */
tokenizerUsed: string;
/** Whether the result was from cache */
cached: boolean;
}
/**
* Abstract interface for all tokenizers
*/
export interface ITokenizer {
/** Tokenizer type identifier */
readonly type: string;
/** Human-readable tokenizer name */
readonly name: string;
/** Initialize the tokenizer (async for loading models, etc.) */
initialize(): Promise<void>;
/** Count tokens for a given request */
countTokens(request: TokenizeRequest): Promise<number>;
/** Check if tokenizer is initialized */
isInitialized(): boolean;
/** Clean up resources */
dispose(): void;
}
/**
* Provider-specific tokenizer configuration
*/
export interface ProviderTokenizerConfig {
/** Default tokenizer for all models in this provider */
default?: TokenizerConfig;
/** Model-specific tokenizer configurations */
models?: Record<string, TokenizerConfig>;
}
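
A sketch of a TokenizeRequest exercising each content shape the counters handle (all values illustrative):

const req: TokenizeRequest = {
messages: [
{ role: "user", content: "plain string content" },
{
role: "assistant",
content: [
{ type: "text", text: "calling a tool" },
{ type: "tool_use", input: { path: "src/index.ts" } },
],
},
{ role: "user", content: [{ type: "tool_result", content: "file contents..." }] },
],
system: [{ type: "text", text: "You are a helpful assistant." }],
tools: [
{ name: "read_file", description: "Read a file from disk", input_schema: { type: "object" } },
],
};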

View File

@@ -6,6 +6,7 @@ import { join } from "path";
import { CLAUDE_PROJECTS_DIR, HOME_DIR } from "@CCR/shared";
import { LRUCache } from "lru-cache";
import { ConfigService } from "../services/config";
import { TokenizerService } from "../services/tokenizer";
// Types from @anthropic-ai/sdk
interface Tool {
@@ -200,6 +201,7 @@ const getUseModel = async (
export interface RouterContext {
configService: ConfigService;
tokenizerService?: TokenizerService;
event?: any;
}
@@ -225,12 +227,38 @@ export const router = async (req: any, _res: any, context: RouterContext) => {
}
try {
const tokenCount = calculateTokenCount(
messages as MessageParam[],
system,
tools as Tool[]
// Try to get tokenizer config for the current model
const [providerName, modelName] = req.body.model.split(",");
const tokenizerConfig = context.tokenizerService?.getTokenizerConfigForModel(
providerName,
modelName
);
// Use TokenizerService if available, otherwise fall back to legacy method
let tokenCount: number;
if (context.tokenizerService) {
const result = await context.tokenizerService.countTokens(
{
messages: messages as MessageParam[],
system,
tools: tools as Tool[],
},
tokenizerConfig
);
tokenCount = result.tokenCount;
req.log.debug(
`Token count: ${tokenCount} (tokenizer: ${result.tokenizerUsed}, cached: ${result.cached})`
);
} else {
// Legacy fallback
tokenCount = calculateTokenCount(
messages as MessageParam[],
system,
tools as Tool[]
);
}
let model;
const customRouterPath = configService.get("CUSTOM_ROUTER_PATH");
if (customRouterPath) {
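
For context: route values in claude-code-router use the "providerName,modelName" string convention, which the split above relies on (values below are placeholders):

const [providerName, modelName] = "openrouter,qwen/qwen2.5-coder-32b-instruct".split(",");
// providerName === "openrouter", modelName === "qwen/qwen2.5-coder-32b-instruct"
// Without a comma, modelName is undefined, no tokenizer config is found,
// and countTokens() silently uses the tiktoken fallback.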

View File

@@ -1,4 +1,4 @@
import Server, { calculateTokenCount } from "@musistudio/llms";
import Server, { calculateTokenCount, TokenizerService } from "@musistudio/llms";
import { readConfigFile, writeConfigFile, backupConfigFile } from "./utils";
import { join } from "path";
import fastifyStatic from "@fastify/static";
@@ -34,7 +34,48 @@ export const createServer = async (config: any): Promise<any> => {
});
app.post("/v1/messages/count_tokens", async (req: any, reply: any) => {
const {messages, tools, system} = req.body;
const {messages, tools, system, model} = req.body;
const tokenizerService = (app as any)._server!.tokenizerService as TokenizerService;
// If model is specified in "providerName,modelName" format, use the configured tokenizer
if (model && model.includes(",") && tokenizerService) {
try {
const [provider, modelName] = model.split(",");
req.log?.info(`Looking up tokenizer for provider: ${provider}, model: ${modelName}`);
const tokenizerConfig = tokenizerService.getTokenizerConfigForModel(provider, modelName);
if (!tokenizerConfig) {
req.log?.warn(`No tokenizer config found for ${provider},${modelName}, using default tiktoken`);
} else {
req.log?.info(`Using tokenizer config: ${JSON.stringify(tokenizerConfig)}`);
}
const result = await tokenizerService.countTokens(
{ messages, system, tools },
tokenizerConfig
);
return {
"input_tokens": result.tokenCount,
"tokenizer": result.tokenizerUsed,
};
} catch (error: any) {
req.log?.error(`Error using configured tokenizer: ${error.message}`);
req.log?.error(error.stack);
// Fall back to default calculation
}
} else {
if (!model) {
req.log?.info(`No model specified, using default tiktoken`);
} else if (!model.includes(",")) {
req.log?.info(`Model "${model}" does not contain comma, using default tiktoken`);
} else if (!tokenizerService) {
req.log?.warn(`TokenizerService not available, using default tiktoken`);
}
}
// Default to tiktoken calculation
const tokenCount = calculateTokenCount(messages, system, tools);
return { "input_tokens": tokenCount }
});
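
Exercising the endpoint, as a sketch (port 3456 is assumed here as the router's usual default; adjust to your setup). The "tokenizer" field only appears when a configured tokenizer was used; the fallback path returns input_tokens alone:

const res = await fetch("http://127.0.0.1:3456/v1/messages/count_tokens", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
model: "openrouter,qwen/qwen2.5-coder-32b-instruct", // "provider,model" picks the configured tokenizer
messages: [{ role: "user", content: "Hello" }],
}),
});
console.log(await res.json());
// e.g. { "input_tokens": 3, "tokenizer": "huggingface-Qwen2.5-Coder-32B-Instruct" }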

View File

@@ -68,4 +68,54 @@ declare module "@musistudio/llms" {
constructor(configService: any, logger: any);
initialize(): Promise<void>;
}
// Tokenizer types
export type TokenizerType = 'tiktoken' | 'huggingface' | 'api';
export type ApiRequestFormat = 'standard' | 'openai' | 'anthropic' | 'custom';
export interface TokenizerConfig {
type: TokenizerType;
encoding?: string;
model?: string;
url?: string;
apiKey?: string;
requestFormat?: ApiRequestFormat;
responseField?: string;
headers?: Record<string, string>;
fallback?: TokenizerType;
}
export interface TokenizeRequest {
messages: Array<{
role: string;
content: string | Array<{
type: string;
text?: string;
input?: any;
content?: string | any;
}>;
}>;
system?: string | Array<{
type: string;
text?: string | string[];
}>;
tools?: Array<{
name: string;
description?: string;
input_schema: object;
}>;
}
export interface TokenizerResult {
tokenCount: number;
tokenizerUsed: string;
cached: boolean;
}
export class TokenizerService {
countTokens(request: TokenizeRequest, config?: TokenizerConfig): Promise<TokenizerResult>;
getTokenizerConfigForModel(providerName: string, modelName: string): TokenizerConfig | undefined;
clearCache(): void;
dispose(): void;
}
}

pnpm-lock.yaml (generated)
View File

@@ -118,6 +118,9 @@ importers:
'@google/genai':
specifier: ^1.7.0
version: 1.24.0
'@huggingface/tokenizers':
specifier: ^0.0.6
version: 0.0.6
dotenv:
specifier: ^16.5.0
version: 16.6.1
@@ -2004,6 +2007,9 @@ packages:
'@hapi/topo@5.1.0':
resolution: {integrity: sha512-foQZKJig7Ob0BMAYBfcJk8d77QtOe7Wo4ox7ff1lQYoNNAb6jwcY1ncdoy2e9wQZzvNy7ODZCYJkK8kzmcAnAg==}
'@huggingface/tokenizers@0.0.6':
resolution: {integrity: sha512-bnn3VWPiOwk613kFGHCCTc2TOVB8QCCsHTJGVlfyV5tQIDCn83dWrFkLB670Pr2xVs1e5ziSwlysX+tga+R3nQ==}
'@humanfs/core@0.19.1':
resolution: {integrity: sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==}
engines: {node: '>=18.18.0'}
@@ -6917,6 +6923,11 @@ packages:
engines: {node: '>=10'}
hasBin: true
semver@7.7.3:
resolution: {integrity: sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==}
engines: {node: '>=10'}
hasBin: true
send@0.19.2:
resolution: {integrity: sha512-VMbMxbDeehAxpOtWJXlcUS5E8iXh6QmN+BkRX1GARS3wRaXEEgzCcB10gTQazO42tpNIya8xIyNx8fll1OFPrg==}
engines: {node: '>= 0.8.0'}
@@ -10066,6 +10077,8 @@ snapshots:
dependencies:
'@hapi/hoek': 9.3.0
'@huggingface/tokenizers@0.0.6': {}
'@humanfs/core@0.19.1': {}
'@humanfs/node@0.16.7':
@@ -12084,7 +12097,7 @@ snapshots:
postcss-modules-scope: 3.2.1(postcss@8.5.6)
postcss-modules-values: 4.0.0(postcss@8.5.6)
postcss-value-parser: 4.2.0
semver: 7.7.2
semver: 7.7.3
optionalDependencies:
webpack: 5.104.1(esbuild@0.25.10)
@@ -14513,7 +14526,7 @@ snapshots:
got: 12.6.1
registry-auth-token: 5.1.0
registry-url: 6.0.1
semver: 7.7.2
semver: 7.7.3
param-case@3.0.4:
dependencies:
@@ -14792,7 +14805,7 @@ snapshots:
cosmiconfig: 8.3.6(typescript@5.9.3)
jiti: 1.21.7
postcss: 8.5.6
semver: 7.7.2
semver: 7.7.3
webpack: 5.104.1(esbuild@0.25.10)
transitivePeerDependencies:
- typescript
@@ -15639,7 +15652,7 @@ snapshots:
semver-diff@4.0.0:
dependencies:
semver: 7.7.2
semver: 7.7.3
semver@5.7.2: {}
@@ -15647,6 +15660,8 @@ snapshots:
semver@7.7.2: {}
semver@7.7.3: {}
send@0.19.2:
dependencies:
debug: 2.6.9