Commit 5fa44c30 authored by Long Lê Đăng

feat: Implement a new rule indexing and scoring system, including indexing, scoring, and composing modules plus a test runner.
parent 58201380
......@@ -4,7 +4,7 @@ node_modules
.pnp.js
# Production
dist/
# dist/
build/
# Misc
......
# CLAUDE.md - Rules & Guidelines
## 🚨 MANDATORY INITIAL STEP 🚨
**Before** generating any code or answering a complex request, you **MUST** first consult the project's rule database via the MCP server:
1. **EXECUTE TOOL**: `rules_search`
- `query`: The user's exact request or a summary of the task.
- `limit`: 5 (or more if complex).
- `openFiles`: Pass the currently open file paths if relevant.
2. **APPLY RULES**:
- Carefully read the returned rules.
- **Prioritize** these rules over your default training.
- If the rules specify coding styles, architecture patterns, or specific libraries, **OBEY THEM**.
---
## Development Info (For this MCP Project)
- **Build**: `npm run build`
- **Start**: `npm start`
- **Watch**: `npm run watch` (if available) or `tsc -w`
- **Test**: `npm test`
# CLAUDE.md - Client Project Template
> **Usage**: Copy this file to the root of your project as `CLAUDE.md` to enforce rule retrieval via the Awing Rules MCP.
## 🚨 MANDATORY INITIAL STEP 🚨
**Before** executing any task, editing code, or answering complex questions, you **MUST** consult the project's centralized rule database via the connected MCP server:
1. **EXECUTE TOOL**: `rules_search` (from `awing-rules-claudecode-mcp`) — an illustrative call is sketched below this list.
- `query`: The user's exact request, task description, or question.
- `limit`: 5 (default).
- `openFiles`: Pass the list of currently open file paths.
- `changedFiles`: (Optional) Pass list of recently changed files if available.
2. **APPLY RULES**:
- **Read Carefully**: The tool returns rules specifically relevant to your current context.
- **Prioritize**: These rules **override** your default training and general best practices.
- **Obey**: Strictly follow any coding styles, architecture patterns, or library constraints defined in the returned rules.
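For illustration only, the arguments of a `rules_search` call matching the tool schema in this commit might look like the sketch below; the file paths are placeholders, not real project files.

```js
// Illustrative rules_search arguments — paths are hypothetical examples.
const args = {
  query: "Add a React component for the user profile page",
  limit: 5,
  openFiles: ["src/pages/UserProfile.tsx"],
  changedFiles: ["src/api/userClient.ts"],
};
```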
---
## Project Commands
*Replace the commands below with your project-specific commands*
- **Build**: `npm run build`
- **Test**: `npm test`
- **Start**: `npm start`
......@@ -22,3 +22,5 @@ paths: ["**/*"]
## Git & Version Control
- Commit messages must be descriptive (e.g., `feat: check user permissions`).
- Do not commit generated files or secrets.
Answer in Vietnamese
#!/usr/bin/env node
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { CallToolRequestSchema, ErrorCode, ListToolsRequestSchema, McpError, } from '@modelcontextprotocol/sdk/types.js';
import { z } from 'zod';
import { RuleIndexer } from './rules/indexer.js';
import { Composer } from './rules/composer.js';
import * as path from 'path';
import { fileURLToPath } from 'url';
// --- Configuration ---
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const ROOT_DIR = path.resolve(__dirname, '..'); // storage root is one level up from dist
// --- Initialization ---
const indexer = new RuleIndexer(ROOT_DIR);
const composer = new Composer(ROOT_DIR);
// Build the index on startup (async; assumed fast enough for a small rule corpus, otherwise it could be made lazy)
indexer.init().catch(err => console.error('Failed to init index:', err));
const server = new Server({
name: 'awing-rules-claudecode-mcp',
version: '0.1.0',
}, {
capabilities: {
tools: {},
},
});
// --- Tool Definitions ---
server.setRequestHandler(ListToolsRequestSchema, async () => {
return {
tools: [
{
name: 'rules_search',
description: 'Search for relevant project rules based on query and context.',
inputSchema: {
type: 'object',
properties: {
query: { type: 'string', description: 'The search query or user task description' },
openFiles: { type: 'array', items: { type: 'string' }, description: 'List of currently open file paths' },
changedFiles: { type: 'array', items: { type: 'string' }, description: 'List of recently changed file paths' },
tags: { type: 'array', items: { type: 'string' }, description: 'Filter by tags' },
limit: { type: 'number', description: 'Max number of results to return (default 6)' },
minScore: { type: 'number', description: 'Minimum score threshold (default 0.15)' }
},
required: ['query'],
},
},
{
name: 'rules_get',
description: 'Get details of a specific rule by ID or path.',
inputSchema: {
type: 'object',
properties: {
id: { type: 'string' },
path: { type: 'string' },
mode: { type: 'string', enum: ['full', 'snippet', 'sections'], default: 'snippet' }
},
},
},
{
name: 'rules_compose',
description: 'Compose a rule bundle from selected rules and the base rule.',
inputSchema: {
type: 'object',
properties: {
selected: {
type: 'array',
items: {
type: 'object',
properties: {
id: { type: 'string' },
path: { type: 'string' }
}
}
},
mode: { type: 'string', enum: ['full', 'snippet'], default: 'snippet' },
dedupe: { type: 'boolean', default: true },
maxChars: { type: 'number', default: 12000 }
},
required: ['selected'],
},
},
{
name: 'rules_refresh',
description: 'Refresh the rule index from disk.',
inputSchema: {
type: 'object',
properties: {},
},
},
],
};
});
// --- Tool Handlers ---
server.setRequestHandler(CallToolRequestSchema, async (request) => {
const { name, arguments: args } = request.params;
if (name === 'rules_refresh') {
await indexer.refresh();
return {
content: [{ type: 'text', text: 'Rule index refreshed successfully.' }]
};
}
if (name === 'rules_search') {
const input = z.object({
query: z.string(),
openFiles: z.array(z.string()).optional(),
changedFiles: z.array(z.string()).optional(),
tags: z.array(z.string()).optional(),
limit: z.number().optional(),
minScore: z.number().optional()
}).parse(args);
const results = indexer.search(input.query, {
openFiles: input.openFiles,
changedFiles: input.changedFiles,
tags: input.tags,
limit: input.limit,
minScore: input.minScore
});
return {
content: [
{
type: 'text',
text: JSON.stringify(results.map(r => ({
id: r.rule.id,
path: r.rule.relativePath,
title: r.rule.title,
score: r.score.toFixed(2),
tags: r.rule.tags,
content: r.rule.content,
why: `Text:${r.scoreBreakdown.text.toFixed(2)} Path:${r.scoreBreakdown.path.toFixed(2)} Tag:${r.scoreBreakdown.tag.toFixed(2)}`
})), null, 2),
},
],
};
}
if (name === 'rules_get') {
const input = z.object({
id: z.string().optional(),
path: z.string().optional(),
mode: z.string().optional(),
}).parse(args);
const rule = indexer.getRuleByIdOrPath(input.id || input.path || '');
if (!rule) {
return { isError: true, content: [{ type: 'text', text: 'Rule not found' }] };
}
return {
content: [
{
type: 'text',
text: JSON.stringify({
id: rule.id,
path: rule.relativePath,
title: rule.title,
content: rule.content // TODO: Apply mode/snippet logic if needed
}, null, 2),
},
],
};
}
if (name === 'rules_compose') {
const input = z.object({
selected: z.array(z.object({ id: z.string().optional(), path: z.string().optional() })),
mode: z.enum(['full', 'snippet']).optional(),
dedupe: z.boolean().optional(),
maxChars: z.number().optional()
}).parse(args);
// Resolve rules
const resolvedRules = input.selected.map(sel => {
const rule = indexer.getRuleByIdOrPath(sel.id || sel.path || '');
return { ...sel, rule };
});
const bundle = await composer.compose(resolvedRules, {
mode: input.mode,
dedupe: input.dedupe,
maxChars: input.maxChars
});
return {
content: [
{
type: 'text',
text: bundle.content,
},
],
};
}
throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`);
});
async function runServer() {
const transport = new StdioServerTransport();
await server.connect(transport);
console.error('Awing Rules MCP Server running on stdio');
}
runServer().catch((error) => {
console.error('Fatal error running server:', error);
process.exit(1);
});
import { RuleLoader } from './loader.js';
import * as path from 'path';
export class Composer {
loader;
constructor(baseDir) {
this.loader = new RuleLoader(baseDir);
}
async compose(selectedRules, options = {}) {
const { maxChars = 12000, dedupe = true, mode = 'snippet' } = options;
// 1. Load Base Rule
const baseContent = await this.loader.loadBaseRule();
// 2. Resolve Selected Rules
// Design choice: the MCP handler resolves { id?, path? } selections via the Indexer and
// passes the resolved Rule objects in `selectedRules[i].rule`; the Composer only formats.
// base.md (indexed with id 'global-base') is excluded here because the base content is
// prepended separately below.
const rulesToInclude = selectedRules
.map(s => s.rule)
.filter((r) => !!r && r.id !== 'global-base' && path.basename(r.path) !== 'base.md');
// 3. Start composing
let bundle = `# RULE BUNDLE\n> [!IMPORTANT] Priority Order: Selected rules override Base rules where conflicting. Security constraints in Base rules are non-negotiable.\n\n`;
// Add Base
bundle += `## BASE RULES\n\n${baseContent}\n\n`;
// Add Selected
const used = [];
const notes = [];
for (const rule of rulesToInclude) {
bundle += `## ${rule.title} (ID: ${rule.id})\nTitle: ${rule.title}\nSource: ${rule.relativePath}\n\n`;
let contentToAdd = rule.content;
// Snippet mode logic
if (mode === 'snippet') {
contentToAdd = this.extractSnippet(contentToAdd);
}
bundle += contentToAdd + `\n\n---\n\n`;
used.push({ id: rule.id, path: rule.path, title: rule.title });
}
// 4. Deduplication
if (dedupe) {
bundle = this.deduplicateLines(bundle);
}
// 5. Truncation
let truncated = false;
if (bundle.length > maxChars) {
bundle = bundle.slice(0, maxChars) + '\n... (truncated)';
truncated = true;
notes.push(`Output truncated to ${maxChars} chars.`);
}
return {
content: bundle,
used,
truncated,
notes
};
}
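// Bundle layout produced above (illustrative):
//   # RULE BUNDLE
//   > [!IMPORTANT] Priority Order: ...
//   ## BASE RULES
//   <base.md content>
//   ## <Rule Title> (ID: <rule id>)
//   Title / Source lines, then the rule content, each rule separated by '---'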
extractSnippet(content) {
// Snippet mode: keep headings, bullets, blockquotes and code fences; drop long prose paragraphs.
// The classification pass below is prepared, but for now the full content is returned unchanged
// (placeholder until snippet extraction is enabled), since rule files are expected to be mostly bullets.
const lines = content.split('\n');
const keep = [];
let insideCodeBlock = false;
for (const line of lines) {
if (line.trim().startsWith('```')) {
insideCodeBlock = !insideCodeBlock;
keep.push(line);
continue;
}
const isHeader = /^#+\s/.test(line);
const isList = /^[\s]*[-*+]\s/.test(line);
const isQuote = /^>\s/.test(line); // e.g. GitHub-style alerts
if (isHeader || isList || isQuote || insideCodeBlock) {
keep.push(line);
}
// Plain paragraphs and long examples are intentionally skipped in snippet mode.
}
return content; // Placeholder: switch to keep.join('\n') once snippet extraction is enabled.
}
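// deduplicateLines: drops repeated bullet lines (case-insensitive, trimmed length > 10) across the bundle,
// e.g. two identical "- Do not commit generated files or secrets." bullets from different rules collapse
// into one; headings and short lines are always kept.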
deduplicateLines(text) {
const lines = text.split('\n');
const seen = new Set();
const out = [];
for (const line of lines) {
const trimmed = line.trim().toLowerCase();
// Only dedupe substantial bullet points
const isBullet = /^[\s]*[-*+]\s/.test(line);
if (isBullet && trimmed.length > 10) {
if (seen.has(trimmed)) {
continue; // Skip duplicate
}
seen.add(trimmed);
}
out.push(line);
}
return out.join('\n');
}
}
import { RuleLoader } from './loader.js';
import { Scorer } from './scorer.js';
export class RuleIndexer {
loader;
scorer;
cachedRules = [];
lastIndexed = 0;
constructor(baseDir) {
this.loader = new RuleLoader(baseDir);
this.scorer = new Scorer();
}
async init() {
await this.refresh();
}
async refresh() {
this.cachedRules = await this.loader.loadAllRules();
this.scorer.indexRules(this.cachedRules);
this.lastIndexed = Date.now();
}
async getBaseRule() {
return this.loader.loadBaseRule();
}
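// Look up by id (an explicit frontmatter id, or the default relative-path-without-.md)
// or by file path; path lookup is suffix-based after normalizing backslashes to forward slashes.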
getRuleByIdOrPath(idOrPath) {
// Try ID match
let rule = this.cachedRules.find(r => r.id === idOrPath);
if (rule)
return rule;
// Try Path match (exact or relative)
// normalizing slashes
const normalized = idOrPath.replace(/\\/g, '/');
return this.cachedRules.find(r => r.path.replace(/\\/g, '/').endsWith(normalized));
}
search(query, options = {}) {
const { openFiles = [], changedFiles = [], tags = [], limit = 6, minScore = 0.15 } = options;
const results = this.cachedRules.map(rule => this.scorer.scoreRule(rule, query, openFiles, changedFiles, tags));
// Sort by score desc
const sorted = results
.filter(r => r.score >= minScore)
.sort((a, b) => b.score - a.score);
// Diversification and Deduplication logic
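// At most 2 results per top-level rule folder (e.g. 'frontend/', 'backend/'), so one category
// cannot crowd out the rest of the limit.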
const diversified = [];
const categoryCounts = {};
const seenIds = new Set();
for (const res of sorted) {
// Deduplicate by ID
if (seenIds.has(res.rule.id))
continue;
// Determine category from relative path (first dir)
const parts = res.rule.relativePath.split(/[/\\]/);
const category = parts.length > 1 ? parts[0] : 'root';
if ((categoryCounts[category] || 0) < 2) {
diversified.push(res);
categoryCounts[category] = (categoryCounts[category] || 0) + 1;
seenIds.add(res.rule.id);
}
if (diversified.length >= limit)
break;
}
return diversified;
}
}
import * as fs from 'fs/promises';
import * as path from 'path';
import matter from 'gray-matter';
import { findMarkdownFiles } from '../utils/glob.js';
export class RuleLoader {
rootDir;
constructor(rootDir) {
this.rootDir = path.resolve(rootDir);
}
async loadAllRules() {
const files = await findMarkdownFiles(this.rootDir);
const rules = [];
for (const file of files) {
// Note: We used to skip base.md here, but we want it indexed for search.
// if (path.basename(file).toLowerCase() === 'base.md') continue;
try {
const rule = await this.parseRule(file);
if (rule)
rules.push(rule);
}
catch (error) {
console.error(`Failed to parse rule file: ${file}`, error);
}
}
return rules;
}
async loadBaseRule() {
const basePath = path.join(this.rootDir, 'base.md');
try {
const content = await fs.readFile(basePath, 'utf-8');
return content;
}
catch (error) {
// If base.md doesn't exist, return empty string or default warning
return '';
}
}
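// parseRule reads optional frontmatter fields; an illustrative rule file header (values are examples):
// ---
// id: frontend-react
// title: React Component Rules
// tags: [react, frontend]
// priority: 80
// paths: ["**/*.tsx"]
// applies_when: []
// avoid: []
// ---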
async parseRule(filePath) {
const rawContent = await fs.readFile(filePath, 'utf-8');
const { data, content } = matter(rawContent);
const fm = data;
const stats = await fs.stat(filePath);
// The 'avoid' frontmatter field is a string[] of keywords used as a scoring penalty against the query;
// it does not exclude the file from indexing.
const relativePath = path.relative(this.rootDir, filePath);
const id = fm.id || relativePath.replace(/\\/g, '/').replace(/\.md$/, '');
const title = fm.title || path.basename(filePath, '.md');
return {
id,
path: filePath,
relativePath,
title,
content,
tags: fm.tags || [],
priority: typeof fm.priority === 'number' ? fm.priority : 50,
paths: fm.paths || [],
applies_when: fm.applies_when || [],
avoid: fm.avoid || [],
lastModified: stats.mtimeMs
};
}
}
import { matchGlob } from '../utils/glob.js';
export class Scorer {
idf = {};
ruleVectors = new Map();
constructor() { }
// Update internal TF-IDF model based on all rules
// Simplified version: Term Frequency in document * Inverse Document Frequency
// For small corpus (50-200 files), in-memory is fine.
indexRules(rules) {
// 1. Calculate document frequencies
const docFreq = {};
const totalDocs = rules.length;
rules.forEach(rule => {
const terms = this.tokenize(this.getRuleTextForIndex(rule));
const uniqueTerms = new Set(terms);
uniqueTerms.forEach(term => {
docFreq[term] = (docFreq[term] || 0) + 1;
});
});
// 2. Calculate IDF
this.idf = {};
Object.keys(docFreq).forEach(term => {
this.idf[term] = Math.log(1 + (totalDocs / (docFreq[term] || 1)));
});
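// e.g. with 100 rules, a term appearing in 2 of them gets idf = ln(1 + 100/2) ≈ 3.93,
// while a term appearing in every rule gets ln(2) ≈ 0.69.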
// 3. Pre-calculate TF vectors for each rule
this.ruleVectors.clear();
rules.forEach(rule => {
const terms = this.tokenize(this.getRuleTextForIndex(rule));
const tf = {};
const docLen = terms.length;
terms.forEach(term => {
tf[term] = (tf[term] || 0) + 1;
});
// Use simple normalized TF (count / doc length) * IDF
const vec = {};
Object.keys(tf).forEach(term => {
vec[term] = (tf[term] / docLen) * (this.idf[term] || 0);
});
this.ruleVectors.set(rule.id, vec);
});
}
getRuleTextForIndex(rule) {
// Boost title and tags by repeating them
return `${rule.title} ${rule.title} ${rule.tags.join(' ')} ${rule.tags.join(' ')} ${rule.content}`;
}
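// Unicode-aware tokenizer: keeps letters and digits (including Vietnamese diacritics), replaces
// everything else with spaces, and drops 1-character tokens;
// e.g. tokenize('Dùng React-Hooks!') → ['dùng', 'react', 'hooks'].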
tokenize(text) {
return text.toLowerCase()
.replace(/[^\p{L}\p{N}\s]/gu, ' ')
.split(/\s+/)
.filter(t => t.length >= 2);
}
// --- Scoring Components ---
calculateTextScore(query, rule) {
const queryTerms = this.tokenize(query);
const ruleId = rule.id;
const ruleVec = this.ruleVectors.get(ruleId);
if (!ruleVec || queryTerms.length === 0)
return 0;
let score = 0;
const ruleText = this.getRuleTextForIndex(rule).toLowerCase();
queryTerms.forEach(term => {
// Exact TF-IDF match
if (ruleVec[term]) {
score += ruleVec[term];
}
// Substring match boost (helps with typos or partial words like 'frond end' -> 'frontend')
else if (ruleText.includes(term)) {
score += 0.1; // Increased constant boost for partial matches
}
});
let magnitudeQuery = Math.sqrt(queryTerms.length);
let magnitudeDoc = 0;
Object.values(ruleVec).forEach(v => magnitudeDoc += v * v);
magnitudeDoc = Math.sqrt(magnitudeDoc);
if (magnitudeDoc === 0 || magnitudeQuery === 0)
return 0;
// Rescale score - using a more sensitive factor
return Math.min(1, score / (magnitudeQuery * magnitudeDoc * 1.5 + 0.1));
}
calculatePathScore(rule, openFiles, changedFiles) {
if (!rule.paths || rule.paths.length === 0)
return 0;
const openMatches = openFiles.some(f => matchGlob(f, rule.paths));
const changedMatches = changedFiles.some(f => matchGlob(f, rule.paths));
if (changedMatches)
return 1.0;
if (openMatches)
return 0.6;
return 0;
}
calculateTagScore(rule, queryTags) {
if (!queryTags || queryTags.length === 0)
return 0;
const intersection = rule.tags.filter(t => queryTags.includes(t));
return intersection.length > 0 ? Math.min(1, intersection.length / queryTags.length) : 0;
}
calculatePriorityScore(rule) {
return (rule.priority || 50) / 100;
}
// --- Main Score Function ---
scoreRule(rule, query, openFiles = [], changedFiles = [], queryTags = []) {
const sText = this.calculateTextScore(query, rule);
const sPath = this.calculatePathScore(rule, openFiles, changedFiles);
const sTag = this.calculateTagScore(rule, queryTags);
const sPriority = this.calculatePriorityScore(rule);
// avoid penalty
let penalty = 0;
if (rule.avoid && rule.avoid.length > 0) {
const avoidMatchesQuery = rule.avoid.some(avoidTerm => query.toLowerCase().includes(avoidTerm.toLowerCase()));
if (avoidMatchesQuery)
penalty = 0.5;
}
// Boost heuristics
let boost = 0;
const q = query.toLowerCase();
// Testing boost
if (/test|vitest|msw|coverage|kiểm tra/.test(q) && rule.tags.includes('testing'))
boost += 0.2;
// Frontend / React boost (whitespace is stripped from the query, so multi-word terms are written without spaces)
if (/react|component|hook|tsx|frontend|giaodiện/.test(q.replace(/\s+/g, '')) || /frond\s*end/.test(q)) {
if (rule.relativePath.includes('frontend') || rule.tags.includes('frontend') || rule.tags.includes('react')) {
boost += 0.2;
}
}
// Creator / Author boost
if (/creator|author|người viết|tác giả|ai viết/.test(q)) {
if (rule.content.toLowerCase().includes('creator') || rule.content.toLowerCase().includes('author')) {
boost += 0.3;
}
}
const weightedScore = (0.55 * sText) +
(0.25 * sPath) +
(0.12 * sTag) +
(0.08 * sPriority);
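// e.g. sText=0.5, sPath=1.0 (changed-file match), sTag=0, sPriority=0.8
// → 0.275 + 0.25 + 0 + 0.064 = 0.589 before boost/penalty.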
const finalScore = Math.max(0, weightedScore + boost - penalty);
return {
rule,
score: finalScore,
scoreBreakdown: {
text: sText,
path: sPath,
tag: sTag,
priority: sPriority
}
};
}
}
import { Scorer } from '../rules/scorer.js';
import { Composer } from '../rules/composer.js';
import { RuleIndexer } from '../rules/indexer.js';
import * as assert from 'assert';
// Simple test helper
function test(name, fn) {
try {
fn();
console.log(`✅ ${name}`);
}
catch (e) {
console.error(`❌ ${name}`);
console.error(e);
}
}
const mockRule = {
id: 'test-rule',
path: '/abs/test/frontend/rule.md',
relativePath: 'frontend/rule.md',
title: 'Test Rule',
content: 'Rule content',
tags: ['react', 'testing'],
priority: 80,
paths: ['**/*.tsx'],
applies_when: [],
avoid: [],
lastModified: 0
};
async function runTests() {
console.log('--- Running Scorer Tests ---');
// Scorer Path Tests
const scorer = new Scorer();
// We need to index rules first to initialize vectors if we test text score,
// but path score is independent.
scorer.indexRules([mockRule]);
test('Path Score: Exact Match', () => {
const score = scorer.calculatePathScore(mockRule, ['/src/app.tsx'], ['/src/component.tsx']);
// Changed file matches glob **/*.tsx
assert.ok(score >= 1.0, 'Should be 1.0 for changed file match');
});
test('Path Score: Open Match', () => {
const score = scorer.calculatePathScore(mockRule, ['/src/app.tsx'], []);
assert.ok(score >= 0.6, 'Should be 0.6 for open file match');
});
test('Path Score: No Match', () => {
const score = scorer.calculatePathScore(mockRule, ['/src/main.py'], []);
assert.strictEqual(score, 0, 'Should be 0 for no match');
});
console.log('--- Running Composer Tests ---');
const composer = new Composer('.');
test('Deduplication', () => {
const text = `
- Keep this
- Dedupe this
- Dedupe this
- Keep this too
`;
const result = composer.deduplicateLines(text);
const lines = result.split('\n').map((l) => l.trim()).filter((l) => l);
assert.strictEqual(lines.length, 3, 'Should remove 1 duplicate line');
});
console.log('--- Real File Indexing Test ---');
const indexer = new RuleIndexer(process.cwd());
await indexer.init(); // Load real files
const results = indexer.search('react component', {
openFiles: ['src/App.tsx']
});
console.log(`Found ${results.length} rules for "react component"`);
results.forEach(r => console.log(`- ${r.rule.title} (Score: ${r.score.toFixed(2)})`));
// Check if we found the new rules
const foundReact = results.some(r => r.rule.id === 'frontend-react' || r.rule.title.includes('React'));
assert.ok(foundReact, 'Should find React rules in sample files');
const baseContent = await indexer.getBaseRule();
assert.ok(baseContent.length > 0, 'Base rule should be loaded');
console.log('--- All Tests Finished ---');
}
runTests().catch(console.error);
export {};
import { minimatch } from 'minimatch';
import { glob } from 'glob';
import * as fs from 'fs';
export const findMarkdownFiles = async (cwd) => {
// Find all markdown files, ignoring node_modules and dot folders
const files = await glob('**/*.md', {
cwd,
ignore: ['**/node_modules/**', '**/.*/**', '**/Library/**', '**/Library/Application Support/**'],
absolute: true,
follow: false, // Don't follow symlinks to avoid duplication
});
// Use realpath to deduplicate and ensure unique files
const uniqueFiles = Array.from(new Set(files.map(f => {
try {
return fs.realpathSync(f);
}
catch {
return f;
}
})));
return uniqueFiles;
};
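// matchGlob: true if the slash-normalized path matches any pattern; matchBase lets bare patterns
// like '*.test.ts' match files in any directory.
// e.g. matchGlob('src\\components\\App.tsx', ['**/*.tsx']) → true.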
export const matchGlob = (filePath, patterns) => {
if (!patterns || patterns.length === 0)
return false;
// Normalize windows paths for matching
const normalizedPath = filePath.replace(/\\/g, '/');
for (const pattern of patterns) {
if (minimatch(normalizedPath, pattern, { dot: true, matchBase: true }))
return true;
}
return false;
};
......@@ -32,3 +32,5 @@ export const UserProfile = ({ userId, isActive = false }: Props) => {
## Performance
- Use `useMemo` and `useCallback` only when necessary to prevent expensive re-renders or stabilize references.
- Lazy load routes and heavy components using `React.lazy`.
The creator of this rule: LE DANG LONG
\ No newline at end of file
......@@ -3,6 +3,7 @@
"version": "1.0.0",
"description": "",
"main": "dist/index.js",
"type": "module",
"scripts": {
"build": "tsc",
"prestart": "npm run build",
......
......@@ -12,8 +12,12 @@ import { RuleIndexer } from './rules/indexer.js';
import { Composer } from './rules/composer.js';
import * as path from 'path';
import { fileURLToPath } from 'url';
// --- Configuration ---
const ROOT_DIR = process.cwd(); // Assume running from project root
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const ROOT_DIR = path.resolve(__dirname, '..'); // storage root is one level up from dist
// --- Initialization ---
const indexer = new RuleIndexer(ROOT_DIR);
......@@ -142,6 +146,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
title: r.rule.title,
score: r.score.toFixed(2),
tags: r.rule.tags,
content: r.rule.content,
why: `Text:${r.scoreBreakdown.text.toFixed(2)} Path:${r.scoreBreakdown.path.toFixed(2)} Tag:${r.scoreBreakdown.tag.toFixed(2)}`
})), null, 2),
},
......
import { Rule, RuleBundle } from '../types.js';
import { RuleLoader } from './loader.js';
import * as path from 'path';
export class Composer {
private loader: RuleLoader;
......@@ -43,7 +44,9 @@ export class Composer {
// Let's write this class to accept *resolved* array of Rules.
// If the tool definition says input is just ids, the MCP handler will use Indexer to get the Rules, then pass them here.
const rulesToInclude = selectedRules.map(s => s.rule).filter((r): r is Rule => !!r);
const rulesToInclude = selectedRules
.map(s => s.rule)
.filter((r): r is Rule => !!r && r.id !== 'global-base' && path.basename(r.path) !== 'base.md');
// 3. Start composing
let bundle = `# RULE BUNDLE\n> [!IMPORTANT] Priority Order: Selected rules override Base rules where conflicting. Security constraints in Base rules are non-negotiable.\n\n`;
......
......@@ -59,11 +59,15 @@ export class RuleIndexer {
.filter(r => r.score >= minScore)
.sort((a, b) => b.score - a.score);
// Diversification logic: max 2 rules per top-level folder
// Diversification and Deduplication logic
const diversified: SearchResult[] = [];
const categoryCounts: { [cat: string]: number } = {};
const seenIds = new Set<string>();
for (const res of sorted) {
// Deduplicate by ID
if (seenIds.has(res.rule.id)) continue;
// Determine category from relative path (first dir)
const parts = res.rule.relativePath.split(/[/\\]/);
const category = parts.length > 1 ? parts[0] : 'root';
......@@ -71,6 +75,7 @@ export class RuleIndexer {
if ((categoryCounts[category] || 0) < 2) {
diversified.push(res);
categoryCounts[category] = (categoryCounts[category] || 0) + 1;
seenIds.add(res.rule.id);
}
if (diversified.length >= limit) break;
......
......@@ -16,7 +16,8 @@ export class RuleLoader {
const rules: Rule[] = [];
for (const file of files) {
if (path.basename(file).toLowerCase() === 'base.md') continue;
// Note: We used to skip base.md here, but we want it indexed for search.
// if (path.basename(file).toLowerCase() === 'base.md') continue;
try {
const rule = await this.parseRule(file);
......
......@@ -53,55 +53,48 @@ export class Scorer {
}
private getRuleTextForIndex(rule: Rule): string {
return `${rule.title} ${rule.tags.join(' ')} ${rule.content}`;
// Boost title and tags by repeating them
return `${rule.title} ${rule.title} ${rule.tags.join(' ')} ${rule.tags.join(' ')} ${rule.content}`;
}
private tokenize(text: string): string[] {
return text.toLowerCase()
.replace(/[^a-z0-9\s]/g, '')
.replace(/[^\p{L}\p{N}\s]/gu, ' ')
.split(/\s+/)
.filter(t => t.length > 2);
.filter(t => t.length >= 2);
}
// --- Scoring Components ---
private calculateTextScore(query: string, ruleId: string): number {
private calculateTextScore(query: string, rule: Rule): number {
const queryTerms = this.tokenize(query);
const ruleId = rule.id;
const ruleVec = this.ruleVectors.get(ruleId);
if (!ruleVec || queryTerms.length === 0) return 0;
let score = 0;
const ruleText = this.getRuleTextForIndex(rule).toLowerCase();
queryTerms.forEach(term => {
// Exact TF-IDF match
if (ruleVec[term]) {
score += ruleVec[term];
}
// Substring match boost (helps with typos or partial words like 'frond end' -> 'frontend')
else if (ruleText.includes(term)) {
score += 0.1; // Increased constant boost for partial matches
}
});
// Normalize score somewhat?
// TF-IDF summing can go > 1. Let's clamp or sigmoid it?
// Or just simple normalization if creating a relative ranking.
// For now, let's assume raw score is okay but maybe cap at 1.0 for the weighted sum formula
// because S_text is expected to be 0..1 in the prompt.
// A simple heuristic normalization: divide by max theoretical score or just 10?
// Let's use a simpler overlap metric for S_text if TF-IDF is too unbounded.
// Actually, BM25 returns unbounded scores usually.
// Let's check overlap of terms?
// "S_text: ... normalize 0..1"
// Let's try cosine similarity between query and doc?
// Query vector: tf=1 for all terms.
// Simple Jaccard/Overlap for robust 0-1?
// Let's do a localized TF-IDF cosine approx.
let magnitudeQuery = Math.sqrt(queryTerms.length); // approx
let magnitudeQuery = Math.sqrt(queryTerms.length);
let magnitudeDoc = 0;
Object.values(ruleVec).forEach(v => magnitudeDoc += v * v);
magnitudeDoc = Math.sqrt(magnitudeDoc);
if (magnitudeDoc === 0 || magnitudeQuery === 0) return 0;
// Rescale score to 0-1 range roughly
// Cosine similarity = dot_product / (magA * magB)
return Math.min(1, score / (magnitudeQuery * magnitudeDoc * 5 + 0.1)); // Fudge factor
// Rescale score - using a more sensitive factor
return Math.min(1, score / (magnitudeQuery * magnitudeDoc * 1.5 + 0.1));
}
private calculatePathScore(rule: Rule, openFiles: string[], changedFiles: string[]): number {
......@@ -134,7 +127,7 @@ export class Scorer {
changedFiles: string[] = [],
queryTags: string[] = []
): SearchResult {
const sText = this.calculateTextScore(query, rule.id);
const sText = this.calculateTextScore(query, rule);
const sPath = this.calculatePathScore(rule, openFiles, changedFiles);
const sTag = this.calculateTagScore(rule, queryTags);
const sPriority = this.calculatePriorityScore(rule);
......@@ -142,11 +135,6 @@ export class Scorer {
// avoid penalty
let penalty = 0;
if (rule.avoid && rule.avoid.length > 0) {
// If query or files match avoid criteria.
// Simple text match of generic terms in avoid list against query?
// Or if file path matches avoid glob?
// Prompt: "Penalty if query/file match with avoid"
// Let's assume avoid contains keywords or globs.
const avoidMatchesQuery = rule.avoid.some(avoidTerm => query.toLowerCase().includes(avoidTerm.toLowerCase()));
if (avoidMatchesQuery) penalty = 0.5;
}
......@@ -155,9 +143,22 @@ export class Scorer {
let boost = 0;
const q = query.toLowerCase();
if (/test|vitest|msw|coverage/.test(q) && rule.tags.includes('testing')) boost += 0.2;
if (/react|component|hook|tsx/.test(q) && (rule.relativePath.includes('frontend') || rule.tags.includes('react'))) boost += 0.2;
if (/graphql|mutation|schema/.test(q) && (rule.relativePath.includes('backend') || rule.tags.includes('graphql'))) boost += 0.2;
// Testing boost
if (/test|vitest|msw|coverage|kiểm tra/.test(q) && rule.tags.includes('testing')) boost += 0.2;
// Frontend / React boost
if (/react|component|hook|tsx|frontend|giaodiện/.test(q.replace(/\s+/g, '')) || /frond\s*end/.test(q)) {
if (rule.relativePath.includes('frontend') || rule.tags.includes('frontend') || rule.tags.includes('react')) {
boost += 0.2;
}
}
// Creator / Author boost
if (/creator|author|người viết|tác giả|ai viết/.test(q)) {
if (rule.content.toLowerCase().includes('creator') || rule.content.toLowerCase().includes('author')) {
boost += 0.3;
}
}
const weightedScore =
(0.55 * sText) +
......
import { minimatch } from 'minimatch';
import { glob } from 'glob';
import * as fs from 'fs';
export const findMarkdownFiles = async (cwd: string): Promise<string[]> => {
// Find all markdown files, ignoring node_modules and dot folders
const files = await glob('**/*.md', {
cwd,
ignore: ['**/node_modules/**', '**/.*/**'],
ignore: ['**/node_modules/**', '**/.*/**', '**/Library/**', '**/Library/Application Support/**'],
absolute: true,
follow: false, // Don't follow symlinks to avoid duplication
});
return files;
// Use realpath to deduplicate and ensure unique files
const uniqueFiles = Array.from(new Set(files.map(f => {
try {
return fs.realpathSync(f);
} catch {
return f;
}
})));
return uniqueFiles;
};
export const matchGlob = (filePath: string, patterns: string[]): boolean => {
......