Commit 5fa44c30 authored by Long Lê Đăng

feat: Implement a new rule indexing and scoring system, including indexing, scoring, and composing modules plus a test runner.
parent 58201380
......@@ -4,7 +4,7 @@ node_modules
.pnp.js
# Production
dist/
# dist/
build/
# Misc
......
# CLAUDE.md - Rules & Guidelines
## 🚨 MANDATORY INITIAL STEP 🚨
**Before** generating any code or answering a complex request, you **MUST** first consult the project's rule database via the MCP server:
1. **EXECUTE TOOL**: `rules_search`
- `query`: The user's exact request or a summary of the task.
- `limit`: 5 (or more if complex).
- `openFiles`: Pass the currently open file paths if relevant.
2. **APPLY RULES**:
- Carefully read the returned rules.
- **Prioritize** these rules over your default training.
- If the rules specify coding styles, architecture patterns, or specific libraries, **OBEY THEM**.
---
## Development Info (For this MCP Project)
- **Build**: `npm run build`
- **Start**: `npm start`
- **Watch**: `npm run watch` (if available) or `tsc -w`
- **Test**: `npm test`
# CLAUDE.md - Client Project Template
> **Usage**: Copy this file to the root of your project as `CLAUDE.md` to enforce rule retrieval via the Awing Rules MCP.
## 🚨 MANDATORY INITIAL STEP 🚨
**Before** executing any task, editing code, or answering complex questions, you **MUST** consult the project's centralized rule database via the connected MCP server:
1. **EXECUTE TOOL**: `rules_search` (from `awing-rules-claudecode-mcp`) — an illustrative call is sketched below this list.
- `query`: The user's exact request, task description, or question.
- `limit`: 5 (default).
- `openFiles`: Pass the list of currently open file paths.
- `changedFiles`: (Optional) Pass list of recently changed files if available.
2. **APPLY RULES**:
- **Read Carefully**: The tool returns rules specifically relevant to your current context.
- **Prioritize**: These rules **override** your default training and general best practices.
- **Obey**: Strictly follow any coding styles, architecture patterns, or library constraints defined in the returned rules.
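For illustration only, the arguments of a `rules_search` call matching the tool schema in this commit might look like the sketch below; the file paths are placeholders, not real project files.

```js
// Illustrative rules_search arguments — paths are hypothetical examples.
const args = {
  query: "Add a React component for the user profile page",
  limit: 5,
  openFiles: ["src/pages/UserProfile.tsx"],
  changedFiles: ["src/api/userClient.ts"],
};
```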
---
## Project Commands
*Replace the commands below with your project-specific commands*
- **Build**: `npm run build`
- **Test**: `npm test`
- **Start**: `npm start`
......@@ -22,3 +22,5 @@ paths: ["**/*"]
## Git & Version Control
- Commit messages must be descriptive (e.g., `feat: check user permissions`).
- Do not commit generated files or secrets.
Answer in Vietnamese
#!/usr/bin/env node
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { CallToolRequestSchema, ErrorCode, ListToolsRequestSchema, McpError, } from '@modelcontextprotocol/sdk/types.js';
import { z } from 'zod';
import { RuleIndexer } from './rules/indexer.js';
import { Composer } from './rules/composer.js';
import * as path from 'path';
import { fileURLToPath } from 'url';
// --- Configuration ---
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const ROOT_DIR = path.resolve(__dirname, '..'); // storage root is one level up from dist
// --- Initialization ---
const indexer = new RuleIndexer(ROOT_DIR);
const composer = new Composer(ROOT_DIR);
// Build the index on startup (async; assumed fast enough for a small rule corpus, otherwise it could be made lazy)
indexer.init().catch(err => console.error('Failed to init index:', err));
const server = new Server({
name: 'awing-rules-claudecode-mcp',
version: '0.1.0',
}, {
capabilities: {
tools: {},
},
});
// --- Tool Definitions ---
server.setRequestHandler(ListToolsRequestSchema, async () => {
return {
tools: [
{
name: 'rules_search',
description: 'Search for relevant project rules based on query and context.',
inputSchema: {
type: 'object',
properties: {
query: { type: 'string', description: 'The search query or user task description' },
openFiles: { type: 'array', items: { type: 'string' }, description: 'List of currently open file paths' },
changedFiles: { type: 'array', items: { type: 'string' }, description: 'List of recently changed file paths' },
tags: { type: 'array', items: { type: 'string' }, description: 'Filter by tags' },
limit: { type: 'number', description: 'Max number of results to return (default 6)' },
minScore: { type: 'number', description: 'Minimum score threshold (default 0.15)' }
},
required: ['query'],
},
},
{
name: 'rules_get',
description: 'Get details of a specific rule by ID or path.',
inputSchema: {
type: 'object',
properties: {
id: { type: 'string' },
path: { type: 'string' },
mode: { type: 'string', enum: ['full', 'snippet', 'sections'], default: 'snippet' }
},
},
},
{
name: 'rules_compose',
description: 'Compose a rule bundle from selected rules and the base rule.',
inputSchema: {
type: 'object',
properties: {
selected: {
type: 'array',
items: {
type: 'object',
properties: {
id: { type: 'string' },
path: { type: 'string' }
}
}
},
mode: { type: 'string', enum: ['full', 'snippet'], default: 'snippet' },
dedupe: { type: 'boolean', default: true },
maxChars: { type: 'number', default: 12000 }
},
required: ['selected'],
},
},
{
name: 'rules_refresh',
description: 'Refresh the rule index from disk.',
inputSchema: {
type: 'object',
properties: {},
},
},
],
};
});
// --- Tool Handlers ---
server.setRequestHandler(CallToolRequestSchema, async (request) => {
const { name, arguments: args } = request.params;
if (name === 'rules_refresh') {
await indexer.refresh();
return {
content: [{ type: 'text', text: 'Rule index refreshed successfully.' }]
};
}
if (name === 'rules_search') {
const input = z.object({
query: z.string(),
openFiles: z.array(z.string()).optional(),
changedFiles: z.array(z.string()).optional(),
tags: z.array(z.string()).optional(),
limit: z.number().optional(),
minScore: z.number().optional()
}).parse(args);
const results = indexer.search(input.query, {
openFiles: input.openFiles,
changedFiles: input.changedFiles,
tags: input.tags,
limit: input.limit,
minScore: input.minScore
});
return {
content: [
{
type: 'text',
text: JSON.stringify(results.map(r => ({
id: r.rule.id,
path: r.rule.relativePath,
title: r.rule.title,
score: r.score.toFixed(2),
tags: r.rule.tags,
content: r.rule.content,
why: `Text:${r.scoreBreakdown.text.toFixed(2)} Path:${r.scoreBreakdown.path.toFixed(2)} Tag:${r.scoreBreakdown.tag.toFixed(2)}`
})), null, 2),
},
],
};
}
if (name === 'rules_get') {
const input = z.object({
id: z.string().optional(),
path: z.string().optional(),
mode: z.string().optional(),
}).parse(args);
const rule = indexer.getRuleByIdOrPath(input.id || input.path || '');
if (!rule) {
return { isError: true, content: [{ type: 'text', text: 'Rule not found' }] };
}
return {
content: [
{
type: 'text',
text: JSON.stringify({
id: rule.id,
path: rule.relativePath,
title: rule.title,
content: rule.content // TODO: Apply mode/snippet logic if needed
}, null, 2),
},
],
};
}
if (name === 'rules_compose') {
const input = z.object({
selected: z.array(z.object({ id: z.string().optional(), path: z.string().optional() })),
mode: z.enum(['full', 'snippet']).optional(),
dedupe: z.boolean().optional(),
maxChars: z.number().optional()
}).parse(args);
// Resolve rules
const resolvedRules = input.selected.map(sel => {
const rule = indexer.getRuleByIdOrPath(sel.id || sel.path || '');
return { ...sel, rule };
});
const bundle = await composer.compose(resolvedRules, {
mode: input.mode,
dedupe: input.dedupe,
maxChars: input.maxChars
});
return {
content: [
{
type: 'text',
text: bundle.content,
},
],
};
}
throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`);
});
async function runServer() {
const transport = new StdioServerTransport();
await server.connect(transport);
console.error('Awing Rules MCP Server running on stdio');
}
runServer().catch((error) => {
console.error('Fatal error running server:', error);
process.exit(1);
});
import { RuleLoader } from './loader.js';
import * as path from 'path';
export class Composer {
loader;
constructor(baseDir) {
this.loader = new RuleLoader(baseDir);
}
async compose(selectedRules, options = {}) {
const { maxChars = 12000, dedupe = true, mode = 'snippet' } = options;
// 1. Load Base Rule
const baseContent = await this.loader.loadBaseRule();
// 2. Resolve Selected Rules
// Design choice: the MCP handler resolves { id?, path? } selections via the Indexer and
// passes the resolved Rule objects in `selectedRules[i].rule`; the Composer only formats.
// base.md (indexed with id 'global-base') is excluded here because the base content is
// prepended separately below.
const rulesToInclude = selectedRules
.map(s => s.rule)
.filter((r) => !!r && r.id !== 'global-base' && path.basename(r.path) !== 'base.md');
// 3. Start composing
let bundle = `# RULE BUNDLE\n> [!IMPORTANT] Priority Order: Selected rules override Base rules where conflicting. Security constraints in Base rules are non-negotiable.\n\n`;
// Add Base
bundle += `## BASE RULES\n\n${baseContent}\n\n`;
// Add Selected
const used = [];
const notes = [];
for (const rule of rulesToInclude) {
bundle += `## ${rule.title} (ID: ${rule.id})\nTitle: ${rule.title}\nSource: ${rule.relativePath}\n\n`;
let contentToAdd = rule.content;
// Snippet mode logic
if (mode === 'snippet') {
contentToAdd = this.extractSnippet(contentToAdd);
}
bundle += contentToAdd + `\n\n---\n\n`;
used.push({ id: rule.id, path: rule.path, title: rule.title });
}
// 4. Deduplication
if (dedupe) {
bundle = this.deduplicateLines(bundle);
}
// 5. Truncation
let truncated = false;
if (bundle.length > maxChars) {
bundle = bundle.slice(0, maxChars) + '\n... (truncated)';
truncated = true;
notes.push(`Output truncated to ${maxChars} chars.`);
}
return {
content: bundle,
used,
truncated,
notes
};
}
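// Bundle layout produced above (illustrative):
//   # RULE BUNDLE
//   > [!IMPORTANT] Priority Order: ...
//   ## BASE RULES
//   <base.md content>
//   ## <Rule Title> (ID: <rule id>)
//   Title / Source lines, then the rule content, each rule separated by '---'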
extractSnippet(content) {
// Snippet mode: keep headings, bullets, blockquotes and code fences; drop long prose paragraphs.
// The classification pass below is prepared, but for now the full content is returned unchanged
// (placeholder until snippet extraction is enabled), since rule files are expected to be mostly bullets.
const lines = content.split('\n');
const keep = [];
let insideCodeBlock = false;
for (const line of lines) {
if (line.trim().startsWith('```')) {
insideCodeBlock = !insideCodeBlock;
keep.push(line);
continue;
}
const isHeader = /^#+\s/.test(line);
const isList = /^[\s]*[-*+]\s/.test(line);
const isQuote = /^>\s/.test(line); // e.g. GitHub-style alerts
if (isHeader || isList || isQuote || insideCodeBlock) {
keep.push(line);
}
// Plain paragraphs and long examples are intentionally skipped in snippet mode.
}
return content; // Placeholder: switch to keep.join('\n') once snippet extraction is enabled.
}
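// deduplicateLines: drops repeated bullet lines (case-insensitive, trimmed length > 10) across the bundle,
// e.g. two identical "- Do not commit generated files or secrets." bullets from different rules collapse
// into one; headings and short lines are always kept.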
deduplicateLines(text) {
const lines = text.split('\n');
const seen = new Set();
const out = [];
for (const line of lines) {
const trimmed = line.trim().toLowerCase();
// Only dedupe substantial bullet points
const isBullet = /^[\s]*[-*+]\s/.test(line);
if (isBullet && trimmed.length > 10) {
if (seen.has(trimmed)) {
continue; // Skip duplicate
}
seen.add(trimmed);
}
out.push(line);
}
return out.join('\n');
}
}
import { RuleLoader } from './loader.js';
import { Scorer } from './scorer.js';
export class RuleIndexer {
loader;
scorer;
cachedRules = [];
lastIndexed = 0;
constructor(baseDir) {
this.loader = new RuleLoader(baseDir);
this.scorer = new Scorer();
}
async init() {
await this.refresh();
}
async refresh() {
this.cachedRules = await this.loader.loadAllRules();
this.scorer.indexRules(this.cachedRules);
this.lastIndexed = Date.now();
}
async getBaseRule() {
return this.loader.loadBaseRule();
}
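// Look up by id (an explicit frontmatter id, or the default relative-path-without-.md)
// or by file path; path lookup is suffix-based after normalizing backslashes to forward slashes.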
getRuleByIdOrPath(idOrPath) {
// Try ID match
let rule = this.cachedRules.find(r => r.id === idOrPath);
if (rule)
return rule;
// Try Path match (exact or relative)
// normalizing slashes
const normalized = idOrPath.replace(/\\/g, '/');
return this.cachedRules.find(r => r.path.replace(/\\/g, '/').endsWith(normalized));
}
search(query, options = {}) {
const { openFiles = [], changedFiles = [], tags = [], limit = 6, minScore = 0.15 } = options;
const results = this.cachedRules.map(rule => this.scorer.scoreRule(rule, query, openFiles, changedFiles, tags));
// Sort by score desc
const sorted = results
.filter(r => r.score >= minScore)
.sort((a, b) => b.score - a.score);
// Diversification and Deduplication logic
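// At most 2 results per top-level rule folder (e.g. 'frontend/', 'backend/'), so one category
// cannot crowd out the rest of the limit.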
const diversified = [];
const categoryCounts = {};
const seenIds = new Set();
for (const res of sorted) {
// Deduplicate by ID
if (seenIds.has(res.rule.id))
continue;
// Determine category from relative path (first dir)
const parts = res.rule.relativePath.split(/[/\\]/);
const category = parts.length > 1 ? parts[0] : 'root';
if ((categoryCounts[category] || 0) < 2) {
diversified.push(res);
categoryCounts[category] = (categoryCounts[category] || 0) + 1;
seenIds.add(res.rule.id);
}
if (diversified.length >= limit)
break;
}
return diversified;
}
}
import * as fs from 'fs/promises';
import * as path from 'path';
import matter from 'gray-matter';
import { findMarkdownFiles } from '../utils/glob.js';
export class RuleLoader {
rootDir;
constructor(rootDir) {
this.rootDir = path.resolve(rootDir);
}
async loadAllRules() {
const files = await findMarkdownFiles(this.rootDir);
const rules = [];
for (const file of files) {
// Note: We used to skip base.md here, but we want it indexed for search.
// if (path.basename(file).toLowerCase() === 'base.md') continue;
try {
const rule = await this.parseRule(file);
if (rule)
rules.push(rule);
}
catch (error) {
console.error(`Failed to parse rule file: ${file}`, error);
}
}
return rules;
}
async loadBaseRule() {
const basePath = path.join(this.rootDir, 'base.md');
try {
const content = await fs.readFile(basePath, 'utf-8');
return content;
}
catch (error) {
// If base.md doesn't exist, return empty string or default warning
return '';
}
}
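// parseRule reads optional frontmatter fields; an illustrative rule file header (values are examples):
// ---
// id: frontend-react
// title: React Component Rules
// tags: [react, frontend]
// priority: 80
// paths: ["**/*.tsx"]
// applies_when: []
// avoid: []
// ---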
async parseRule(filePath) {
const rawContent = await fs.readFile(filePath, 'utf-8');
const { data, content } = matter(rawContent);
const fm = data;
const stats = await fs.stat(filePath);
// The 'avoid' frontmatter field is a string[] of keywords used as a scoring penalty against the query;
// it does not exclude the file from indexing.
const relativePath = path.relative(this.rootDir, filePath);
const id = fm.id || relativePath.replace(/\\/g, '/').replace(/\.md$/, '');
const title = fm.title || path.basename(filePath, '.md');
return {
id,
path: filePath,
relativePath,
title,
content,
tags: fm.tags || [],
priority: typeof fm.priority === 'number' ? fm.priority : 50,
paths: fm.paths || [],
applies_when: fm.applies_when || [],
avoid: fm.avoid || [],
lastModified: stats.mtimeMs
};
}
}
import { matchGlob } from '../utils/glob.js';
export class Scorer {
idf = {};
ruleVectors = new Map();
constructor() { }
// Update internal TF-IDF model based on all rules
// Simplified version: Term Frequency in document * Inverse Document Frequency
// For small corpus (50-200 files), in-memory is fine.
indexRules(rules) {
// 1. Calculate document frequencies
const docFreq = {};
const totalDocs = rules.length;
rules.forEach(rule => {
const terms = this.tokenize(this.getRuleTextForIndex(rule));
const uniqueTerms = new Set(terms);
uniqueTerms.forEach(term => {
docFreq[term] = (docFreq[term] || 0) + 1;
});
});
// 2. Calculate IDF
this.idf = {};
Object.keys(docFreq).forEach(term => {
this.idf[term] = Math.log(1 + (totalDocs / (docFreq[term] || 1)));
});
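// e.g. with 100 rules, a term appearing in 2 of them gets idf = ln(1 + 100/2) ≈ 3.93,
// while a term appearing in every rule gets ln(2) ≈ 0.69.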
// 3. Pre-calculate TF vectors for each rule
this.ruleVectors.clear();
rules.forEach(rule => {
const terms = this.tokenize(this.getRuleTextForIndex(rule));
const tf = {};
const docLen = terms.length;
terms.forEach(term => {
tf[term] = (tf[term] || 0) + 1;
});
// Use simple normalized TF (count / doc length) * IDF
const vec = {};
Object.keys(tf).forEach(term => {
vec[term] = (tf[term] / docLen) * (this.idf[term] || 0);
});
this.ruleVectors.set(rule.id, vec);
});
}
getRuleTextForIndex(rule) {
// Boost title and tags by repeating them
return `${rule.title} ${rule.title} ${rule.tags.join(' ')} ${rule.tags.join(' ')} ${rule.content}`;
}
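// Unicode-aware tokenizer: keeps letters and digits (including Vietnamese diacritics), replaces
// everything else with spaces, and drops 1-character tokens;
// e.g. tokenize('Dùng React-Hooks!') → ['dùng', 'react', 'hooks'].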
tokenize(text) {
return text.toLowerCase()
.replace(/[^\p{L}\p{N}\s]/gu, ' ')
.split(/\s+/)
.filter(t => t.length >= 2);
}
// --- Scoring Components ---
calculateTextScore(query, rule) {
const queryTerms = this.tokenize(query);
const ruleId = rule.id;
const ruleVec = this.ruleVectors.get(ruleId);
if (!ruleVec || queryTerms.length === 0)
return 0;
let score = 0;
const ruleText = this.getRuleTextForIndex(rule).toLowerCase();
queryTerms.forEach(term => {
// Exact TF-IDF match
if (ruleVec[term]) {
score += ruleVec[term];
}
// Substring match boost (helps with typos or partial words like 'frond end' -> 'frontend')
else if (ruleText.includes(term)) {
score += 0.1; // Increased constant boost for partial matches
}
});
let magnitudeQuery = Math.sqrt(queryTerms.length);
let magnitudeDoc = 0;
Object.values(ruleVec).forEach(v => magnitudeDoc += v * v);
magnitudeDoc = Math.sqrt(magnitudeDoc);
if (magnitudeDoc === 0 || magnitudeQuery === 0)
return 0;
// Rescale score - using a more sensitive factor
return Math.min(1, score / (magnitudeQuery * magnitudeDoc * 1.5 + 0.1));
}
calculatePathScore(rule, openFiles, changedFiles) {
if (!rule.paths || rule.paths.length === 0)
return 0;
const openMatches = openFiles.some(f => matchGlob(f, rule.paths));
const changedMatches = changedFiles.some(f => matchGlob(f, rule.paths));
if (changedMatches)
return 1.0;
if (openMatches)
return 0.6;
return 0;
}
calculateTagScore(rule, queryTags) {
if (!queryTags || queryTags.length === 0)
return 0;
const intersection = rule.tags.filter(t => queryTags.includes(t));
return intersection.length > 0 ? Math.min(1, intersection.length / queryTags.length) : 0;
}
calculatePriorityScore(rule) {
return (rule.priority || 50) / 100;
}
// --- Main Score Function ---
scoreRule(rule, query, openFiles = [], changedFiles = [], queryTags = []) {
const sText = this.calculateTextScore(query, rule);
const sPath = this.calculatePathScore(rule, openFiles, changedFiles);
const sTag = this.calculateTagScore(rule, queryTags);
const sPriority = this.calculatePriorityScore(rule);
// avoid penalty
let penalty = 0;
if (rule.avoid && rule.avoid.length > 0) {
const avoidMatchesQuery = rule.avoid.some(avoidTerm => query.toLowerCase().includes(avoidTerm.toLowerCase()));
if (avoidMatchesQuery)
penalty = 0.5;
}
// Boost heuristics
let boost = 0;
const q = query.toLowerCase();
// Testing boost
if (/test|vitest|msw|coverage|kiểm tra/.test(q) && rule.tags.includes('testing'))
boost += 0.2;
// Frontend / React boost (whitespace is stripped from the query, so multi-word terms are written without spaces)
if (/react|component|hook|tsx|frontend|giaodiện/.test(q.replace(/\s+/g, '')) || /frond\s*end/.test(q)) {
if (rule.relativePath.includes('frontend') || rule.tags.includes('frontend') || rule.tags.includes('react')) {
boost += 0.2;
}
}
// Creator / Author boost
if (/creator|author|người viết|tác giả|ai viết/.test(q)) {
if (rule.content.toLowerCase().includes('creator') || rule.content.toLowerCase().includes('author')) {
boost += 0.3;
}
}
const weightedScore = (0.55 * sText) +
(0.25 * sPath) +
(0.12 * sTag) +
(0.08 * sPriority);
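// e.g. sText=0.5, sPath=1.0 (changed-file match), sTag=0, sPriority=0.8
// → 0.275 + 0.25 + 0 + 0.064 = 0.589 before boost/penalty.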
const finalScore = Math.max(0, weightedScore + boost - penalty);
return {
rule,
score: finalScore,
scoreBreakdown: {
text: sText,
path: sPath,
tag: sTag,
priority: sPriority
}
};
}
}
import { Scorer } from '../rules/scorer.js';
import { Composer } from '../rules/composer.js';
import { RuleIndexer } from '../rules/indexer.js';
import * as assert from 'assert';
// Simple test helper
function test(name, fn) {
try {
fn();
console.log(`✅ ${name}`);
}
catch (e) {
console.error(`❌ ${name}`);
console.error(e);
}
}
const mockRule = {
id: 'test-rule',
path: '/abs/test/frontend/rule.md',
relativePath: 'frontend/rule.md',
title: 'Test Rule',
content: 'Rule content',
tags: ['react', 'testing'],
priority: 80,
paths: ['**/*.tsx'],
applies_when: [],
avoid: [],
lastModified: 0
};
async function runTests() {
console.log('--- Running Scorer Tests ---');
// Scorer Path Tests
const scorer = new Scorer();
// We need to index rules first to initialize vectors if we test text score,
// but path score is independent.
scorer.indexRules([mockRule]);
test('Path Score: Exact Match', () => {
const score = scorer.calculatePathScore(mockRule, ['/src/app.tsx'], ['/src/component.tsx']);
// Changed file matches glob **/*.tsx
assert.ok(score >= 1.0, 'Should be 1.0 for changed file match');
});
test('Path Score: Open Match', () => {
const score = scorer.calculatePathScore(mockRule, ['/src/app.tsx'], []);
assert.ok(score >= 0.6, 'Should be 0.6 for open file match');
});
test('Path Score: No Match', () => {
const score = scorer.calculatePathScore(mockRule, ['/src/main.py'], []);
assert.strictEqual(score, 0, 'Should be 0 for no match');
});
console.log('--- Running Composer Tests ---');
const composer = new Composer('.');
test('Deduplication', () => {
const text = `
- Keep this
- Dedupe this
- Dedupe this
- Keep this too
`;
const result = composer.deduplicateLines(text);
const lines = result.split('\n').map((l) => l.trim()).filter((l) => l);
assert.strictEqual(lines.length, 3, 'Should remove 1 duplicate line');
});
console.log('--- Real File Indexing Test ---');
const indexer = new RuleIndexer(process.cwd());
await indexer.init(); // Load real files
const results = indexer.search('react component', {
openFiles: ['src/App.tsx']
});
console.log(`Found ${results.length} rules for "react component"`);
results.forEach(r => console.log(`- ${r.rule.title} (Score: ${r.score.toFixed(2)})`));
// Check if we found the new rules
const foundReact = results.some(r => r.rule.id === 'frontend-react' || r.rule.title.includes('React'));
assert.ok(foundReact, 'Should find React rules in sample files');
const baseContent = await indexer.getBaseRule();
assert.ok(baseContent.length > 0, 'Base rule should be loaded');
console.log('--- All Tests Finished ---');
}
runTests().catch(console.error);
export {};
import { minimatch } from 'minimatch';
import { glob } from 'glob';
import * as fs from 'fs';
export const findMarkdownFiles = async (cwd) => {
// Find all markdown files, ignoring node_modules and dot folders
const files = await glob('**/*.md', {
cwd,
ignore: ['**/node_modules/**', '**/.*/**', '**/Library/**', '**/Library/Application Support/**'],
absolute: true,
follow: false, // Don't follow symlinks to avoid duplication
});
// Use realpath to deduplicate and ensure unique files
const uniqueFiles = Array.from(new Set(files.map(f => {
try {
return fs.realpathSync(f);
}
catch {
return f;
}
})));
return uniqueFiles;
};
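// matchGlob: true if the slash-normalized path matches any pattern; matchBase lets bare patterns
// like '*.test.ts' match files in any directory.
// e.g. matchGlob('src\\components\\App.tsx', ['**/*.tsx']) → true.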
export const matchGlob = (filePath, patterns) => {
if (!patterns || patterns.length === 0)
return false;
// Normalize windows paths for matching
const normalizedPath = filePath.replace(/\\/g, '/');
for (const pattern of patterns) {
if (minimatch(normalizedPath, pattern, { dot: true, matchBase: true }))
return true;
}
return false;
};
......@@ -32,3 +32,5 @@ export const UserProfile = ({ userId, isActive = false }: Props) => {
## Performance
- Use `useMemo` and `useCallback` only when necessary to prevent expensive re-renders or stabilize references.
- Lazy load routes and heavy components using `React.lazy`.
The creator of this rule: LE DANG LONG
\ No newline at end of file
......@@ -3,6 +3,7 @@
"version": "1.0.0",
"description": "",
"main": "dist/index.js",
"type": "module",
"scripts": {
"build": "tsc",
"prestart": "npm run build",
......
......@@ -12,8 +12,12 @@ import { RuleIndexer } from './rules/indexer.js';
import { Composer } from './rules/composer.js';
import * as path from 'path';
import { fileURLToPath } from 'url';
// --- Configuration ---
const ROOT_DIR = process.cwd(); // Assume running from project root
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const ROOT_DIR = path.resolve(__dirname, '..'); // storage root is one level up from dist
// --- Initialization ---
const indexer = new RuleIndexer(ROOT_DIR);
......@@ -142,6 +146,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
title: r.rule.title,
score: r.score.toFixed(2),
tags: r.rule.tags,
content: r.rule.content,
why: `Text:${r.scoreBreakdown.text.toFixed(2)} Path:${r.scoreBreakdown.path.toFixed(2)} Tag:${r.scoreBreakdown.tag.toFixed(2)}`
})), null, 2),
},
......
import { Rule, RuleBundle } from '../types.js';
import { RuleLoader } from './loader.js';
import * as path from 'path';
export class Composer {
private loader: RuleLoader;
......@@ -43,7 +44,9 @@ export class Composer {
// Let's write this class to accept *resolved* array of Rules.
// If the tool definition says input is just ids, the MCP handler will use Indexer to get the Rules, then pass them here.
const rulesToInclude = selectedRules.map(s => s.rule).filter((r): r is Rule => !!r);
const rulesToInclude = selectedRules
.map(s => s.rule)
.filter((r): r is Rule => !!r && r.id !== 'global-base' && path.basename(r.path) !== 'base.md');
// 3. Start composing
let bundle = `# RULE BUNDLE\n> [!IMPORTANT] Priority Order: Selected rules override Base rules where conflicting. Security constraints in Base rules are non-negotiable.\n\n`;
......
......@@ -59,11 +59,15 @@ export class RuleIndexer {
.filter(r => r.score >= minScore)
.sort((a, b) => b.score - a.score);
// Diversification logic: max 2 rules per top-level folder
// Diversification and Deduplication logic
const diversified: SearchResult[] = [];
const categoryCounts: { [cat: string]: number } = {};
const seenIds = new Set<string>();
for (const res of sorted) {
// Deduplicate by ID
if (seenIds.has(res.rule.id)) continue;
// Determine category from relative path (first dir)
const parts = res.rule.relativePath.split(/[/\\]/);
const category = parts.length > 1 ? parts[0] : 'root';
......@@ -71,6 +75,7 @@ export class RuleIndexer {
if ((categoryCounts[category] || 0) < 2) {
diversified.push(res);
categoryCounts[category] = (categoryCounts[category] || 0) + 1;
seenIds.add(res.rule.id);
}
if (diversified.length >= limit) break;
......
......@@ -16,7 +16,8 @@ export class RuleLoader {
const rules: Rule[] = [];
for (const file of files) {
if (path.basename(file).toLowerCase() === 'base.md') continue;
// Note: We used to skip base.md here, but we want it indexed for search.
// if (path.basename(file).toLowerCase() === 'base.md') continue;
try {
const rule = await this.parseRule(file);
......
......@@ -53,55 +53,48 @@ export class Scorer {
}
private getRuleTextForIndex(rule: Rule): string {
return `${rule.title} ${rule.tags.join(' ')} ${rule.content}`;
// Boost title and tags by repeating them
return `${rule.title} ${rule.title} ${rule.tags.join(' ')} ${rule.tags.join(' ')} ${rule.content}`;
}
private tokenize(text: string): string[] {
return text.toLowerCase()
.replace(/[^a-z0-9\s]/g, '')
.replace(/[^\p{L}\p{N}\s]/gu, ' ')
.split(/\s+/)
.filter(t => t.length > 2);
.filter(t => t.length >= 2);
}
// --- Scoring Components ---
private calculateTextScore(query: string, ruleId: string): number {
private calculateTextScore(query: string, rule: Rule): number {
const queryTerms = this.tokenize(query);
const ruleId = rule.id;
const ruleVec = this.ruleVectors.get(ruleId);
if (!ruleVec || queryTerms.length === 0) return 0;
let score = 0;
const ruleText = this.getRuleTextForIndex(rule).toLowerCase();
queryTerms.forEach(term => {
// Exact TF-IDF match
if (ruleVec[term]) {
score += ruleVec[term];
}
// Substring match boost (helps with typos or partial words like 'frond end' -> 'frontend')
else if (ruleText.includes(term)) {
score += 0.1; // Increased constant boost for partial matches
}
});
// Normalize score somewhat?
// TF-IDF summing can go > 1. Let's clamp or sigmoid it?
// Or just simple normalization if creating a relative ranking.
// For now, let's assume raw score is okay but maybe cap at 1.0 for the weighted sum formula
// because S_text is expected to be 0..1 in the prompt.
// A simple heuristic normalization: divide by max theoretical score or just 10?
// Let's use a simpler overlap metric for S_text if TF-IDF is too unbounded.
// Actually, BM25 returns unbounded scores usually.
// Let's check overlap of terms?
// "S_text: ... normalize 0..1"
// Let's try cosine similarity between query and doc?
// Query vector: tf=1 for all terms.
// Simple Jaccard/Overlap for robust 0-1?
// Let's do a localized TF-IDF cosine approx.
let magnitudeQuery = Math.sqrt(queryTerms.length); // approx
let magnitudeQuery = Math.sqrt(queryTerms.length);
let magnitudeDoc = 0;
Object.values(ruleVec).forEach(v => magnitudeDoc += v * v);
magnitudeDoc = Math.sqrt(magnitudeDoc);
if (magnitudeDoc === 0 || magnitudeQuery === 0) return 0;
// Rescale score to 0-1 range roughly
// Cosine similarity = dot_product / (magA * magB)
return Math.min(1, score / (magnitudeQuery * magnitudeDoc * 5 + 0.1)); // Fudge factor
// Rescale score - using a more sensitive factor
return Math.min(1, score / (magnitudeQuery * magnitudeDoc * 1.5 + 0.1));
}
private calculatePathScore(rule: Rule, openFiles: string[], changedFiles: string[]): number {
......@@ -134,7 +127,7 @@ export class Scorer {
changedFiles: string[] = [],
queryTags: string[] = []
): SearchResult {
const sText = this.calculateTextScore(query, rule.id);
const sText = this.calculateTextScore(query, rule);
const sPath = this.calculatePathScore(rule, openFiles, changedFiles);
const sTag = this.calculateTagScore(rule, queryTags);
const sPriority = this.calculatePriorityScore(rule);
......@@ -142,11 +135,6 @@ export class Scorer {
// avoid penalty
let penalty = 0;
if (rule.avoid && rule.avoid.length > 0) {
// If query or files match avoid criteria.
// Simple text match of generic terms in avoid list against query?
// Or if file path matches avoid glob?
// Prompt: "Penalty if query/file match with avoid"
// Let's assume avoid contains keywords or globs.
const avoidMatchesQuery = rule.avoid.some(avoidTerm => query.toLowerCase().includes(avoidTerm.toLowerCase()));
if (avoidMatchesQuery) penalty = 0.5;
}
......@@ -155,9 +143,22 @@ export class Scorer {
let boost = 0;
const q = query.toLowerCase();
if (/test|vitest|msw|coverage/.test(q) && rule.tags.includes('testing')) boost += 0.2;
if (/react|component|hook|tsx/.test(q) && (rule.relativePath.includes('frontend') || rule.tags.includes('react'))) boost += 0.2;
if (/graphql|mutation|schema/.test(q) && (rule.relativePath.includes('backend') || rule.tags.includes('graphql'))) boost += 0.2;
// Testing boost
if (/test|vitest|msw|coverage|kiểm tra/.test(q) && rule.tags.includes('testing')) boost += 0.2;
// Frontend / React boost
if (/react|component|hook|tsx|frontend|giaodiện/.test(q.replace(/\s+/g, '')) || /frond\s*end/.test(q)) {
if (rule.relativePath.includes('frontend') || rule.tags.includes('frontend') || rule.tags.includes('react')) {
boost += 0.2;
}
}
// Creator / Author boost
if (/creator|author|người viết|tác giả|ai viết/.test(q)) {
if (rule.content.toLowerCase().includes('creator') || rule.content.toLowerCase().includes('author')) {
boost += 0.3;
}
}
const weightedScore =
(0.55 * sText) +
......
import { minimatch } from 'minimatch';
import { glob } from 'glob';
import * as fs from 'fs';
export const findMarkdownFiles = async (cwd: string): Promise<string[]> => {
// Find all markdown files, ignoring node_modules and dot folders
const files = await glob('**/*.md', {
cwd,
ignore: ['**/node_modules/**', '**/.*/**'],
ignore: ['**/node_modules/**', '**/.*/**', '**/Library/**', '**/Library/Application Support/**'],
absolute: true,
follow: false, // Don't follow symlinks to avoid duplication
});
return files;
// Use realpath to deduplicate and ensure unique files
const uniqueFiles = Array.from(new Set(files.map(f => {
try {
return fs.realpathSync(f);
} catch {
return f;
}
})));
return uniqueFiles;
};
export const matchGlob = (filePath: string, patterns: string[]): boolean => {
......