import { matchGlob } from '../utils/glob.js';
/**
 * Ranks rules against a free-text query by blending four signals:
 * TF-IDF text similarity, file-path glob matches, tag overlap and rule
 * priority, plus a few keyword-triggered boosts and an "avoid" penalty.
 *
 * Call {@link Scorer#indexRules} once with the full rule set before scoring;
 * rules that were never indexed get a text score of 0.
 *
 * Fields read from a rule object: `id`, `title`, `tags`, `content`, `paths`,
 * `priority`, `avoid`, `relativePath`. All except `id` are optional.
 */
export class Scorer {
    /**
     * Inverse document frequency per term. Created without a prototype so a
     * term such as "constructor" can never resolve to an inherited
     * `Object.prototype` member.
     */
    idf = Object.create(null);
    /** Maps `rule.id` to that rule's sparse TF-IDF vector (term -> weight). */
    ruleVectors = new Map();
    constructor() { }
    /**
     * Rebuilds the in-memory TF-IDF model from scratch.
     *
     * Weight of a term in a rule = (occurrences / token count) * idf, where
     * idf = ln(1 + totalDocs / docsContainingTerm). Intended for a small
     * corpus (tens to a few hundred rules), so everything stays in memory.
     *
     * @param {Array<object>} rules - Every rule that may later be scored.
     * @returns {void}
     */
    indexRules(rules) {
        const totalDocs = rules.length;
        // Tokenise each rule once; both passes below reuse the result.
        const tokenised = rules.map(rule => ({
            id: rule.id,
            terms: this.tokenize(this.getRuleTextForIndex(rule)),
        }));
        // 1. Document frequency: in how many rules does each term appear?
        const docFreq = new Map();
        for (const { terms } of tokenised) {
            for (const term of new Set(terms)) {
                docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
            }
        }
        // 2. Inverse document frequency.
        const idf = Object.create(null);
        for (const [term, frequency] of docFreq) {
            idf[term] = Math.log(1 + (totalDocs / frequency));
        }
        this.idf = idf;
        // 3. Length-normalised TF * IDF vector per rule.
        this.ruleVectors.clear();
        for (const { id, terms } of tokenised) {
            const counts = new Map();
            for (const term of terms) {
                counts.set(term, (counts.get(term) ?? 0) + 1);
            }
            const vec = Object.create(null);
            for (const [term, count] of counts) {
                vec[term] = (count / terms.length) * idf[term];
            }
            this.ruleVectors.set(id, vec);
        }
    }
    /**
     * Builds the text that represents a rule in the index. Title and tags
     * are repeated twice so they weigh more than body content.
     *
     * @param {object} rule - Rule with optional `title`, `tags`, `content`.
     * @returns {string} Space-joined text; missing fields contribute nothing.
     */
    getRuleTextForIndex(rule) {
        const title = rule.title ?? '';
        const tags = (rule.tags ?? []).join(' ');
        const content = rule.content ?? '';
        return `${title} ${title} ${tags} ${tags} ${content}`;
    }
    /**
     * Lower-cases the text, replaces every character that is not a Unicode
     * letter, number or whitespace with a space, splits on whitespace and
     * drops tokens shorter than two characters.
     *
     * @param {string} text - Text to split; null/undefined yields no tokens.
     * @returns {string[]} Tokens in order of appearance (duplicates kept).
     */
    tokenize(text) {
        return String(text ?? '').toLowerCase()
            .replace(/[^\p{L}\p{N}\s]/gu, ' ')
            .split(/\s+/)
            .filter(t => t.length >= 2);
    }
    // --- Scoring Components ---
    /**
     * Text relevance of a rule for a query, in the range [0, 1].
     *
     * Each query term adds its TF-IDF weight when the rule was indexed with
     * that exact term; otherwise it adds a flat 0.1 when the term occurs as a
     * substring of the rule text (partial words). The sum is then rescaled by
     * the query and document vector magnitudes.
     *
     * @param {string} query - Free-text query.
     * @param {object} rule - Rule to score; looked up by `rule.id`.
     * @returns {number} 0 when the rule is not indexed or the query is empty.
     */
    calculateTextScore(query, rule) {
        const queryTerms = this.tokenize(query);
        const ruleVec = this.ruleVectors.get(rule.id);
        if (!ruleVec || queryTerms.length === 0)
            return 0;
        const ruleText = this.getRuleTextForIndex(rule).toLowerCase();
        let score = 0;
        for (const term of queryTerms) {
            // Own-property lookup only: never pick up inherited members
            // (e.g. "constructor") if a vector happens to be a plain object.
            const weight = Object.hasOwn(ruleVec, term) ? ruleVec[term] : 0;
            if (weight) {
                score += weight;
            }
            else if (ruleText.includes(term)) {
                score += 0.1;
            }
        }
        const magnitudeQuery = Math.sqrt(queryTerms.length);
        let sumOfSquares = 0;
        for (const weight of Object.values(ruleVec)) {
            sumOfSquares += weight * weight;
        }
        const magnitudeDoc = Math.sqrt(sumOfSquares);
        if (magnitudeDoc === 0 || magnitudeQuery === 0)
            return 0;
        // The 1.5 factor and 0.1 offset are empirical rescaling constants;
        // the result is capped at 1.
        return Math.min(1, score / (magnitudeQuery * magnitudeDoc * 1.5 + 0.1));
    }
    /**
     * Scores how well a rule's path globs match the files being worked on.
     *
     * NOTE(review): `matchGlob(file, patterns)` is assumed to return a truthy
     * value when `file` matches the given patterns - confirm in utils/glob.
     *
     * @param {object} rule - Rule with optional `paths` glob list.
     * @param {string[]} [openFiles] - Currently open files.
     * @param {string[]} [changedFiles] - Files with changes.
     * @returns {number} 1.0 for a changed-file match, 0.6 for an open-file
     *   match, 0 otherwise or when the rule declares no paths.
     */
    calculatePathScore(rule, openFiles = [], changedFiles = []) {
        if (!rule.paths || rule.paths.length === 0)
            return 0;
        if (changedFiles.some(f => matchGlob(f, rule.paths)))
            return 1.0;
        if (openFiles.some(f => matchGlob(f, rule.paths)))
            return 0.6;
        return 0;
    }
    /**
     * Fraction of the requested tags that the rule carries, capped at 1.
     *
     * @param {object} rule - Rule with optional `tags`.
     * @param {string[]} queryTags - Tags requested by the caller.
     * @returns {number} 0 when no tags were requested or none match.
     */
    calculateTagScore(rule, queryTags) {
        if (!queryTags || queryTags.length === 0)
            return 0;
        const wanted = new Set(queryTags);
        const matches = (rule.tags ?? []).filter(t => wanted.has(t)).length;
        return matches > 0 ? Math.min(1, matches / queryTags.length) : 0;
    }
    /**
     * Converts `rule.priority` to a score by dividing by 100.
     *
     * @param {object} rule - Rule with optional numeric `priority`.
     * @returns {number} `priority / 100`; a missing priority counts as 50.
     *   An explicit priority of 0 is honoured and yields 0.
     */
    calculatePriorityScore(rule) {
        return (rule.priority ?? 50) / 100;
    }
    // --- Main Score Function ---
    /**
     * Computes the final relevance score for one rule.
     *
     * final = max(0, 0.55*text + 0.25*path + 0.12*tag + 0.08*priority
     *                + boosts - penalty)
     *
     * @param {object} rule - Rule to score.
     * @param {string} query - Free-text query.
     * @param {string[]} [openFiles] - Currently open files.
     * @param {string[]} [changedFiles] - Files with changes.
     * @param {string[]} [queryTags] - Tags requested by the caller.
     * @returns {{rule: object, score: number, scoreBreakdown: {text: number,
     *   path: number, tag: number, priority: number}}} The rule, its final
     *   score and the four un-weighted component scores.
     */
    scoreRule(rule, query, openFiles = [], changedFiles = [], queryTags = []) {
        const sText = this.calculateTextScore(query, rule);
        const sPath = this.calculatePathScore(rule, openFiles, changedFiles);
        const sTag = this.calculateTagScore(rule, queryTags);
        const sPriority = this.calculatePriorityScore(rule);
        const q = String(query ?? '').toLowerCase();
        const tags = rule.tags ?? [];
        const content = (rule.content ?? '').toLowerCase();
        // Penalty: the query mentions something this rule says to avoid.
        let penalty = 0;
        if (rule.avoid && rule.avoid.length > 0) {
            const avoidMatchesQuery = rule.avoid.some(avoidTerm => q.includes(avoidTerm.toLowerCase()));
            if (avoidMatchesQuery)
                penalty = 0.5;
        }
        // Boost heuristics
        let boost = 0;
        // Testing boost
        if (/test|vitest|msw|coverage|kiểm tra/.test(q) && tags.includes('testing'))
            boost += 0.2;
        // Frontend / React boost. The whitespace-stripped query lets
        // "front end" match "frontend"; the raw query is tested as well
        // because the phrase "giao diện" contains a space and can never
        // match once whitespace has been removed.
        const frontendQuery = /react|component|hook|tsx|frontend|giao diện/;
        const compactQuery = q.replace(/\s+/g, '');
        if (frontendQuery.test(compactQuery) || frontendQuery.test(q) || /frond\s*end/.test(q)) {
            if (rule.relativePath?.includes('frontend') || tags.includes('frontend') || tags.includes('react')) {
                boost += 0.2;
            }
        }
        // Creator / Author boost
        if (/creator|author|người viết|tác giả|ai viết/.test(q)) {
            if (content.includes('creator') || content.includes('author')) {
                boost += 0.3;
            }
        }
        const weightedScore = (0.55 * sText) +
            (0.25 * sPath) +
            (0.12 * sTag) +
            (0.08 * sPriority);
        const finalScore = Math.max(0, weightedScore + boost - penalty);
        return {
            rule,
            score: finalScore,
            scoreBreakdown: {
                text: sText,
                path: sPath,
                tag: sTag,
                priority: sPriority
            }
        };
    }
}
