import { Rule, SearchResult } from '../types.js';
import { matchGlob } from '../utils/glob.js';

/** Lookup table from a term to its inverse-document-frequency (IDF) score. */
type TFIDFIndex = Record<string, number>;

/**
 * Ranks rules against a free-text query plus editor context.
 *
 * The final score is a weighted sum of four components (text similarity,
 * path match, tag overlap, priority), plus keyword-driven boosts, minus a
 * penalty when the query mentions one of the rule's `avoid` terms.
 *
 * Call {@link Scorer.indexRules} before scoring; rules that were never
 * indexed simply get a text score of 0.
 */
export class Scorer {
    /** Weights of the four components in the weighted sum. */
    private static readonly TEXT_WEIGHT = 0.55;
    private static readonly PATH_WEIGHT = 0.25;
    private static readonly TAG_WEIGHT = 0.12;
    private static readonly PRIORITY_WEIGHT = 0.08;

    /** Priority assumed for rules that do not declare one (scale 0..100). */
    private static readonly DEFAULT_PRIORITY = 50;
    /** Amount subtracted when the query contains one of the rule's avoid terms. */
    private static readonly AVOID_PENALTY = 0.5;
    /** Amount added per matching boost heuristic. */
    private static readonly BOOST_STEP = 0.2;
    /** Empirical damping applied to the cosine denominator (fudge factor). */
    private static readonly TEXT_DAMPING = 5;
    /** Small constant added to the cosine denominator. */
    private static readonly TEXT_EPSILON = 0.1;

    // Maps (not plain objects) are used for every term dictionary so that a
    // token such as "constructor" cannot collide with members inherited from
    // Object.prototype, which previously produced NaN scores.
    private idf: Map<string, number> = new Map();
    /** Per rule id: TF-IDF weight per term and the Euclidean norm of those weights. */
    private ruleVectors: Map<string, { weights: Map<string, number>; norm: number }> = new Map();

    constructor() { }

    /**
     * Rebuilds the in-memory TF-IDF model from the given rules, replacing any
     * previous index.
     *
     * Weight of a term in a rule = (occurrences / token count) * IDF, where
     * IDF = ln(1 + totalDocs / docsContainingTerm).
     *
     * @param rules All rules that should be searchable. If several rules share
     *              an id, the last one wins in the vector table.
     */
    public indexRules(rules: Rule[]): void {
        const totalDocs = rules.length;

        // Tokenize each rule exactly once; both passes below reuse the result.
        const docs = rules.map(rule => ({
            id: rule.id,
            terms: this.tokenize(this.getRuleTextForIndex(rule)),
        }));

        // 1. Document frequency: number of rules containing each term.
        const docFreq = new Map<string, number>();
        for (const doc of docs) {
            for (const term of new Set(doc.terms)) {
                docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
            }
        }

        // 2. Inverse document frequency.
        const idf = new Map<string, number>();
        for (const [term, freq] of docFreq) {
            idf.set(term, Math.log(1 + totalDocs / freq));
        }
        this.idf = idf;

        // 3. TF-IDF vector per rule, with its norm cached for scoring.
        this.ruleVectors.clear();
        for (const doc of docs) {
            const counts = new Map<string, number>();
            for (const term of doc.terms) {
                counts.set(term, (counts.get(term) ?? 0) + 1);
            }

            const docLen = doc.terms.length;
            const weights = new Map<string, number>();
            let sumSquares = 0;
            for (const [term, count] of counts) {
                const weight = (count / docLen) * (idf.get(term) ?? 0);
                weights.set(term, weight);
                sumSquares += weight * weight;
            }
            this.ruleVectors.set(doc.id, { weights, norm: Math.sqrt(sumSquares) });
        }
    }

    /** Concatenates the fields of a rule that take part in text search. */
    private getRuleTextForIndex(rule: Rule): string {
        return `${rule.title} ${rule.tags.join(' ')} ${rule.content}`;
    }

    /**
     * Lowercases the text, strips every character that is not `a-z`, `0-9`
     * or whitespace, splits on whitespace and drops tokens shorter than
     * three characters.
     */
    private tokenize(text: string): string[] {
        return text.toLowerCase()
            .replace(/[^a-z0-9\s]/g, '')
            .split(/\s+/)
            .filter(t => t.length > 2);
    }

    // --- Scoring Components ---

    /**
     * Damped cosine-style similarity between the query and an indexed rule.
     *
     * The query is treated as a vector with weight 1 per token (repeated
     * tokens count repeatedly), so its norm is approximated by
     * sqrt(tokenCount). The result is capped at 1.
     *
     * @returns A value in 0..1; 0 when the rule is not indexed, the query has
     *          no usable tokens, or the rule vector is empty.
     */
    private calculateTextScore(query: string, ruleId: string): number {
        const queryTerms = this.tokenize(query);
        const vector = this.ruleVectors.get(ruleId);
        if (!vector || queryTerms.length === 0 || vector.norm === 0) return 0;

        let dot = 0;
        for (const term of queryTerms) {
            dot += vector.weights.get(term) ?? 0;
        }

        const queryNorm = Math.sqrt(queryTerms.length);
        const denominator = queryNorm * vector.norm * Scorer.TEXT_DAMPING + Scorer.TEXT_EPSILON;
        return Math.min(1, dot / denominator);
    }

    /**
     * Scores how well the rule's path globs match the editor context.
     *
     * @returns 1.0 if any changed file matches, otherwise 0.6 if any open
     *          file matches, otherwise 0 (also 0 when the rule has no paths).
     */
    private calculatePathScore(rule: Rule, openFiles: string[], changedFiles: string[]): number {
        if (!rule.paths || rule.paths.length === 0) return 0;

        // Changed files outrank open files, so check them first.
        if (changedFiles.some(f => matchGlob(f, rule.paths))) return 1.0;
        if (openFiles.some(f => matchGlob(f, rule.paths))) return 0.6;
        return 0;
    }

    /**
     * Fraction of the requested tags that the rule carries, capped at 1.
     *
     * @returns 0 when no query tags are given or none match.
     */
    private calculateTagScore(rule: Rule, queryTags: string[]): number {
        if (!queryTags || queryTags.length === 0) return 0;
        const intersection = rule.tags.filter(t => queryTags.includes(t));
        return intersection.length > 0 ? Math.min(1, intersection.length / queryTags.length) : 0;
    }

    /**
     * Rule priority divided by 100. Only a missing priority falls back to the
     * default; an explicit priority of 0 is honoured.
     */
    private calculatePriorityScore(rule: Rule): number {
        return (rule.priority ?? Scorer.DEFAULT_PRIORITY) / 100;
    }

    // --- Main Score Function ---

    /**
     * Computes the relevance of one rule for the given query and context.
     *
     * @param rule         Rule to score.
     * @param query        Free-text query.
     * @param openFiles    Paths of files currently open.
     * @param changedFiles Paths of files with changes.
     * @param queryTags    Tags requested by the caller.
     * @returns The rule, its final score (never negative) and the four
     *          component scores. Boosts and the avoid penalty are included in
     *          `score` but not in the breakdown.
     */
    public scoreRule(
        rule: Rule,
        query: string,
        openFiles: string[] = [],
        changedFiles: string[] = [],
        queryTags: string[] = []
    ): SearchResult {
        const sText = this.calculateTextScore(query, rule.id);
        const sPath = this.calculatePathScore(rule, openFiles, changedFiles);
        const sTag = this.calculateTagScore(rule, queryTags);
        const sPriority = this.calculatePriorityScore(rule);

        const q = query.toLowerCase();

        // Avoid penalty: case-insensitive substring match of any avoid term
        // against the query. Blank terms are skipped because every string
        // contains the empty string, which would penalise the rule always.
        const avoidTerms: string[] = rule.avoid ?? [];
        const avoidMatchesQuery = avoidTerms.some(
            (avoidTerm: string) => avoidTerm.trim() !== '' && q.includes(avoidTerm.toLowerCase())
        );
        const penalty = avoidMatchesQuery ? Scorer.AVOID_PENALTY : 0;

        // Boost heuristics: query keywords paired with a rule tag or location.
        let boost = 0;
        if (/test|vitest|msw|coverage/.test(q) && rule.tags.includes('testing')) {
            boost += Scorer.BOOST_STEP;
        }
        if (/react|component|hook|tsx/.test(q) && (rule.relativePath.includes('frontend') || rule.tags.includes('react'))) {
            boost += Scorer.BOOST_STEP;
        }
        if (/graphql|mutation|schema/.test(q) && (rule.relativePath.includes('backend') || rule.tags.includes('graphql'))) {
            boost += Scorer.BOOST_STEP;
        }

        const weightedScore =
            (Scorer.TEXT_WEIGHT * sText) +
            (Scorer.PATH_WEIGHT * sPath) +
            (Scorer.TAG_WEIGHT * sTag) +
            (Scorer.PRIORITY_WEIGHT * sPriority);

        const finalScore = Math.max(0, weightedScore + boost - penalty);

        return {
            rule,
            score: finalScore,
            scoreBreakdown: {
                text: sText,
                path: sPath,
                tag: sTag,
                priority: sPriority
            }
        };
    }
}
