import journalData from '../data/ifqbt.json';

// Lookup tables built once, at module load, from the bundled journal data.
//
// journalMap:     normalized name (lower-cased, periods and all whitespace
//                 removed) -> journal record. When two names normalize to
//                 the same key, the record processed last wins.
// partialNameMap: lower-cased leading-word prefix of a name (first word,
//                 first two words, ... but never the complete name)
//                 -> array of journal records whose name starts with it.
//
// Both tables are prototype-less objects, so names such as "constructor" or
// "toString" are treated as ordinary keys instead of colliding with members
// inherited from Object.prototype.
const journalMap = Object.create(null);
const partialNameMap = Object.create(null);

for (const journal of journalData) {
  // Every record is indexed under its full name and, when present, under
  // its `jcr` name.
  for (const name of [journal.journal, journal.jcr]) {
    // Skip absent or malformed names instead of aborting the module load.
    if (typeof name !== 'string' || name === '') {
      continue;
    }

    const normalized = name.toLowerCase().replace(/\./g, '').replace(/\s/g, '');
    journalMap[normalized] = journal;

    const words = name.split(/\s+/);
    for (let count = 1; count < words.length; count++) {
      const partial = words.slice(0, count).join(' ').toLowerCase();
      if (!partialNameMap[partial]) {
        partialNameMap[partial] = [];
      }
      const bucket = partialNameMap[partial];
      // Both names of one record are processed back to back, so a duplicate
      // can only be the most recently pushed element.
      if (bucket[bucket.length - 1] !== journal) {
        bucket.push(journal);
      }
    }
  }
}

/**
 * Scans free text for the longest run of consecutive words that names a
 * known journal.
 *
 * Candidate phrases are normalized the same way the keys of `journalMap` are
 * (lower-cased, periods and whitespace removed) before being looked up, and
 * every word position is considered as a possible start of the name.
 * "Longest" is measured on the normalized key; on ties the earliest phrase
 * wins.
 *
 * @param {string} text - Citation text to scan.
 * @param {number} [maxPhraseWords=16] - Upper bound on the number of words a
 *   candidate phrase may span; keeps the scan linear in the text length.
 * @returns {string} The matched phrase as it appears in `text` (words joined
 *   by single spaces, trailing `,` `;` `:` dropped from the last word when
 *   that is what made it match), or `''` when nothing matches.
 */
function findLongestMatch(text, maxPhraseWords = 16) {
  if (typeof text !== 'string') {
    return '';
  }

  const words = text.split(/\s+/).filter((word) => word !== '');
  const normalizeWord = (word) => word.toLowerCase().replace(/\./g, '');
  // Own-property check so inherited members ("constructor", ...) never count
  // as journals, whatever prototype the map has.
  const isKnownKey = (key) => Object.prototype.hasOwnProperty.call(journalMap, key);

  let longestMatch = '';
  let longestKeyLength = 0;

  for (let start = 0; start < words.length; start++) {
    const limit = Math.min(words.length, start + maxPhraseWords);
    // Normalized key of words[start..stop-1], extended one word at a time.
    let prefixKey = '';

    for (let stop = start; stop < limit; stop++) {
      const word = words[stop];
      // A journal name is often directly followed by punctuation
      // ("Nature, 2020"), so also try the last word without it.
      const bareWord = word.replace(/[,;:]+$/, '');
      const endings = bareWord === word ? [word] : [word, bareWord];

      for (const lastWord of endings) {
        const key = prefixKey + normalizeWord(lastWord);
        if (key.length > longestKeyLength && isKnownKey(key)) {
          longestMatch = [...words.slice(start, stop), lastWord].join(' ');
          longestKeyLength = key.length;
        }
      }

      prefixKey += normalizeWord(word);
    }
  }

  return longestMatch;
}

/**
 * Resolves the journal a citation refers to and returns its metrics.
 *
 * Name-extraction strategies are tried in order: the hard-coded special
 * cases, the regex patterns, then the longest-match scan. A strategy only
 * "wins" when the name it produces resolves to a journal record; otherwise
 * the next strategy is tried, so a regex hit on something that is not a
 * journal no longer hides a later, valid match.
 *
 * @param {string} citationText - Raw citation text.
 * @returns {{name: *, if: *, q: *, b: *, t: *} | null} The `journal`, `IF`,
 *   `Q`, `B` and `T` fields of the matched record, or `null` when the input
 *   is not a non-blank string or no journal can be resolved.
 */
export function extractJournalInfo(citationText) {
  // Nothing to parse; also avoids a TypeError in the string-based extractors.
  if (typeof citationText !== 'string' || citationText.trim() === '') {
    return null;
  }

  const strategies = [checkHardMagazines, extractJournalNameByRegex, findLongestMatch];

  for (const extractName of strategies) {
    const candidate = extractName(citationText);
    if (!candidate) {
      continue;
    }

    // Remove trailing period if present
    const journalName = candidate.replace(/\.$/, '');
    console.log("Extracted journal name:", journalName);

    const journalInfo = findJournalInfo(journalName);
    if (journalInfo) {
      console.log("Found journal info:", journalInfo);
      return {
        name: journalInfo.journal,
        if: journalInfo.IF,
        q: journalInfo.Q,
        b: journalInfo.B,
        t: journalInfo.T,
      };
    }

    // Not a known journal: fall through to the next strategy.
    console.log("No matching journal info found for:", journalName);
  }

  console.log("No journal name found in citation:", citationText);
  return null;
}

/**
 * Detects journals whose names the generic extraction handles poorly.
 *
 * @param {string} citationText - Raw citation text.
 * @returns {string|null} Canonical name of the first special-case journal
 *   detected in the text, or `null` when none is.
 */
function checkHardMagazines(citationText) {
  // [detector, canonical name] pairs, checked in order.
  // To support another hard-to-extract journal, append a pair here, e.g. a
  // detector for "SomeDifficultJournal <digits>" mapped to
  // 'Some Difficult Journal'.
  const hardMagazines = [
    // eLife followed by whitespace and a number
    [/eLife\s+\d+/i, 'eLife'],
  ];

  for (const [detector, canonicalName] of hardMagazines) {
    if (detector.test(citationText)) {
      return canonicalName;
    }
  }

  return null;
}

/**
 * Extracts a journal-name candidate from a citation using positional
 * patterns.
 *
 * Most patterns look for a run of capitalized (optionally abbreviated)
 * words that is immediately followed by a typical citation tail: a URL,
 * volume/page/year information, and so on. Patterns are tried from the most
 * specific tail to the most generic one; the first pattern that matches
 * anywhere in the text decides the result.
 *
 * @param {string} citationText - Raw citation text.
 * @returns {string|null} The matched text with surrounding whitespace
 *   trimmed, or `null` when no pattern matches.
 */
function extractJournalNameByRegex(citationText) {
  // Run of capitalized words, each optionally ending in a period.
  const capitalizedRun = /(?:\b(?:(?!et al\.?)[A-Z][a-z]*\.?\s?)+)/;
  // Builds "<capitalized run><lookahead for the citation tail>".
  const runBefore = (tail) => new RegExp(capitalizedRun.source + tail.source);

  const candidates = [
    runBefore(/(?=\s+https?:\/\/)/),
    runBefore(/(?=\s+\d+\s*,\s*\d+–?\d*\s*\(\d{4}\))/),
    runBefore(/(?=\s+\d+\s*,\s*[A-Za-z]\d+\s*\(\d{4}\))/),
    runBefore(/(?=\s+\d+\s*:\s*[A-Za-z]\d+\s*\(\d{4}\))/),
    runBefore(/(?=\s+\d{4};)/),
    runBefore(/(?=,\s*\d{1,3}\s*\(\d{1,2}\))/),
    runBefore(/(?=\s+\d+\s*,\s*[A-Za-z]+\d+\s*\(\d{4}\))/),
    // All-caps acronym directly before a number.
    /\b[A-Z]+\b(?=\s+\d+)/,
    runBefore(/(?=\s+\d+)/),
  ];

  for (let index = 0; index < candidates.length; index += 1) {
    const found = citationText.match(candidates[index]);
    if (found !== null) {
      return found[0].trim();
    }
  }

  return null;
}

/**
 * Resolves a (possibly noisy) journal name to its record in `journalMap`.
 *
 * Lookup order:
 *   1. The name itself, then progressively shorter leading-word prefixes.
 *      When the name starts with "The", each prefix is also tried without
 *      that article before moving on to a shorter one, so the longest
 *      (most specific) match always wins.
 *   2. If the name contains "Precis. Oncol", the alias "JCO Precis. Oncol".
 *
 * Names are normalized like the keys of `journalMap` (lower-cased, periods
 * and whitespace removed). Only the map's own keys count, so words such as
 * "constructor" never resolve to inherited Object members.
 *
 * @param {string} journalName - Extracted journal name.
 * @param {number} [depth=0] - Kept for backward compatibility with callers
 *   that pass a recursion depth; values above 3 still yield `null`. The
 *   search itself is iterative and does not recurse.
 * @returns {object|null} The matching journal record, or `null`.
 */
function findJournalInfo(journalName, depth = 0) {
  if (depth > 3) {
    console.warn(`Reached maximum recursion depth for journal name: ${journalName}`);
    return null;
  }
  if (typeof journalName !== 'string') {
    return null;
  }

  const lookup = (name) => {
    const key = name.toLowerCase().replace(/\./g, '').replace(/\s/g, '');
    const isKnown = key !== '' && Object.prototype.hasOwnProperty.call(journalMap, key);
    return isKnown ? journalMap[key] : null;
  };

  const words = journalName.trim().split(/\s+/);
  const hasLeadingArticle = words.length > 1 && words[0].toLowerCase() === 'the';

  // Longest prefix first; every prefix is looked up exactly once.
  for (let count = words.length; count >= 1; count--) {
    const withArticle = lookup(words.slice(0, count).join(' '));
    if (withArticle) {
      return withArticle;
    }

    if (hasLeadingArticle && count > 1) {
      const withoutArticle = lookup(words.slice(1, count).join(' '));
      if (withoutArticle) {
        return withoutArticle;
      }
    }
  }

  // Special case for JCO Precis. Oncol.: exact alias only, so a missing
  // alias cannot degrade into a match on a different "JCO ..." journal.
  if (journalName.includes('Precis. Oncol')) {
    return lookup('JCO Precis. Oncol');
  }

  return null;
}