import XLSX from 'xlsx';
import { writeFileSync, existsSync } from 'fs';
import { join } from 'path';

/**
 * Normalise a cell value into a tidy single-line string.
 *
 * Falsy input (null, undefined, '', 0, false, NaN) yields an empty string.
 * Anything else is stringified, trimmed, and has every run of whitespace
 * collapsed to a single space.
 *
 * @param {*} text - Raw value to clean.
 * @returns {string} The cleaned text, or '' for falsy input.
 */
function cleanText(text) {
  return text ? String(text).trim().replace(/\s+/g, ' ') : '';
}

/**
 * Pull the first phone-number-like substring out of a value.
 *
 * Two shapes are recognised: "+27"/"27"/"0" followed by a non-zero digit and
 * eight more digits, or a 3-3-4 digit grouping with optional hyphen or
 * whitespace separators.
 *
 * @param {*} text - Value to search; it is stringified before matching.
 * @returns {string} The first match, or '' when the input is falsy or
 *   contains no match.
 */
function extractPhone(text) {
  if (text) {
    const found = String(text).match(/(\+?27|0)[1-9]\d{8}|\d{3}[-\s]?\d{3}[-\s]?\d{4}/g);
    if (found !== null) {
      return found[0];
    }
  }
  return '';
}

/**
 * Pull the first e-mail-address-like substring out of a value.
 *
 * @param {*} text - Value to search; it is stringified before matching.
 * @returns {string} The first match, or '' when the input is falsy or
 *   contains no match.
 */
function extractEmail(text) {
  if (text) {
    const found = String(text).match(/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g);
    if (found !== null) {
      return found[0];
    }
  }
  return '';
}

/**
 * Pull the first website-like substring out of a value and return it as an
 * absolute URL string.
 *
 * Recognised shapes, tried in this order at each position: an explicit
 * http(s):// URL, a "www."-prefixed host, or a bare "host.tld" name.
 * Matching is case-insensitive. Trailing sentence punctuation picked up from
 * the surrounding prose is removed, and "https://" is prepended when the
 * match has no explicit scheme.
 *
 * @param {*} text - Value to search; it is stringified before matching.
 * @returns {string} The normalised URL, or '' when the input is falsy or
 *   contains no match.
 */
function extractWebsite(text) {
  if (!text) return '';

  const websiteRegex = /(https?:\/\/[^\s]+|www\.[^\s]+|[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/gi;
  const matches = String(text).match(websiteRegex);
  if (!matches) return '';

  // The [^\s]+ alternatives swallow punctuation that ends the sentence
  // ("see www.example.com."), so drop it before normalising.
  const candidate = matches[0].replace(/[.,;:!?]+$/, '');

  // Test for a real scheme, not merely a leading "http": a host such as
  // "httpbin.org" still needs one.
  const hasScheme = /^https?:\/\//i.test(candidate);
  return hasScheme ? candidate : `https://${candidate}`;
}

/**
 * Parse a "lat,lng" string into numeric coordinates.
 *
 * Both values are null unless BOTH parts parse to finite numbers, so a
 * malformed cell never yields NaN or a half-populated coordinate pair.
 *
 * @param {string} geo - Cleaned geo cell text, e.g. "-26.2041, 28.0473".
 * @returns {{latitude: (number|null), longitude: (number|null)}}
 */
function parseGeoCoordinates(geo) {
  const missing = { latitude: null, longitude: null };
  if (!geo || !geo.includes(',')) return missing;

  const [lat, lng] = geo.split(',');
  const latitude = Number.parseFloat(lat.trim());
  const longitude = Number.parseFloat(lng.trim());

  if (!Number.isFinite(latitude) || !Number.isFinite(longitude)) return missing;
  return { latitude, longitude };
}

/**
 * Read the first sheet of an Excel workbook and convert its rows into
 * business records (no images are downloaded).
 *
 * Rows are read by column name: name, description, slogan, address, phone,
 * email, website, contact_person, is_verified, badges, geo, store_id.
 * Rows without a name are skipped; they still count toward `maxRows`, which
 * caps how many sheet rows are examined.
 *
 * @param {string} filepath - Path of the workbook to read.
 * @param {string} category - Category label stored on every record.
 * @param {number} [maxRows=1000] - Maximum number of sheet rows to examine.
 * @returns {Array<Object>} The business records built from the sheet.
 * @throws Whatever XLSX.readFile throws when the file cannot be read.
 */
function processExcelFile(filepath, category, maxRows = 1000) {
  console.log(`Processing file: ${filepath} for category: ${category}`);

  const workbook = XLSX.readFile(filepath);
  const sheetName = workbook.SheetNames[0];
  const worksheet = workbook.Sheets[sheetName];
  const data = XLSX.utils.sheet_to_json(worksheet);

  const businesses = [];

  for (const row of data.slice(0, maxRows)) {
    const name = cleanText(row['name'] || '');
    if (!name) continue; // Skip rows that have no business name

    const description = cleanText(row['description'] || '');
    const slogan = cleanText(row['slogan'] || '');
    const address = cleanText(row['address'] || '');
    const badges = cleanText(row['badges'] || '');
    const { latitude, longitude } = parseGeoCoordinates(cleanText(row['geo'] || ''));

    // A "premium" badge marks the listing as featured for the next 30 days.
    const isPremium = badges.toLowerCase().includes('premium');

    businesses.push({
      store_id: row['store_id'] || '',
      name,
      // Fall back to the slogan, then to a generated blurb.
      description: description || slogan || `${name} - ${category} services`,
      category,
      location: address || 'South Africa',
      contact_email: cleanText(row['email'] || ''),
      contact_phone: cleanText(row['phone'] || ''),
      website: cleanText(row['website'] || ''),
      contact_person: cleanText(row['contact_person'] || ''),
      slogan,
      latitude,
      longitude,
      image_url: null, // Will be set later when images are downloaded
      original_image_url: '', // No image URLs in this dataset
      is_verified: row['is_verified'] === 'Yes',
      rating: 0,
      review_count: 0,
      is_featured: isPremium,
      featured_until: isPremium
        ? new Date(Date.now() + 30 * 24 * 60 * 60 * 1000).toISOString()
        : null,
      highlight_color: null,
      created_at: new Date().toISOString()
    });
  }

  console.log(`Processed ${businesses.length} businesses from ${category}`);
  return businesses;
}

/**
 * Process every known category workbook and write the combined records to a
 * JSON file.
 *
 * Each workbook is looked up inside `inputDir`. A missing file is logged and
 * skipped; a file that fails to process is logged and skipped as well, so
 * one bad workbook does not abort the run. The output file is always
 * written, even when no records were produced.
 *
 * @param {string} [inputDir='attached_assets'] - Directory holding the
 *   category workbooks.
 * @param {string} [outputFile='processed_businesses.json'] - Path of the
 *   JSON file to write.
 * @returns {Array<Object>} All business records from all categories.
 * @throws Whatever writeFileSync throws when the output cannot be written.
 */
function processAllFiles(inputDir = 'attached_assets', outputFile = 'processed_businesses.json') {
  const categories = [
    { file: 'yep-co-za_Agriculture_1753460631863.xlsx', category: 'Agriculture' },
    { file: 'yep-co-za_Car_&_Automotive_1753460631864.xlsx', category: 'Car & Automotive' },
    { file: 'yep-co-za_Education_Training_Lessons_1753460631864.xlsx', category: 'Education & Training' },
    { file: 'yep-co-za_Engineering_1753460631864.xlsx', category: 'Engineering' },
    { file: 'yep-co-za_Events_&_entertainment_1753460631865.xlsx', category: 'Events & Entertainment' },
    { file: 'yep-co-za_Financial_&_Insurance_Services_1753460631865.xlsx', category: 'Financial & Insurance' },
    { file: 'yep-co-za_Home_Building_&_Trade_1753460631865.xlsx', category: 'Home Building & Trade' },
    { file: 'yep-co-za_Hospitality_1753460631866.xlsx', category: 'Hospitality' },
    { file: 'yep-co-za_IT_Computer_&_Technology_1753460631866.xlsx', category: 'IT & Technology' },
    { file: 'yep-co-za_Marketing_Business,_and_Legal_1753460631860.xlsx', category: 'Marketing & Legal' },
    { file: 'yep-co-za_Medical_Wellness_&_Beauty_1753460631861.xlsx', category: 'Medical & Beauty' },
    { file: 'yep-co-za_Public_Service_&_Government_1753460631861.xlsx', category: 'Public Service' },
    { file: 'yep-co-za_Security_Systems_&_Protection_1753460631862.xlsx', category: 'Security & Protection' },
    { file: 'yep-co-za_Telecommunication_1753460631862.xlsx', category: 'Telecommunication' },
    { file: 'yep-co-za_Trade_&_Industry_1753460631862.xlsx', category: 'Trade & Industry' },
    { file: 'yep-co-za_Transport_1753460631863.xlsx', category: 'Transport' }
  ];

  const allBusinesses = [];

  for (const { file, category } of categories) {
    const filepath = join(inputDir, file);

    if (!existsSync(filepath)) {
      console.log(`File not found: ${filepath}`);
      continue;
    }

    try {
      allBusinesses.push(...processExcelFile(filepath, category));
    } catch (error) {
      // Best effort: report the failure and carry on with the next workbook.
      console.error(`Error processing ${file}:`, error.message);
    }
  }

  // Save all businesses to the JSON file
  writeFileSync(outputFile, JSON.stringify(allBusinesses, null, 2));
  console.log(`Total businesses processed: ${allBusinesses.length}`);
  console.log(`Data saved to ${outputFile}`);

  return allBusinesses;
}

// Run the processing as soon as this module is evaluated (it is a top-level
// call, so it executes whenever the file is run or imported).
processAllFiles();