#!/usr/bin/env python
"""
CSV Data Loading Script - Fixed Version

Loads CSV data in small batches with correct field names.
"""
import os
import sys
import django
import pandas as pd
from datetime import datetime
from decimal import Decimal
import uuid
import traceback

# Add the project directory to Python path
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Set Django settings
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'dubai_analytics.settings')

# Setup Django
django.setup()

from apps.analytics.models import (
    Broker, Developer, Project, Valuation, Land, Rent, Transaction, Forecast
)
from apps.users.models import User
from apps.core.models import APIRateLimit, SystemConfiguration


def safe_get(row, key, default=None):
    """Safely get a value from a pandas row, handling NaN values."""
    try:
        value = row.get(key, default)
        if pd.isna(value) or value == '' or str(value).lower() == 'nan':
            return default
        return value
    except Exception:
        return default


def safe_decimal(value, default=0):
    """Safely convert to Decimal."""
    try:
        if pd.isna(value) or value == '' or str(value).lower() == 'nan':
            return Decimal(str(default))
        return Decimal(str(value))
    except Exception:
        return Decimal(str(default))


def safe_datetime(value):
    """Safely convert to datetime."""
    try:
        if pd.isna(value) or value == '' or str(value).lower() == 'nan':
            return None
        return pd.to_datetime(value)
    except Exception:
        return None


def safe_int(value, default=0):
    """Safely convert to int."""
    try:
        if pd.isna(value) or value == '' or str(value).lower() == 'nan':
            return default
        return int(float(value))
    except Exception:
        return default


def safe_str(value, default=''):
    """Safely convert to string."""
    try:
        if pd.isna(value) or value == '' or str(value).lower() == 'nan':
            return default
        return str(value).strip()
    except Exception:
        return default


def load_brokers_batch(csv_path, batch_size=10):
    """Load brokers data from CSV in batches."""
    print("šŸ“Š Loading brokers data in batches...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} broker records")

        total_created = 0
        total_errors = 0

        for i in range(0, min(batch_size, len(df))):
            row = df.iloc[i]
            try:
                broker_number = safe_str(row.get('BROKER_NUMBER', ''))
                if not broker_number:
                    total_errors += 1
                    continue

                broker, created = Broker.objects.get_or_create(
                    broker_number=broker_number,
                    defaults={
                        'broker_name_en': safe_str(row.get('BROKER_EN', '')),
                        'gender': safe_str(row.get('GENDER_EN', 'male')),
                        'license_start_date': safe_datetime(row.get('LICENSE_START_DATE')) or datetime.now(),
                        'license_end_date': safe_datetime(row.get('LICENSE_END_DATE')) or datetime.now(),
                        'webpage': safe_str(row.get('WEBPAGE', '')),
                        'phone': safe_str(row.get('PHONE', '')),
                        'fax': safe_str(row.get('FAX', '')),
                        'real_estate_number': safe_str(row.get('REAL_ESTATE_NUMBER', '')),
                        'real_estate_name_en': safe_str(row.get('REAL_ESTATE_EN', '')),
                    }
                )
                if created:
                    total_created += 1
                    print(f" āœ… Created broker: {broker_number}")
            except Exception as e:
                print(f" āŒ Error creating broker {safe_str(row.get('BROKER_NUMBER', 'unknown'))}: {str(e)[:100]}")
                total_errors += 1

        print(f" šŸ“Š Brokers Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" āŒ Error loading brokers: {e}")
        return 0
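
# --- Optional sketch (not called by main()) -----------------------------------------
# For larger files, a single bulk_create per batch avoids one database round trip per
# row. This is a minimal sketch, assuming Broker.broker_number carries a unique
# constraint so that ignore_conflicts=True silently skips rows that already exist; it
# is not part of the loader above.
def load_brokers_bulk(csv_path, batch_size=500):
    """Sketch: bulk-insert brokers from CSV, skipping rows that already exist."""
    df = pd.read_csv(csv_path)
    brokers = []
    for _, row in df.head(batch_size).iterrows():
        broker_number = safe_str(row.get('BROKER_NUMBER', ''))
        if not broker_number:
            continue  # skip rows without the natural key
        brokers.append(Broker(
            broker_number=broker_number,
            broker_name_en=safe_str(row.get('BROKER_EN', '')),
            gender=safe_str(row.get('GENDER_EN', 'male')),
            license_start_date=safe_datetime(row.get('LICENSE_START_DATE')) or datetime.now(),
            license_end_date=safe_datetime(row.get('LICENSE_END_DATE')) or datetime.now(),
            webpage=safe_str(row.get('WEBPAGE', '')),
            phone=safe_str(row.get('PHONE', '')),
            fax=safe_str(row.get('FAX', '')),
            real_estate_number=safe_str(row.get('REAL_ESTATE_NUMBER', '')),
            real_estate_name_en=safe_str(row.get('REAL_ESTATE_EN', '')),
        ))
    # ignore_conflicts relies on the assumed unique constraint on broker_number
    Broker.objects.bulk_create(brokers, ignore_conflicts=True)
    return len(brokers)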

def load_projects_batch(csv_path, batch_size=10):
    """Load projects data from CSV in batches."""
    print("šŸ¢ Loading projects data in batches...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} project records")

        total_created = 0
        total_errors = 0

        for i in range(0, min(batch_size, len(df))):
            row = df.iloc[i]
            try:
                project_number = safe_str(row.get('PROJECT_NUMBER', ''))
                if not project_number:
                    total_errors += 1
                    continue

                # Get or create developer
                developer = None
                dev_number = safe_str(row.get('DEVELOPER_NUMBER', ''))
                if dev_number:
                    developer, _ = Developer.objects.get_or_create(
                        developer_number=dev_number,
                        defaults={'developer_name_en': safe_str(row.get('DEVELOPER_EN', ''))}
                    )

                project, created = Project.objects.get_or_create(
                    project_number=project_number,
                    defaults={
                        'project_name_en': safe_str(row.get('PROJECT_EN', '')),
                        'developer': developer,
                        'start_date': safe_datetime(row.get('START_DATE')) or datetime.now(),
                        'end_date': safe_datetime(row.get('END_DATE')),
                        'adoption_date': safe_datetime(row.get('ADOPTION_DATE')),
                        'project_type': safe_str(row.get('PRJ_TYPE_EN', 'Normal')),
                        'project_value': safe_decimal(row.get('PROJECT_VALUE', 0)),
                        'escrow_account_number': safe_str(row.get('ESCROW_ACCOUNT_NUMBER', '')),
                        'project_status': safe_str(row.get('PROJECT_STATUS', 'ACTIVE')),
                        'percent_completed': safe_decimal(row.get('PERCENT_COMPLETED', 0)),
                        'inspection_date': safe_datetime(row.get('INSPECTION_DATE')),
                        'completion_date': safe_datetime(row.get('COMPLETION_DATE')),
                        'description_en': safe_str(row.get('DESCRIPTION_EN', '')),
                        'area_en': safe_str(row.get('AREA_EN', '')),
                        'zone_en': safe_str(row.get('ZONE_EN', '')),
                        'count_land': safe_int(row.get('CNT_LAND', 0)),
                        'count_building': safe_int(row.get('CNT_BUILDING', 0)),
                        'count_villa': safe_int(row.get('CNT_VILLA', 0)),
                        'count_unit': safe_int(row.get('CNT_UNIT', 0)),
                        'master_project_en': safe_str(row.get('MASTER_PROJECT_EN', '')),
                    }
                )
                if created:
                    total_created += 1
                    print(f" āœ… Created project: {project_number}")
            except Exception as e:
                print(f" āŒ Error creating project {safe_str(row.get('PROJECT_NUMBER', 'unknown'))}: {str(e)[:100]}")
                total_errors += 1

        print(f" šŸ¢ Projects Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" āŒ Error loading projects: {e}")
        return 0


def load_lands_batch(csv_path, batch_size=10):
    """Load lands data from CSV in batches."""
    print("šŸžļø Loading lands data in batches...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} land records")

        total_created = 0
        total_errors = 0

        for i in range(0, min(batch_size, len(df))):
            row = df.iloc[i]
            try:
                # Create a unique land identifier
                land_id = f"LAND_{uuid.uuid4().hex[:8]}"

                land, created = Land.objects.get_or_create(
                    pre_registration_number=land_id,
                    defaults={
                        'land_type': safe_str(row.get('LAND_TYPE_EN', 'Residential')),
                        'property_sub_type': safe_str(row.get('PROP_SUB_TYPE_EN', '')),
                        'actual_area': safe_decimal(row.get('ACTUAL_AREA', 0)),
                        'is_offplan': safe_str(row.get('IS_OFFPLAN_EN', 'Ready')),
                        'is_freehold': safe_str(row.get('IS_FREE_HOLD_EN', 'Free Hold')),
                        'dm_zip_code': safe_str(row.get('DM_ZIP_CODE', '')),
                        'master_project': safe_str(row.get('MASTER_PROJECT_EN', '')),
                        'project_number': safe_str(row.get('PROJECT_NUMBER', '')),
                        'project_name_en': safe_str(row.get('PROJECT_EN', '')),
                        'area_en': safe_str(row.get('AREA_EN', '')),
                        'zone_en': safe_str(row.get('ZONE_EN', '')),
                    }
                )
                if created:
                    total_created += 1
                    print(f" āœ… Created land: {land_id}")
            except Exception as e:
                print(f" āŒ Error creating land: {str(e)[:100]}")
                total_errors += 1

        print(f" šŸžļø Lands Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" āŒ Error loading lands: {e}")
        return 0
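
# --- Optional sketch (not called by main()) -----------------------------------------
# The loaders above read each CSV fully into memory before slicing off a batch. A
# minimal sketch of a streaming alternative using pandas' chunksize option, which
# yields rows without materialising the whole file:
def iterate_csv_rows(csv_path, chunk_size=1000):
    """Yield (index, row) pairs while reading the CSV in fixed-size chunks."""
    for chunk in pd.read_csv(csv_path, chunksize=chunk_size):
        for index, row in chunk.iterrows():
            yield index, row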

def load_rents_batch(csv_path, batch_size=10):
    """Load rents data from CSV in batches."""
    print("šŸ  Loading rents data in batches...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} rent records")

        total_created = 0
        total_errors = 0

        for i in range(0, min(batch_size, len(df))):
            row = df.iloc[i]
            try:
                # Create a unique rent identifier
                rent_id = f"RENT_{uuid.uuid4().hex[:8]}"

                rent, created = Rent.objects.get_or_create(
                    registration_number=rent_id,
                    defaults={
                        'registration_date': safe_datetime(row.get('REGISTRATION_DATE')) or datetime.now(),
                        'start_date': safe_datetime(row.get('START_DATE')) or datetime.now(),
                        'end_date': safe_datetime(row.get('END_DATE')) or datetime.now(),
                        'version': safe_str(row.get('VERSION', 'New')),
                        'area_en': safe_str(row.get('AREA_EN', '')),
                        'contract_amount': safe_decimal(row.get('CONTRACT_AMOUNT', 0)),
                        'annual_amount': safe_decimal(row.get('ANNUAL_AMOUNT', 0)),
                        'monthly_amount': safe_decimal(row.get('MONTHLY_AMOUNT', 0)),
                        'property_type': safe_str(row.get('PROPERTY_TYPE', 'Unit')),
                        'property_sub_type': safe_str(row.get('PROPERTY_SUB_TYPE', '')),
                        'actual_area': safe_decimal(row.get('ACTUAL_AREA', 0)),
                        'procedure_area': safe_decimal(row.get('PROCEDURE_AREA', 0)),
                        'rooms': safe_int(row.get('ROOMS', 0)),
                        'parking': safe_int(row.get('PARKING', 0)),
                        'nearest_metro': safe_str(row.get('NEAREST_METRO', '')),
                        'nearest_mall': safe_str(row.get('NEAREST_MALL', '')),
                        'nearest_landmark': safe_str(row.get('NEAREST_LANDMARK', '')),
                        'total_lessor': safe_int(row.get('TOTAL_LESSOR', 0)),
                        'total_lessee': safe_int(row.get('TOTAL_LESSEE', 0)),
                        'master_project': safe_str(row.get('MASTER_PROJECT', '')),
                        'project_number': safe_str(row.get('PROJECT_NUMBER', '')),
                        'project_name_en': safe_str(row.get('PROJECT_NAME_EN', '')),
                        'zone_en': safe_str(row.get('ZONE_EN', '')),
                    }
                )
                if created:
                    total_created += 1
                    print(f" āœ… Created rent: {rent_id}")
            except Exception as e:
                print(f" āŒ Error creating rent: {str(e)[:100]}")
                total_errors += 1

        print(f" šŸ  Rents Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" āŒ Error loading rents: {e}")
        return 0
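
# --- Optional sketch (not called by main()) -----------------------------------------
# A minimal sketch, assuming the configured database supports transactions: running a
# loader inside transaction.atomic() means nothing is committed unless the whole batch
# finishes, so an interrupted run does not leave a half-loaded batch behind. The import
# is aliased to avoid shadowing the Transaction model imported above.
from django.db import transaction as db_transaction


def load_batch_atomically(load_function, csv_path, batch_size=10):
    """Run one of the load_*_batch functions inside a single database transaction."""
    with db_transaction.atomic():
        return load_function(csv_path, batch_size=batch_size)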

def load_transactions_batch(csv_path, batch_size=10):
    """Load transactions data from CSV in batches."""
    print("šŸ’¼ Loading transactions data in batches...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} transaction records")

        total_created = 0
        total_errors = 0

        for i in range(0, min(batch_size, len(df))):
            row = df.iloc[i]
            try:
                transaction_number = safe_str(row.get('TRANSACTION_NUMBER', ''))
                if not transaction_number:
                    total_errors += 1
                    continue

                # Get or create project
                project = None
                project_name = safe_str(row.get('PROJECT_EN', ''))
                if project_name:
                    project, _ = Project.objects.get_or_create(
                        project_number=safe_str(row.get('PROJECT_NUMBER', '')),
                        defaults={'project_name_en': project_name}
                    )

                transaction, created = Transaction.objects.get_or_create(
                    transaction_number=transaction_number,
                    defaults={
                        'instance_date': safe_datetime(row.get('INSTANCE_DATE')) or datetime.now(),
                        'group_en': safe_str(row.get('GROUP_EN', '')),
                        'procedure_en': safe_str(row.get('PROCEDURE_EN', '')),
                        'is_offplan': safe_str(row.get('IS_OFFPLAN_EN', 'Ready')),
                        'is_freehold': safe_str(row.get('IS_FREE_HOLD_EN', 'Free Hold')),
                        'usage_en': safe_str(row.get('USAGE_EN', '')),
                        'area_en': safe_str(row.get('AREA_EN', '')),
                        'property_type': safe_str(row.get('PROP_TYPE_EN', '')),
                        'property_sub_type': safe_str(row.get('PROP_SB_TYPE_EN', '')),
                        'transaction_value': safe_decimal(row.get('TRANS_VALUE', 0)),
                        'procedure_area': safe_decimal(row.get('PROCEDURE_AREA', 0)),
                        'actual_area': safe_decimal(row.get('ACTUAL_AREA', 0)),
                        'rooms': safe_int(row.get('ROOMS_EN', 0)),
                        'parking': safe_int(row.get('PARKING', 0)),
                        'nearest_metro': safe_str(row.get('NEAREST_METRO_EN', '')),
                        'nearest_mall': safe_str(row.get('NEAREST_MALL_EN', '')),
                        'nearest_landmark': safe_str(row.get('NEAREST_LANDMARK_EN', '')),
                        'total_buyer': safe_int(row.get('TOTAL_BUYER', 0)),
                        'total_seller': safe_int(row.get('TOTAL_SELLER', 0)),
                        'master_project': safe_str(row.get('MASTER_PROJECT_EN', '')),
                        'project': project,
                    }
                )
                if created:
                    total_created += 1
                    print(f" āœ… Created transaction: {transaction_number}")
            except Exception as e:
                print(f" āŒ Error creating transaction {safe_str(row.get('TRANSACTION_NUMBER', 'unknown'))}: {str(e)[:100]}")
                total_errors += 1

        print(f" šŸ’¼ Transactions Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" āŒ Error loading transactions: {e}")
        return 0


def load_valuations_batch(csv_path, batch_size=10):
    """Load valuations data from CSV in batches."""
    print("šŸ’° Loading valuations data in batches...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} valuation records")

        total_created = 0
        total_errors = 0

        for i in range(0, min(batch_size, len(df))):
            row = df.iloc[i]
            try:
                # Create a unique valuation identifier
                valuation_id = f"VAL_{uuid.uuid4().hex[:8]}"

                valuation, created = Valuation.objects.get_or_create(
                    procedure_number=valuation_id,
                    defaults={
                        'property_total_value': safe_decimal(row.get('PROPERTY_TOTAL_VALUE', 0)),
                        'area_en': safe_str(row.get('AREA_EN', '')),
                        'actual_area': safe_decimal(row.get('ACTUAL_AREA', 0)),
                        'procedure_year': safe_int(row.get('PROCEDURE_YEAR', datetime.now().year)),
                        'instance_date': safe_datetime(row.get('INSTANCE_DATE')) or datetime.now(),
                        'actual_worth': safe_decimal(row.get('ACTUAL_WORTH', 0)),
                        'procedure_area': safe_decimal(row.get('PROCEDURE_AREA', 0)),
                        'property_type': safe_str(row.get('PROPERTY_TYPE', 'Unit')),
                        'property_sub_type': safe_str(row.get('PROPERTY_SUB_TYPE', '')),
                    }
                )
                if created:
                    total_created += 1
                    print(f" āœ… Created valuation: {valuation_id}")
            except Exception as e:
                print(f" āŒ Error creating valuation: {str(e)[:100]}")
                total_errors += 1

        print(f" šŸ’° Valuations Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" āŒ Error loading valuations: {e}")
        return 0


def create_sample_forecasts():
    """Create sample forecast data."""
    print("šŸ”® Creating sample forecasts...")
    try:
        # Get some sample areas and property types from transactions
        areas = Transaction.objects.values_list('area_en', flat=True).distinct()[:5]
        property_types = Transaction.objects.values_list('property_type', flat=True).distinct()[:3]

        forecasts_created = 0
        for area in areas:
            for prop_type in property_types:
                if not area or not prop_type:
                    continue

                forecast, created = Forecast.objects.get_or_create(
                    area_en=area,
                    property_type=prop_type,
                    defaults={
                        'forecast_date': datetime.now().date(),
                        'predicted_price': Decimal('1000000.00'),
                        'confidence_interval_lower': Decimal('800000.00'),
                        'confidence_interval_upper': Decimal('1200000.00'),
                        'model_version': '1.0',
                        'accuracy_score': Decimal('0.85'),
                        'metadata': {'source': 'sample_data', 'model': 'linear_regression'}
                    }
                )
                if created:
                    forecasts_created += 1

        print(f" āœ… Created {forecasts_created} sample forecasts")
        return forecasts_created
    except Exception as e:
        print(f" āŒ Error creating forecasts: {e}")
        return 0
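
# --- Optional sketch (not called by create_sample_forecasts) -------------------------
# The placeholder forecasts above use a fixed 1,000,000 predicted price. A minimal
# sketch of an alternative that seeds the prediction from the average observed
# transaction value for the same area and property type, with a Ā±20% band mirroring
# the placeholder intervals; field names follow the Forecast defaults used above.
from django.db.models import Avg


def sample_forecast_defaults(area, prop_type):
    """Build Forecast defaults from observed transaction values (sketch only)."""
    avg_value = (
        Transaction.objects
        .filter(area_en=area, property_type=prop_type)
        .aggregate(avg=Avg('transaction_value'))['avg']
    )
    predicted = Decimal(str(avg_value)) if avg_value else Decimal('1000000.00')
    predicted = predicted.quantize(Decimal('0.01'))
    return {
        'forecast_date': datetime.now().date(),
        'predicted_price': predicted,
        'confidence_interval_lower': (predicted * Decimal('0.8')).quantize(Decimal('0.01')),
        'confidence_interval_upper': (predicted * Decimal('1.2')).quantize(Decimal('0.01')),
        'model_version': '1.0',
        'accuracy_score': Decimal('0.85'),
        'metadata': {'source': 'sample_data', 'model': 'observed_average'},
    }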

def setup_rate_limits():
    """Setup default rate limits for different subscription tiers."""
    print("āš™ļø Setting up rate limits...")
    try:
        rate_limits = [
            {'subscription_type': 'free', 'requests_per_minute': 10, 'requests_per_hour': 100, 'requests_per_day': 1000},
            {'subscription_type': 'paid', 'requests_per_minute': 60, 'requests_per_hour': 1000, 'requests_per_day': 10000},
            {'subscription_type': 'premium', 'requests_per_minute': 120, 'requests_per_hour': 2000, 'requests_per_day': 20000},
        ]

        created_count = 0
        for limit_data in rate_limits:
            rate_limit, created = APIRateLimit.objects.get_or_create(
                subscription_type=limit_data['subscription_type'],
                defaults=limit_data
            )
            if created:
                created_count += 1

        print(f" āœ… Created {created_count} rate limit configurations")
        return created_count
    except Exception as e:
        print(f" āŒ Error setting up rate limits: {e}")
        return 0


def verify_data_loaded():
    """Verify that data has been loaded successfully."""
    print("\nšŸ” Verifying loaded data...")
    try:
        counts = {
            'Brokers': Broker.objects.count(),
            'Developers': Developer.objects.count(),
            'Projects': Project.objects.count(),
            'Lands': Land.objects.count(),
            'Rents': Rent.objects.count(),
            'Transactions': Transaction.objects.count(),
            'Valuations': Valuation.objects.count(),
            'Forecasts': Forecast.objects.count(),
        }

        print(" šŸ“Š Current database counts:")
        for model_name, count in counts.items():
            print(f" {model_name}: {count}")

        return counts
    except Exception as e:
        print(f" āŒ Error verifying data: {e}")
        return {}


def main():
    """Main function to load all CSV data in batches."""
    print("=" * 70)
    print(" Dubai Analytics Platform - Fixed CSV Data Loader")
    print(" Loading 10 entries per table for testing")
    print("=" * 70)
    print()

    # Check if sample data directory exists
    sample_data_dir = "sample data"
    if not os.path.exists(sample_data_dir):
        print(f"āŒ Sample data directory '{sample_data_dir}' not found!")
        print(" Please ensure the CSV files are in the 'sample data' directory.")
        return

    # Track total records created
    total_created = 0

    # Load each CSV file in batches
    csv_files = [
        ('brokers.csv', load_brokers_batch),
        ('projects.csv', load_projects_batch),
        ('lands.csv', load_lands_batch),
        ('rents.csv', load_rents_batch),
        ('transactions.csv', load_transactions_batch),
        ('valuations.csv', load_valuations_batch),
    ]

    for csv_file, load_function in csv_files:
        csv_path = os.path.join(sample_data_dir, csv_file)
        if os.path.exists(csv_path):
            print(f"\nšŸ“ Processing {csv_file}...")
            created = load_function(csv_path, batch_size=10)
            total_created += created
        else:
            print(f"āš ļø File {csv_file} not found, skipping...")

    # Create sample forecasts
    print("\nšŸ”® Creating sample forecasts...")
    forecasts_created = create_sample_forecasts()
    total_created += forecasts_created

    # Setup rate limits
    print("\nāš™ļø Setting up rate limits...")
    rate_limits_created = setup_rate_limits()

    # Verify data loaded
    counts = verify_data_loaded()

    # Summary
    print("\n" + "=" * 70)
    print(" Data Loading Summary")
    print("=" * 70)
    print(f"šŸ“Š Total records created: {total_created}")
    print(f"āš™ļø Rate limits configured: {rate_limits_created}")
    print()
    print("āœ… Fixed data loading completed successfully!")
    print()
    print("Next steps:")
    print("1. Access Django Admin: http://localhost:8000/admin/")
    print("2. Login with: admin@dubai-analytics.com / admin123")
    print("3. View the loaded data in the admin interface")
    print("4. Test the API endpoints with the sample data")


if __name__ == '__main__':
    main()
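
# Usage note (a sketch; the filename is an assumption and may differ in the repo):
#   python load_csv_data_fixed.py
# The script sets DJANGO_SETTINGS_MODULE and calls django.setup() itself, so it is
# meant to be run directly from the project root, next to the "sample data" directory.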