#!/usr/bin/env python
"""
CSV Data Loading Script - Batch Processing with Error Handling

Loads CSV data in small batches (10 entries at a time) with comprehensive error handling.
"""
import os
import sys
import django
import pandas as pd
from datetime import datetime
from decimal import Decimal
import uuid
import traceback

# Add the project directory to Python path
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Set Django settings
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'dubai_analytics.settings')

# Setup Django
django.setup()

from apps.analytics.models import (
    Broker, Developer, Project, Valuation, Land, Rent, Transaction, Forecast
)
from apps.users.models import User
from apps.core.models import APIRateLimit, SystemConfiguration


def safe_get(row, key, default=None):
    """Safely get a value from a pandas row, handling NaN values."""
    try:
        value = row.get(key, default)
        if pd.isna(value) or value == '' or str(value).lower() == 'nan':
            return default
        return value
    except Exception:
        return default


def safe_decimal(value, default=0):
    """Safely convert a value to Decimal."""
    try:
        if pd.isna(value) or value == '' or str(value).lower() == 'nan':
            return Decimal(str(default))
        return Decimal(str(value))
    except Exception:
        return Decimal(str(default))


def safe_datetime(value):
    """Safely convert a value to datetime."""
    try:
        if pd.isna(value) or value == '' or str(value).lower() == 'nan':
            return None
        return pd.to_datetime(value)
    except Exception:
        return None


def safe_int(value, default=0):
    """Safely convert a value to int."""
    try:
        if pd.isna(value) or value == '' or str(value).lower() == 'nan':
            return default
        return int(float(value))
    except Exception:
        return default


def safe_str(value, default=''):
    """Safely convert a value to a stripped string."""
    try:
        if pd.isna(value) or value == '' or str(value).lower() == 'nan':
            return default
        return str(value).strip()
    except Exception:
        return default


def load_brokers_batch(csv_path, batch_size=10):
    """Load brokers data from CSV in batches."""
    print("šŸ“Š Loading brokers data in batches...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} broker records")

        total_created = 0
        total_errors = 0

        for i in range(0, len(df), batch_size):
            batch_df = df.iloc[i:i + batch_size]
            batch_created = 0
            batch_errors = 0

            print(f" Processing batch {i // batch_size + 1} (rows {i + 1}-{min(i + batch_size, len(df))})...")

            for _, row in batch_df.iterrows():
                try:
                    broker_number = safe_str(row.get('BROKER_NUMBER', ''))
                    if not broker_number:
                        batch_errors += 1
                        continue

                    broker, created = Broker.objects.get_or_create(
                        broker_number=broker_number,
                        defaults={
                            'broker_name_en': safe_str(row.get('BROKER_EN', '')),
                            'gender': safe_str(row.get('GENDER_EN', 'male')),
                            'license_start_date': safe_datetime(row.get('LICENSE_START_DATE')) or datetime.now(),
                            'license_end_date': safe_datetime(row.get('LICENSE_END_DATE')) or datetime.now(),
                            'webpage': safe_str(row.get('WEBPAGE', '')),
                            'phone': safe_str(row.get('PHONE', '')),
                            'fax': safe_str(row.get('FAX', '')),
                            'real_estate_number': safe_str(row.get('REAL_ESTATE_NUMBER', '')),
                            'real_estate_name_en': safe_str(row.get('REAL_ESTATE_EN', '')),
                        }
                    )
                    if created:
                        batch_created += 1
                except Exception as e:
                    print(f" āŒ Error creating broker {safe_str(row.get('BROKER_NUMBER', 'unknown'))}: {str(e)[:100]}")
                    batch_errors += 1

            print(f" āœ… Batch {i // batch_size + 1}: {batch_created} created, {batch_errors} errors")
            total_created += batch_created
            total_errors += batch_errors

            # Stop after first batch for testing
            if i == 0:
                break

        print(f" šŸ“Š Brokers Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
print(f" āŒ Error loading brokers: {e}") return 0 def load_projects_batch(csv_path, batch_size=10): """Load projects data from CSV in batches.""" print("šŸ¢ Loading projects data in batches...") try: df = pd.read_csv(csv_path) print(f" Found {len(df)} project records") total_created = 0 total_errors = 0 for i in range(0, len(df), batch_size): batch_df = df.iloc[i:i+batch_size] batch_created = 0 batch_errors = 0 print(f" Processing batch {i//batch_size + 1} (rows {i+1}-{min(i+batch_size, len(df))})...") for _, row in batch_df.iterrows(): try: project_number = safe_str(row.get('PROJECT_NUMBER', '')) if not project_number: batch_errors += 1 continue # Get or create developer developer = None dev_number = safe_str(row.get('DEVELOPER_NUMBER', '')) if dev_number: developer, _ = Developer.objects.get_or_create( developer_number=dev_number, defaults={'developer_name_en': safe_str(row.get('DEVELOPER_EN', ''))} ) project, created = Project.objects.get_or_create( project_number=project_number, defaults={ 'project_name_en': safe_str(row.get('PROJECT_EN', '')), 'project_status': safe_str(row.get('PROJECT_STATUS', 'active')), 'area_en': safe_str(row.get('AREA_EN', '')), 'zone_en': safe_str(row.get('ZONE_EN', '')), 'developer': developer, 'total_units': safe_int(row.get('CNT_UNIT', 0)), 'completion_percentage': safe_decimal(row.get('PERCENT_COMPLETED', 0)), 'launch_date': safe_datetime(row.get('START_DATE')) or datetime.now().date(), 'completion_date': safe_datetime(row.get('COMPLETION_DATE')), } ) if created: batch_created += 1 except Exception as e: print(f" āŒ Error creating project {safe_str(row.get('PROJECT_NUMBER', 'unknown'))}: {str(e)[:100]}") batch_errors += 1 print(f" āœ… Batch {i//batch_size + 1}: {batch_created} created, {batch_errors} errors") total_created += batch_created total_errors += batch_errors # Stop after first batch for testing if i == 0: break print(f" šŸ¢ Projects Summary: {total_created} created, {total_errors} errors") return total_created except Exception as e: print(f" āŒ Error loading projects: {e}") return 0 def load_lands_batch(csv_path, batch_size=10): """Load lands data from CSV in batches.""" print("šŸžļø Loading lands data in batches...") try: df = pd.read_csv(csv_path) print(f" Found {len(df)} land records") total_created = 0 total_errors = 0 for i in range(0, len(df), batch_size): batch_df = df.iloc[i:i+batch_size] batch_created = 0 batch_errors = 0 print(f" Processing batch {i//batch_size + 1} (rows {i+1}-{min(i+batch_size, len(df))})...") for _, row in batch_df.iterrows(): try: # Create a unique land number if not available land_number = safe_str(row.get('PRE_REGISTRATION_NUMBER', '')) if not land_number: land_number = f"LAND_{uuid.uuid4().hex[:8]}" land, created = Land.objects.get_or_create( land_number=land_number, defaults={ 'land_type': safe_str(row.get('LAND_TYPE_EN', '')), 'area_en': safe_str(row.get('AREA_EN', '')), 'zone_en': safe_str(row.get('ZONE_EN', '')), 'actual_area': safe_decimal(row.get('ACTUAL_AREA', 0)), 'is_freehold': bool(safe_str(row.get('IS_FREE_HOLD_EN', 'False')).lower() == 'true'), 'land_use': safe_str(row.get('PROP_SUB_TYPE_EN', '')), 'plot_number': safe_str(row.get('DM_ZIP_CODE', '')), 'street_number': safe_str(row.get('PROJECT_NUMBER', '')), } ) if created: batch_created += 1 except Exception as e: print(f" āŒ Error creating land: {str(e)[:100]}") batch_errors += 1 print(f" āœ… Batch {i//batch_size + 1}: {batch_created} created, {batch_errors} errors") total_created += batch_created total_errors += batch_errors # Stop after 
            if i == 0:
                break

        print(f" šŸžļø Lands Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" āŒ Error loading lands: {e}")
        return 0


def load_rents_batch(csv_path, batch_size=10):
    """Load rents data from CSV in batches."""
    print("šŸ  Loading rents data in batches...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} rent records")

        total_created = 0
        total_errors = 0

        for i in range(0, len(df), batch_size):
            batch_df = df.iloc[i:i + batch_size]
            batch_created = 0
            batch_errors = 0

            print(f" Processing batch {i // batch_size + 1} (rows {i + 1}-{min(i + batch_size, len(df))})...")

            for _, row in batch_df.iterrows():
                try:
                    # Create a unique rent number
                    rent_number = f"RENT_{uuid.uuid4().hex[:8]}"

                    rent, created = Rent.objects.get_or_create(
                        rent_number=rent_number,
                        defaults={
                            'property_type': safe_str(row.get('PROP_TYPE_EN', '')),
                            'area_en': safe_str(row.get('AREA_EN', '')),
                            'zone_en': safe_str(row.get('ZONE_EN', '')),
                            'rent_date': safe_datetime(row.get('INSTANCE_DATE')) or datetime.now(),
                            'annual_rent': safe_decimal(row.get('ANNUAL_RENT', 0)),
                            'monthly_rent': safe_decimal(row.get('MONTHLY_RENT', 0)),
                            'property_area': safe_decimal(row.get('PROCEDURE_AREA', 0)),
                            'rent_per_sqft': safe_decimal(row.get('RENT_PER_SQFT', 0)),
                        }
                    )
                    if created:
                        batch_created += 1
                except Exception as e:
                    print(f" āŒ Error creating rent: {str(e)[:100]}")
                    batch_errors += 1

            print(f" āœ… Batch {i // batch_size + 1}: {batch_created} created, {batch_errors} errors")
            total_created += batch_created
            total_errors += batch_errors

            # Stop after first batch for testing
            if i == 0:
                break

        print(f" šŸ  Rents Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" āŒ Error loading rents: {e}")
        return 0


def load_transactions_batch(csv_path, batch_size=10):
    """Load transactions data from CSV in batches."""
    print("šŸ’¼ Loading transactions data in batches...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} transaction records")

        total_created = 0
        total_errors = 0

        for i in range(0, len(df), batch_size):
            batch_df = df.iloc[i:i + batch_size]
            batch_created = 0
            batch_errors = 0

            print(f" Processing batch {i // batch_size + 1} (rows {i + 1}-{min(i + batch_size, len(df))})...")

            for _, row in batch_df.iterrows():
                try:
                    transaction_number = safe_str(row.get('TRANSACTION_NUMBER', ''))
                    if not transaction_number:
                        batch_errors += 1
                        continue

                    # Get or create project
                    project = None
                    project_name = safe_str(row.get('PROJECT_EN', ''))
                    if project_name:
                        project, _ = Project.objects.get_or_create(
                            project_number=safe_str(row.get('PROJECT_NUMBER', '')),
                            defaults={'project_name_en': project_name}
                        )

                    transaction, created = Transaction.objects.get_or_create(
                        transaction_number=transaction_number,
                        defaults={
                            'instance_date': safe_datetime(row.get('INSTANCE_DATE')) or datetime.now(),
                            'area_en': safe_str(row.get('AREA_EN', '')),
                            'zone_en': safe_str(row.get('ZONE_EN', '')),
                            'property_type': safe_str(row.get('PROP_TYPE_EN', '')),
                            'transaction_value': safe_decimal(row.get('TRANS_VALUE', 0)),
                            'property_area': safe_decimal(row.get('PROCEDURE_AREA', 0)),
                            'price_per_sqft': safe_decimal(row.get('PRICE_PER_SQFT', 0)),
                            'group': safe_str(row.get('GROUP_EN', '')),
                            'usage': safe_str(row.get('USAGE_EN', '')),
                            'master_project': safe_str(row.get('MASTER_PROJECT_EN', '')),
                            'project': project,
                        }
                    )
                    if created:
                        batch_created += 1
                except Exception as e:
                    print(f" āŒ Error creating transaction {safe_str(row.get('TRANSACTION_NUMBER', 'unknown'))}: {str(e)[:100]}")
                    batch_errors += 1

            print(f" āœ… Batch {i // batch_size + 1}: {batch_created} created, {batch_errors} errors")
            total_created += batch_created
            total_errors += batch_errors

            # Stop after first batch for testing
            if i == 0:
                break

        print(f" šŸ’¼ Transactions Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" āŒ Error loading transactions: {e}")
        return 0


def load_valuations_batch(csv_path, batch_size=10):
    """Load valuations data from CSV in batches."""
    print("šŸ’° Loading valuations data in batches...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} valuation records")

        total_created = 0
        total_errors = 0

        for i in range(0, len(df), batch_size):
            batch_df = df.iloc[i:i + batch_size]
            batch_created = 0
            batch_errors = 0

            print(f" Processing batch {i // batch_size + 1} (rows {i + 1}-{min(i + batch_size, len(df))})...")

            for _, row in batch_df.iterrows():
                try:
                    # Create a unique valuation number
                    valuation_number = f"VAL_{uuid.uuid4().hex[:8]}"

                    valuation, created = Valuation.objects.get_or_create(
                        valuation_number=valuation_number,
                        defaults={
                            'property_type': safe_str(row.get('PROP_TYPE_EN', '')),
                            'area_en': safe_str(row.get('AREA_EN', '')),
                            'zone_en': safe_str(row.get('ZONE_EN', '')),
                            'valuation_date': safe_datetime(row.get('VALUATION_DATE')) or datetime.now(),
                            'property_value': safe_decimal(row.get('PROPERTY_VALUE', 0)),
                            'land_value': safe_decimal(row.get('LAND_VALUE', 0)),
                            'building_value': safe_decimal(row.get('BUILDING_VALUE', 0)),
                            'total_area': safe_decimal(row.get('TOTAL_AREA', 0)),
                            'land_area': safe_decimal(row.get('LAND_AREA', 0)),
                            'building_area': safe_decimal(row.get('BUILDING_AREA', 0)),
                        }
                    )
                    if created:
                        batch_created += 1
                except Exception as e:
                    print(f" āŒ Error creating valuation: {str(e)[:100]}")
                    batch_errors += 1

            print(f" āœ… Batch {i // batch_size + 1}: {batch_created} created, {batch_errors} errors")
            total_created += batch_created
            total_errors += batch_errors

            # Stop after first batch for testing
            if i == 0:
                break

        print(f" šŸ’° Valuations Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" āŒ Error loading valuations: {e}")
        return 0


def create_sample_forecasts():
    """Create sample forecast data."""
    print("šŸ”® Creating sample forecasts...")
    try:
        # Get some sample areas and property types from transactions
        areas = Transaction.objects.values_list('area_en', flat=True).distinct()[:5]
        property_types = Transaction.objects.values_list('property_type', flat=True).distinct()[:3]

        forecasts_created = 0
        for area in areas:
            for prop_type in property_types:
                if not area or not prop_type:
                    continue

                forecast, created = Forecast.objects.get_or_create(
                    area_en=area,
                    property_type=prop_type,
                    defaults={
                        'forecast_date': datetime.now().date(),
                        'predicted_price': Decimal('1000000.00'),
                        'confidence_interval_lower': Decimal('800000.00'),
                        'confidence_interval_upper': Decimal('1200000.00'),
                        'model_version': '1.0',
                        'accuracy_score': Decimal('0.85'),
                        'metadata': {'source': 'sample_data', 'model': 'linear_regression'}
                    }
                )
                if created:
                    forecasts_created += 1

        print(f" āœ… Created {forecasts_created} sample forecasts")
        return forecasts_created
    except Exception as e:
        print(f" āŒ Error creating forecasts: {e}")
        return 0


def setup_rate_limits():
    """Setup default rate limits for different subscription tiers."""
    print("āš™ļø Setting up rate limits...")
    try:
        rate_limits = [
            {'subscription_type': 'free', 'requests_per_minute': 10, 'requests_per_hour': 100, 'requests_per_day': 1000},
            {'subscription_type': 'paid', 'requests_per_minute': 60, 'requests_per_hour': 1000, 'requests_per_day': 10000},
            {'subscription_type': 'premium', 'requests_per_minute': 120, 'requests_per_hour': 2000, 'requests_per_day': 20000},
        ]

        created_count = 0
        for limit_data in rate_limits:
            rate_limit, created = APIRateLimit.objects.get_or_create(
                subscription_type=limit_data['subscription_type'],
                defaults=limit_data
            )
            if created:
                created_count += 1

        print(f" āœ… Created {created_count} rate limit configurations")
        return created_count
    except Exception as e:
        print(f" āŒ Error setting up rate limits: {e}")
        return 0


def verify_data_loaded():
    """Verify that data has been loaded successfully."""
    print("\nšŸ” Verifying loaded data...")
    try:
        counts = {
            'Brokers': Broker.objects.count(),
            'Developers': Developer.objects.count(),
            'Projects': Project.objects.count(),
            'Lands': Land.objects.count(),
            'Rents': Rent.objects.count(),
            'Transactions': Transaction.objects.count(),
            'Valuations': Valuation.objects.count(),
            'Forecasts': Forecast.objects.count(),
        }

        print(" šŸ“Š Current database counts:")
        for model_name, count in counts.items():
            print(f" {model_name}: {count}")

        return counts
    except Exception as e:
        print(f" āŒ Error verifying data: {e}")
        return {}


def main():
    """Main function to load all CSV data in batches."""
    print("=" * 70)
    print(" Dubai Analytics Platform - Batch CSV Data Loader")
    print(" Loading 10 entries per table for testing")
    print("=" * 70)
    print()

    # Check if sample data directory exists
    sample_data_dir = "sample data"
    if not os.path.exists(sample_data_dir):
        print(f"āŒ Sample data directory '{sample_data_dir}' not found!")
        print(" Please ensure the CSV files are in the 'sample data' directory.")
        return

    # Track total records created
    total_created = 0

    # Load each CSV file in batches
    csv_files = [
        ('brokers.csv', load_brokers_batch),
        ('projects.csv', load_projects_batch),
        ('lands.csv', load_lands_batch),
        ('rents.csv', load_rents_batch),
        ('transactions.csv', load_transactions_batch),
        ('valuations.csv', load_valuations_batch),
    ]

    for csv_file, load_function in csv_files:
        csv_path = os.path.join(sample_data_dir, csv_file)
        if os.path.exists(csv_path):
            print(f"\nšŸ“ Processing {csv_file}...")
            created = load_function(csv_path, batch_size=10)
            total_created += created
        else:
            print(f"āš ļø File {csv_file} not found, skipping...")

    # Create sample forecasts
    print("\nšŸ”® Creating sample forecasts...")
    forecasts_created = create_sample_forecasts()
    total_created += forecasts_created

    # Setup rate limits
    print("\nāš™ļø Setting up rate limits...")
    rate_limits_created = setup_rate_limits()

    # Verify data loaded
    counts = verify_data_loaded()

    # Summary
    print("\n" + "=" * 70)
    print(" Data Loading Summary")
    print("=" * 70)
    print(f"šŸ“Š Total records created: {total_created}")
    print(f"āš™ļø Rate limits configured: {rate_limits_created}")
    print()
    print("āœ… Batch data loading completed successfully!")
    print()
    print("Next steps:")
    print("1. Access Django Admin: http://localhost:8000/admin/")
    print("2. Login with: admin@dubai-analytics.com / admin123")
    print("3. View the loaded data in the admin interface")
    print("4. Test the API endpoints with the sample data")


if __name__ == '__main__':
    main()
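
# Usage sketch (the filename below is an assumption -- this script's actual name is
# not given here). Run from the Django project root so that dubai_analytics.settings
# resolves and the "sample data" directory (brokers.csv, projects.csv, lands.csv,
# rents.csv, transactions.csv, valuations.csv) is found next to the script:
#
#   python load_csv_batches.py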