#!/usr/bin/env python """ Load Remaining Data Script - Transactions, Rents, and Lands Loads the remaining CSV data with correct field names. """ import os import sys import django import pandas as pd from datetime import datetime from decimal import Decimal import uuid import traceback # Add the project directory to Python path sys.path.append(os.path.dirname(os.path.abspath(__file__))) # Set Django settings os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'dubai_analytics.settings') # Setup Django django.setup() from apps.analytics.models import ( Broker, Developer, Project, Valuation, Land, Rent, Transaction, Forecast ) from apps.users.models import User from apps.core.models import APIRateLimit, SystemConfiguration def safe_get(row, key, default=None): """Safely get value from pandas row, handling NaN values.""" try: value = row.get(key, default) if pd.isna(value) or value == '' or str(value).lower() == 'nan': return default return value except: return default def safe_decimal(value, default=0): """Safely convert to Decimal.""" try: if pd.isna(value) or value == '' or str(value).lower() == 'nan': return Decimal(str(default)) return Decimal(str(value)) except: return Decimal(str(default)) def safe_datetime(value): """Safely convert to datetime.""" try: if pd.isna(value) or value == '' or str(value).lower() == 'nan': return None return pd.to_datetime(value) except: return None def safe_int(value, default=0): """Safely convert to int.""" try: if pd.isna(value) or value == '' or str(value).lower() == 'nan': return default return int(float(value)) except: return default def safe_str(value, default=''): """Safely convert to string.""" try: if pd.isna(value) or value == '' or str(value).lower() == 'nan': return default return str(value).strip() except: return default def load_lands_batch(csv_path, batch_size=10): """Load lands data from CSV in batches.""" print("šŸžļø Loading lands data in batches...") try: df = pd.read_csv(csv_path) print(f" Found {len(df)} land records") total_created = 0 total_errors = 0 for i in range(0, min(batch_size, len(df))): row = df.iloc[i] try: # Create a unique land identifier land_id = f"LAND_{uuid.uuid4().hex[:8]}" # Get or create project if available project = None project_name = safe_str(row.get('PROJECT_EN', '')) if project_name: project, _ = Project.objects.get_or_create( project_number=safe_str(row.get('PROJECT_NUMBER', '')), defaults={'project_name_en': project_name} ) land, created = Land.objects.get_or_create( pre_registration_number=land_id, defaults={ 'land_type': safe_str(row.get('LAND_TYPE_EN', 'Residential')), 'property_sub_type': safe_str(row.get('PROP_SUB_TYPE_EN', '')), 'actual_area': safe_decimal(row.get('ACTUAL_AREA', 0)), 'is_offplan': safe_str(row.get('IS_OFFPLAN_EN', 'Ready')), 'is_freehold': safe_str(row.get('IS_FREE_HOLD_EN', 'Free Hold')), 'dm_zip_code': safe_str(row.get('DM_ZIP_CODE', '')), 'master_project': safe_str(row.get('MASTER_PROJECT_EN', '')), 'project': project, 'area_en': safe_str(row.get('AREA_EN', '')), 'zone_en': safe_str(row.get('ZONE_EN', '')), } ) if created: total_created += 1 print(f" āœ… Created land: {land_id}") except Exception as e: print(f" āŒ Error creating land: {str(e)[:100]}") total_errors += 1 print(f" šŸžļø Lands Summary: {total_created} created, {total_errors} errors") return total_created except Exception as e: print(f" āŒ Error loading lands: {e}") return 0 def load_rents_batch(csv_path, batch_size=10): """Load rents data from CSV in batches.""" print("šŸ  Loading rents data in batches...") try: df = pd.read_csv(csv_path) print(f" Found {len(df)} rent records") total_created = 0 total_errors = 0 for i in range(0, min(batch_size, len(df))): row = df.iloc[i] try: # Create a unique rent identifier rent_id = f"RENT_{uuid.uuid4().hex[:8]}" rent, created = Rent.objects.get_or_create( registration_date=safe_datetime(row.get('REGISTRATION_DATE')) or datetime.now(), start_date=safe_datetime(row.get('START_DATE')) or datetime.now(), end_date=safe_datetime(row.get('END_DATE')) or datetime.now(), defaults={ 'version': safe_str(row.get('VERSION', 'New')), 'area_en': safe_str(row.get('AREA_EN', '')), 'contract_amount': safe_decimal(row.get('CONTRACT_AMOUNT', 0)), 'annual_amount': safe_decimal(row.get('ANNUAL_AMOUNT', 0)), 'is_freehold': safe_str(row.get('IS_FREE_HOLD', 'Free Hold')), 'actual_area': safe_decimal(row.get('ACTUAL_AREA', 0)), 'property_type': safe_str(row.get('PROPERTY_TYPE', 'Unit')), 'property_sub_type': safe_str(row.get('PROPERTY_SUB_TYPE', '')), 'rooms': safe_str(row.get('ROOMS', '')), 'usage': safe_str(row.get('USAGE', 'Residential')), } ) if created: total_created += 1 print(f" āœ… Created rent: {rent_id}") except Exception as e: print(f" āŒ Error creating rent: {str(e)[:100]}") total_errors += 1 print(f" šŸ  Rents Summary: {total_created} created, {total_errors} errors") return total_created except Exception as e: print(f" āŒ Error loading rents: {e}") return 0 def load_transactions_batch(csv_path, batch_size=10): """Load transactions data from CSV in batches.""" print("šŸ’¼ Loading transactions data in batches...") try: df = pd.read_csv(csv_path) print(f" Found {len(df)} transaction records") total_created = 0 total_errors = 0 for i in range(0, min(batch_size, len(df))): row = df.iloc[i] try: transaction_number = safe_str(row.get('TRANSACTION_NUMBER', '')) if not transaction_number: total_errors += 1 continue # Get or create project project = None project_name = safe_str(row.get('PROJECT_EN', '')) if project_name: project, _ = Project.objects.get_or_create( project_number=safe_str(row.get('PROJECT_NUMBER', '')), defaults={'project_name_en': project_name} ) transaction, created = Transaction.objects.get_or_create( transaction_number=transaction_number, defaults={ 'instance_date': safe_datetime(row.get('INSTANCE_DATE')) or datetime.now(), 'group': safe_str(row.get('GROUP_EN', 'Sale')), 'procedure': safe_str(row.get('PROCEDURE_EN', '')), 'is_offplan': safe_str(row.get('IS_OFFPLAN_EN', 'Ready')), 'is_freehold': safe_str(row.get('IS_FREE_HOLD_EN', 'Free Hold')), 'usage': safe_str(row.get('USAGE_EN', 'Residential')), 'area_en': safe_str(row.get('AREA_EN', '')), 'property_type': safe_str(row.get('PROP_TYPE_EN', 'Unit')), 'property_sub_type': safe_str(row.get('PROP_SB_TYPE_EN', '')), 'transaction_value': safe_decimal(row.get('TRANS_VALUE', 0)), 'procedure_area': safe_decimal(row.get('PROCEDURE_AREA', 0)), 'actual_area': safe_decimal(row.get('ACTUAL_AREA', 0)), 'rooms': safe_str(row.get('ROOMS_EN', '')), 'parking': safe_int(row.get('PARKING', 0)), 'nearest_metro': safe_str(row.get('NEAREST_METRO_EN', '')), 'nearest_mall': safe_str(row.get('NEAREST_MALL_EN', '')), 'nearest_landmark': safe_str(row.get('NEAREST_LANDMARK_EN', '')), 'total_buyer': safe_int(row.get('TOTAL_BUYER', 0)), 'total_seller': safe_int(row.get('TOTAL_SELLER', 0)), 'master_project': safe_str(row.get('MASTER_PROJECT_EN', '')), 'project': project, } ) if created: total_created += 1 print(f" āœ… Created transaction: {transaction_number}") except Exception as e: print(f" āŒ Error creating transaction {safe_str(row.get('TRANSACTION_NUMBER', 'unknown'))}: {str(e)[:100]}") total_errors += 1 print(f" šŸ’¼ Transactions Summary: {total_created} created, {total_errors} errors") return total_created except Exception as e: print(f" āŒ Error loading transactions: {e}") return 0 def create_sample_forecasts(): """Create sample forecast data.""" print("šŸ”® Creating sample forecasts...") try: # Get some sample areas and property types from transactions areas = Transaction.objects.values_list('area_en', flat=True).distinct()[:5] property_types = Transaction.objects.values_list('property_type', flat=True).distinct()[:3] forecasts_created = 0 for area in areas: for prop_type in property_types: if not area or not prop_type: continue forecast, created = Forecast.objects.get_or_create( area_en=area, property_type=prop_type, defaults={ 'forecast_date': datetime.now().date(), 'predicted_price': Decimal('1000000.00'), 'confidence_interval_lower': Decimal('800000.00'), 'confidence_interval_upper': Decimal('1200000.00'), 'model_version': '1.0', 'accuracy_score': Decimal('0.85'), 'metadata': {'source': 'sample_data', 'model': 'linear_regression'} } ) if created: forecasts_created += 1 print(f" āœ… Created {forecasts_created} sample forecasts") return forecasts_created except Exception as e: print(f" āŒ Error creating forecasts: {e}") return 0 def verify_data_loaded(): """Verify that data has been loaded successfully.""" print("\nšŸ” Verifying loaded data...") try: counts = { 'Brokers': Broker.objects.count(), 'Developers': Developer.objects.count(), 'Projects': Project.objects.count(), 'Lands': Land.objects.count(), 'Rents': Rent.objects.count(), 'Transactions': Transaction.objects.count(), 'Valuations': Valuation.objects.count(), 'Forecasts': Forecast.objects.count(), } print(" šŸ“Š Current database counts:") for model_name, count in counts.items(): print(f" {model_name}: {count}") return counts except Exception as e: print(f" āŒ Error verifying data: {e}") return {} def main(): """Main function to load remaining CSV data.""" print("=" * 70) print(" Dubai Analytics Platform - Load Remaining Data") print(" Loading Transactions, Rents, and Lands") print("=" * 70) print() # Check if sample data directory exists sample_data_dir = "sample data" if not os.path.exists(sample_data_dir): print(f"āŒ Sample data directory '{sample_data_dir}' not found!") print(" Please ensure the CSV files are in the 'sample data' directory.") return # Track total records created total_created = 0 # Load each CSV file in batches csv_files = [ ('lands.csv', load_lands_batch), ('rents.csv', load_rents_batch), ('transactions.csv', load_transactions_batch), ] for csv_file, load_function in csv_files: csv_path = os.path.join(sample_data_dir, csv_file) if os.path.exists(csv_path): print(f"\nšŸ“ Processing {csv_file}...") created = load_function(csv_path, batch_size=10) total_created += created else: print(f"āš ļø File {csv_file} not found, skipping...") # Create sample forecasts print(f"\nšŸ”® Creating sample forecasts...") forecasts_created = create_sample_forecasts() total_created += forecasts_created # Verify data loaded counts = verify_data_loaded() # Summary print("\n" + "=" * 70) print(" Data Loading Summary") print("=" * 70) print(f"šŸ“Š Total records created: {total_created}") print() print("āœ… Remaining data loading completed successfully!") print() print("Next steps:") print("1. Access Django Admin: http://localhost:8000/admin/") print("2. Login with: admin@dubai-analytics.com / admin123") print("3. View the loaded data in the admin interface") print("4. Test the API endpoints with the sample data") if __name__ == '__main__': main()