saas-market-analysis-dubai/load_csv_data_v2.py
2025-09-17 03:04:22 +05:30

426 lines
17 KiB
Python
Executable File

#!/usr/bin/env python
"""
CSV Data Loading Script for Dubai Analytics Platform
Loads all CSV data from the sample data directory into the database.
"""
import os
import sys
import django
import pandas as pd
from datetime import datetime
from decimal import Decimal
import uuid
# Add the project directory to Python path
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
# Set Django settings
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'dubai_analytics.settings')
# Setup Django
django.setup()
from apps.analytics.models import (
Broker, Developer, Project, Valuation, Land, Rent, Transaction, Forecast
)
from apps.users.models import User
from apps.core.models import APIRateLimit, SystemConfiguration
def safe_get(row, key, default=None):
"""Safely get value from pandas row, handling NaN values."""
try:
value = row.get(key, default)
if pd.isna(value):
return default
return value
except:
return default
def safe_decimal(value, default=0):
"""Safely convert to Decimal."""
try:
if pd.isna(value) or value == '':
return Decimal(str(default))
return Decimal(str(value))
except:
return Decimal(str(default))
def safe_datetime(value):
"""Safely convert to datetime."""
try:
if pd.isna(value) or value == '':
return None
return pd.to_datetime(value)
except:
return None
def load_brokers(csv_path):
"""Load brokers data from CSV."""
print("📊 Loading brokers data...")
try:
df = pd.read_csv(csv_path)
print(f" Found {len(df)} broker records")
brokers_created = 0
for _, row in df.iterrows():
try:
broker, created = Broker.objects.get_or_create(
broker_number=str(safe_get(row, 'BROKER_NUMBER', '')),
defaults={
'broker_name_en': str(safe_get(row, 'BROKER_EN', '')),
'gender': str(safe_get(row, 'GENDER_EN', 'male')),
'license_start_date': safe_datetime(safe_get(row, 'LICENSE_START_DATE')),
'license_end_date': safe_datetime(safe_get(row, 'LICENSE_END_DATE')),
'webpage': str(safe_get(row, 'WEBPAGE', '')),
'phone': str(safe_get(row, 'PHONE', '')),
'fax': str(safe_get(row, 'FAX', '')),
'real_estate_number': str(safe_get(row, 'REAL_ESTATE_NUMBER', '')),
'real_estate_name_en': str(safe_get(row, 'REAL_ESTATE_EN', '')),
}
)
if created:
brokers_created += 1
except Exception as e:
print(f" Warning: Error creating broker {safe_get(row, 'BROKER_NUMBER', 'unknown')}: {e}")
continue
print(f" ✅ Created {brokers_created} new brokers")
return brokers_created
except Exception as e:
print(f" ❌ Error loading brokers: {e}")
return 0
def load_projects(csv_path):
"""Load projects data from CSV."""
print("🏢 Loading projects data...")
try:
df = pd.read_csv(csv_path)
print(f" Found {len(df)} project records")
projects_created = 0
for _, row in df.iterrows():
try:
# Get or create developer
developer = None
dev_number = safe_get(row, 'DEVELOPER_NUMBER')
if dev_number and str(dev_number) != 'nan':
developer, _ = Developer.objects.get_or_create(
developer_number=str(dev_number),
defaults={'developer_name_en': str(safe_get(row, 'DEVELOPER_EN', ''))}
)
project, created = Project.objects.get_or_create(
project_number=str(safe_get(row, 'PROJECT_NUMBER', '')),
defaults={
'project_name_en': str(safe_get(row, 'PROJECT_EN', '')),
'project_status': str(safe_get(row, 'PROJECT_STATUS', 'active')),
'area_en': str(safe_get(row, 'AREA_EN', '')),
'zone_en': str(safe_get(row, 'ZONE_EN', '')),
'developer': developer,
'total_units': int(safe_get(row, 'CNT_UNIT', 0)),
'completion_percentage': safe_decimal(safe_get(row, 'PERCENT_COMPLETED', 0)),
'launch_date': safe_datetime(safe_get(row, 'START_DATE')),
'completion_date': safe_datetime(safe_get(row, 'COMPLETION_DATE')),
}
)
if created:
projects_created += 1
except Exception as e:
print(f" Warning: Error creating project {safe_get(row, 'PROJECT_NUMBER', 'unknown')}: {e}")
continue
print(f" ✅ Created {projects_created} new projects")
return projects_created
except Exception as e:
print(f" ❌ Error loading projects: {e}")
return 0
def load_lands(csv_path):
"""Load lands data from CSV."""
print("🏞️ Loading lands data...")
try:
df = pd.read_csv(csv_path)
print(f" Found {len(df)} land records")
lands_created = 0
for _, row in df.iterrows():
try:
# Create a unique land number if not available
land_number = safe_get(row, 'PRE_REGISTRATION_NUMBER')
if not land_number or str(land_number) == 'nan':
land_number = f"LAND_{uuid.uuid4().hex[:8]}"
land, created = Land.objects.get_or_create(
land_number=str(land_number),
defaults={
'land_type': str(safe_get(row, 'LAND_TYPE_EN', '')),
'area_en': str(safe_get(row, 'AREA_EN', '')),
'zone_en': str(safe_get(row, 'ZONE_EN', '')),
'actual_area': safe_decimal(safe_get(row, 'ACTUAL_AREA', 0)),
'is_freehold': bool(safe_get(row, 'IS_FREE_HOLD_EN', False)),
'land_use': str(safe_get(row, 'PROP_SUB_TYPE_EN', '')),
'plot_number': str(safe_get(row, 'DM_ZIP_CODE', '')),
'street_number': str(safe_get(row, 'PROJECT_NUMBER', '')),
}
)
if created:
lands_created += 1
except Exception as e:
print(f" Warning: Error creating land: {e}")
continue
print(f" ✅ Created {lands_created} new lands")
return lands_created
except Exception as e:
print(f" ❌ Error loading lands: {e}")
return 0
def load_rents(csv_path):
"""Load rents data from CSV."""
print("🏠 Loading rents data...")
try:
df = pd.read_csv(csv_path)
print(f" Found {len(df)} rent records")
rents_created = 0
for _, row in df.iterrows():
try:
# Create a unique rent number if not available
rent_number = f"RENT_{uuid.uuid4().hex[:8]}"
rent, created = Rent.objects.get_or_create(
rent_number=rent_number,
defaults={
'property_type': str(safe_get(row, 'PROP_TYPE_EN', '')),
'area_en': str(safe_get(row, 'AREA_EN', '')),
'zone_en': str(safe_get(row, 'ZONE_EN', '')),
'rent_date': safe_datetime(safe_get(row, 'INSTANCE_DATE')),
'annual_rent': safe_decimal(safe_get(row, 'ANNUAL_RENT', 0)),
'monthly_rent': safe_decimal(safe_get(row, 'MONTHLY_RENT', 0)),
'property_area': safe_decimal(safe_get(row, 'PROCEDURE_AREA', 0)),
'rent_per_sqft': safe_decimal(safe_get(row, 'RENT_PER_SQFT', 0)),
}
)
if created:
rents_created += 1
except Exception as e:
print(f" Warning: Error creating rent: {e}")
continue
print(f" ✅ Created {rents_created} new rents")
return rents_created
except Exception as e:
print(f" ❌ Error loading rents: {e}")
return 0
def load_transactions(csv_path):
"""Load transactions data from CSV."""
print("💼 Loading transactions data...")
try:
df = pd.read_csv(csv_path)
print(f" Found {len(df)} transaction records")
transactions_created = 0
for _, row in df.iterrows():
try:
# Get or create project
project = None
project_name = safe_get(row, 'PROJECT_EN')
if project_name and str(project_name) != 'nan':
project, _ = Project.objects.get_or_create(
project_number=str(safe_get(row, 'PROJECT_NUMBER', '')),
defaults={'project_name_en': str(project_name)}
)
transaction, created = Transaction.objects.get_or_create(
transaction_number=str(safe_get(row, 'TRANSACTION_NUMBER', '')),
defaults={
'instance_date': safe_datetime(safe_get(row, 'INSTANCE_DATE')),
'area_en': str(safe_get(row, 'AREA_EN', '')),
'zone_en': str(safe_get(row, 'ZONE_EN', '')),
'property_type': str(safe_get(row, 'PROP_TYPE_EN', '')),
'transaction_value': safe_decimal(safe_get(row, 'TRANS_VALUE', 0)),
'property_area': safe_decimal(safe_get(row, 'PROCEDURE_AREA', 0)),
'price_per_sqft': safe_decimal(safe_get(row, 'PRICE_PER_SQFT', 0)),
'group': str(safe_get(row, 'GROUP_EN', '')),
'usage': str(safe_get(row, 'USAGE_EN', '')),
'master_project': str(safe_get(row, 'MASTER_PROJECT_EN', '')),
'project': project,
}
)
if created:
transactions_created += 1
except Exception as e:
print(f" Warning: Error creating transaction {safe_get(row, 'TRANSACTION_NUMBER', 'unknown')}: {e}")
continue
print(f" ✅ Created {transactions_created} new transactions")
return transactions_created
except Exception as e:
print(f" ❌ Error loading transactions: {e}")
return 0
def load_valuations(csv_path):
"""Load valuations data from CSV."""
print("💰 Loading valuations data...")
try:
df = pd.read_csv(csv_path)
print(f" Found {len(df)} valuation records")
valuations_created = 0
for _, row in df.iterrows():
try:
# Create a unique valuation number if not available
valuation_number = f"VAL_{uuid.uuid4().hex[:8]}"
valuation, created = Valuation.objects.get_or_create(
valuation_number=valuation_number,
defaults={
'property_type': str(safe_get(row, 'PROP_TYPE_EN', '')),
'area_en': str(safe_get(row, 'AREA_EN', '')),
'zone_en': str(safe_get(row, 'ZONE_EN', '')),
'valuation_date': safe_datetime(safe_get(row, 'VALUATION_DATE')),
'property_value': safe_decimal(safe_get(row, 'PROPERTY_VALUE', 0)),
'land_value': safe_decimal(safe_get(row, 'LAND_VALUE', 0)),
'building_value': safe_decimal(safe_get(row, 'BUILDING_VALUE', 0)),
'total_area': safe_decimal(safe_get(row, 'TOTAL_AREA', 0)),
'land_area': safe_decimal(safe_get(row, 'LAND_AREA', 0)),
'building_area': safe_decimal(safe_get(row, 'BUILDING_AREA', 0)),
}
)
if created:
valuations_created += 1
except Exception as e:
print(f" Warning: Error creating valuation: {e}")
continue
print(f" ✅ Created {valuations_created} new valuations")
return valuations_created
except Exception as e:
print(f" ❌ Error loading valuations: {e}")
return 0
def create_sample_forecasts():
"""Create sample forecast data."""
print("🔮 Creating sample forecasts...")
try:
# Get some sample areas and property types from transactions
areas = Transaction.objects.values_list('area_en', flat=True).distinct()[:5]
property_types = Transaction.objects.values_list('property_type', flat=True).distinct()[:3]
forecasts_created = 0
for area in areas:
for prop_type in property_types:
if not area or not prop_type:
continue
forecast, created = Forecast.objects.get_or_create(
area_en=area,
property_type=prop_type,
defaults={
'forecast_date': datetime.now().date(),
'predicted_price': Decimal('1000000.00'),
'confidence_interval_lower': Decimal('800000.00'),
'confidence_interval_upper': Decimal('1200000.00'),
'model_version': '1.0',
'accuracy_score': Decimal('0.85'),
'metadata': {'source': 'sample_data', 'model': 'linear_regression'}
}
)
if created:
forecasts_created += 1
print(f" ✅ Created {forecasts_created} sample forecasts")
return forecasts_created
except Exception as e:
print(f" ❌ Error creating forecasts: {e}")
return 0
def setup_rate_limits():
"""Setup default rate limits for different subscription tiers."""
print("⚙️ Setting up rate limits...")
try:
rate_limits = [
{'subscription_type': 'free', 'requests_per_minute': 10, 'requests_per_hour': 100, 'requests_per_day': 1000},
{'subscription_type': 'paid', 'requests_per_minute': 60, 'requests_per_hour': 1000, 'requests_per_day': 10000},
{'subscription_type': 'premium', 'requests_per_minute': 120, 'requests_per_hour': 2000, 'requests_per_day': 20000},
]
created_count = 0
for limit_data in rate_limits:
rate_limit, created = APIRateLimit.objects.get_or_create(
subscription_type=limit_data['subscription_type'],
defaults=limit_data
)
if created:
created_count += 1
print(f" ✅ Created {created_count} rate limit configurations")
return created_count
except Exception as e:
print(f" ❌ Error setting up rate limits: {e}")
return 0
def main():
"""Main function to load all CSV data."""
print("=" * 60)
print(" Dubai Analytics Platform - CSV Data Loader v2")
print("=" * 60)
print()
# Check if sample data directory exists
sample_data_dir = "sample data"
if not os.path.exists(sample_data_dir):
print(f"❌ Sample data directory '{sample_data_dir}' not found!")
print(" Please ensure the CSV files are in the 'sample data' directory.")
return
# Track total records created
total_created = 0
# Load each CSV file
csv_files = [
('brokers.csv', load_brokers),
('projects.csv', load_projects),
('lands.csv', load_lands),
('rents.csv', load_rents),
('transactions.csv', load_transactions),
('valuations.csv', load_valuations),
]
for csv_file, load_function in csv_files:
csv_path = os.path.join(sample_data_dir, csv_file)
if os.path.exists(csv_path):
created = load_function(csv_path)
total_created += created
else:
print(f"⚠️ File {csv_file} not found, skipping...")
print()
# Create sample forecasts
forecasts_created = create_sample_forecasts()
total_created += forecasts_created
print()
# Setup rate limits
rate_limits_created = setup_rate_limits()
print()
# Summary
print("=" * 60)
print(" Data Loading Summary")
print("=" * 60)
print(f"📊 Total records created: {total_created}")
print(f"⚙️ Rate limits configured: {rate_limits_created}")
print()
print("✅ Data loading completed successfully!")
print()
print("Next steps:")
print("1. Access Django Admin: http://localhost:8000/admin/")
print("2. Login with: admin@dubai-analytics.com / admin123")
print("3. View the loaded data in the admin interface")
print("4. Test the API endpoints with the sample data")
if __name__ == '__main__':
main()