426 lines
17 KiB
Python
Executable File
426 lines
17 KiB
Python
Executable File
#!/usr/bin/env python
|
|
"""
|
|
CSV Data Loading Script for Dubai Analytics Platform
|
|
Loads all CSV data from the sample data directory into the database.
|
|
"""
|
|
import os
|
|
import sys
|
|
import django
|
|
import pandas as pd
|
|
from datetime import datetime
|
|
from decimal import Decimal
|
|
import uuid
|
|
|
|
# Add the project directory to Python path
|
|
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
# Set Django settings
|
|
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'dubai_analytics.settings')
|
|
|
|
# Setup Django
|
|
django.setup()
|
|
|
|
from apps.analytics.models import (
|
|
Broker, Developer, Project, Valuation, Land, Rent, Transaction, Forecast
|
|
)
|
|
from apps.users.models import User
|
|
from apps.core.models import APIRateLimit, SystemConfiguration
|
|
|
|
def safe_get(row, key, default=None):
|
|
"""Safely get value from pandas row, handling NaN values."""
|
|
try:
|
|
value = row.get(key, default)
|
|
if pd.isna(value):
|
|
return default
|
|
return value
|
|
except:
|
|
return default
|
|
|
|
def safe_decimal(value, default=0):
|
|
"""Safely convert to Decimal."""
|
|
try:
|
|
if pd.isna(value) or value == '':
|
|
return Decimal(str(default))
|
|
return Decimal(str(value))
|
|
except:
|
|
return Decimal(str(default))
|
|
|
|
def safe_datetime(value):
|
|
"""Safely convert to datetime."""
|
|
try:
|
|
if pd.isna(value) or value == '':
|
|
return None
|
|
return pd.to_datetime(value)
|
|
except:
|
|
return None
|
|
|
|
def load_brokers(csv_path):
|
|
"""Load brokers data from CSV."""
|
|
print("📊 Loading brokers data...")
|
|
try:
|
|
df = pd.read_csv(csv_path)
|
|
print(f" Found {len(df)} broker records")
|
|
|
|
brokers_created = 0
|
|
for _, row in df.iterrows():
|
|
try:
|
|
broker, created = Broker.objects.get_or_create(
|
|
broker_number=str(safe_get(row, 'BROKER_NUMBER', '')),
|
|
defaults={
|
|
'broker_name_en': str(safe_get(row, 'BROKER_EN', '')),
|
|
'gender': str(safe_get(row, 'GENDER_EN', 'male')),
|
|
'license_start_date': safe_datetime(safe_get(row, 'LICENSE_START_DATE')),
|
|
'license_end_date': safe_datetime(safe_get(row, 'LICENSE_END_DATE')),
|
|
'webpage': str(safe_get(row, 'WEBPAGE', '')),
|
|
'phone': str(safe_get(row, 'PHONE', '')),
|
|
'fax': str(safe_get(row, 'FAX', '')),
|
|
'real_estate_number': str(safe_get(row, 'REAL_ESTATE_NUMBER', '')),
|
|
'real_estate_name_en': str(safe_get(row, 'REAL_ESTATE_EN', '')),
|
|
}
|
|
)
|
|
if created:
|
|
brokers_created += 1
|
|
except Exception as e:
|
|
print(f" Warning: Error creating broker {safe_get(row, 'BROKER_NUMBER', 'unknown')}: {e}")
|
|
continue
|
|
|
|
print(f" ✅ Created {brokers_created} new brokers")
|
|
return brokers_created
|
|
except Exception as e:
|
|
print(f" ❌ Error loading brokers: {e}")
|
|
return 0
|
|
|
|
def load_projects(csv_path):
|
|
"""Load projects data from CSV."""
|
|
print("🏢 Loading projects data...")
|
|
try:
|
|
df = pd.read_csv(csv_path)
|
|
print(f" Found {len(df)} project records")
|
|
|
|
projects_created = 0
|
|
for _, row in df.iterrows():
|
|
try:
|
|
# Get or create developer
|
|
developer = None
|
|
dev_number = safe_get(row, 'DEVELOPER_NUMBER')
|
|
if dev_number and str(dev_number) != 'nan':
|
|
developer, _ = Developer.objects.get_or_create(
|
|
developer_number=str(dev_number),
|
|
defaults={'developer_name_en': str(safe_get(row, 'DEVELOPER_EN', ''))}
|
|
)
|
|
|
|
project, created = Project.objects.get_or_create(
|
|
project_number=str(safe_get(row, 'PROJECT_NUMBER', '')),
|
|
defaults={
|
|
'project_name_en': str(safe_get(row, 'PROJECT_EN', '')),
|
|
'project_status': str(safe_get(row, 'PROJECT_STATUS', 'active')),
|
|
'area_en': str(safe_get(row, 'AREA_EN', '')),
|
|
'zone_en': str(safe_get(row, 'ZONE_EN', '')),
|
|
'developer': developer,
|
|
'total_units': int(safe_get(row, 'CNT_UNIT', 0)),
|
|
'completion_percentage': safe_decimal(safe_get(row, 'PERCENT_COMPLETED', 0)),
|
|
'launch_date': safe_datetime(safe_get(row, 'START_DATE')),
|
|
'completion_date': safe_datetime(safe_get(row, 'COMPLETION_DATE')),
|
|
}
|
|
)
|
|
if created:
|
|
projects_created += 1
|
|
except Exception as e:
|
|
print(f" Warning: Error creating project {safe_get(row, 'PROJECT_NUMBER', 'unknown')}: {e}")
|
|
continue
|
|
|
|
print(f" ✅ Created {projects_created} new projects")
|
|
return projects_created
|
|
except Exception as e:
|
|
print(f" ❌ Error loading projects: {e}")
|
|
return 0
|
|
|
|
def load_lands(csv_path):
|
|
"""Load lands data from CSV."""
|
|
print("🏞️ Loading lands data...")
|
|
try:
|
|
df = pd.read_csv(csv_path)
|
|
print(f" Found {len(df)} land records")
|
|
|
|
lands_created = 0
|
|
for _, row in df.iterrows():
|
|
try:
|
|
# Create a unique land number if not available
|
|
land_number = safe_get(row, 'PRE_REGISTRATION_NUMBER')
|
|
if not land_number or str(land_number) == 'nan':
|
|
land_number = f"LAND_{uuid.uuid4().hex[:8]}"
|
|
|
|
land, created = Land.objects.get_or_create(
|
|
land_number=str(land_number),
|
|
defaults={
|
|
'land_type': str(safe_get(row, 'LAND_TYPE_EN', '')),
|
|
'area_en': str(safe_get(row, 'AREA_EN', '')),
|
|
'zone_en': str(safe_get(row, 'ZONE_EN', '')),
|
|
'actual_area': safe_decimal(safe_get(row, 'ACTUAL_AREA', 0)),
|
|
'is_freehold': bool(safe_get(row, 'IS_FREE_HOLD_EN', False)),
|
|
'land_use': str(safe_get(row, 'PROP_SUB_TYPE_EN', '')),
|
|
'plot_number': str(safe_get(row, 'DM_ZIP_CODE', '')),
|
|
'street_number': str(safe_get(row, 'PROJECT_NUMBER', '')),
|
|
}
|
|
)
|
|
if created:
|
|
lands_created += 1
|
|
except Exception as e:
|
|
print(f" Warning: Error creating land: {e}")
|
|
continue
|
|
|
|
print(f" ✅ Created {lands_created} new lands")
|
|
return lands_created
|
|
except Exception as e:
|
|
print(f" ❌ Error loading lands: {e}")
|
|
return 0
|
|
|
|
def load_rents(csv_path):
|
|
"""Load rents data from CSV."""
|
|
print("🏠 Loading rents data...")
|
|
try:
|
|
df = pd.read_csv(csv_path)
|
|
print(f" Found {len(df)} rent records")
|
|
|
|
rents_created = 0
|
|
for _, row in df.iterrows():
|
|
try:
|
|
# Create a unique rent number if not available
|
|
rent_number = f"RENT_{uuid.uuid4().hex[:8]}"
|
|
|
|
rent, created = Rent.objects.get_or_create(
|
|
rent_number=rent_number,
|
|
defaults={
|
|
'property_type': str(safe_get(row, 'PROP_TYPE_EN', '')),
|
|
'area_en': str(safe_get(row, 'AREA_EN', '')),
|
|
'zone_en': str(safe_get(row, 'ZONE_EN', '')),
|
|
'rent_date': safe_datetime(safe_get(row, 'INSTANCE_DATE')),
|
|
'annual_rent': safe_decimal(safe_get(row, 'ANNUAL_RENT', 0)),
|
|
'monthly_rent': safe_decimal(safe_get(row, 'MONTHLY_RENT', 0)),
|
|
'property_area': safe_decimal(safe_get(row, 'PROCEDURE_AREA', 0)),
|
|
'rent_per_sqft': safe_decimal(safe_get(row, 'RENT_PER_SQFT', 0)),
|
|
}
|
|
)
|
|
if created:
|
|
rents_created += 1
|
|
except Exception as e:
|
|
print(f" Warning: Error creating rent: {e}")
|
|
continue
|
|
|
|
print(f" ✅ Created {rents_created} new rents")
|
|
return rents_created
|
|
except Exception as e:
|
|
print(f" ❌ Error loading rents: {e}")
|
|
return 0
|
|
|
|
def load_transactions(csv_path):
|
|
"""Load transactions data from CSV."""
|
|
print("💼 Loading transactions data...")
|
|
try:
|
|
df = pd.read_csv(csv_path)
|
|
print(f" Found {len(df)} transaction records")
|
|
|
|
transactions_created = 0
|
|
for _, row in df.iterrows():
|
|
try:
|
|
# Get or create project
|
|
project = None
|
|
project_name = safe_get(row, 'PROJECT_EN')
|
|
if project_name and str(project_name) != 'nan':
|
|
project, _ = Project.objects.get_or_create(
|
|
project_number=str(safe_get(row, 'PROJECT_NUMBER', '')),
|
|
defaults={'project_name_en': str(project_name)}
|
|
)
|
|
|
|
transaction, created = Transaction.objects.get_or_create(
|
|
transaction_number=str(safe_get(row, 'TRANSACTION_NUMBER', '')),
|
|
defaults={
|
|
'instance_date': safe_datetime(safe_get(row, 'INSTANCE_DATE')),
|
|
'area_en': str(safe_get(row, 'AREA_EN', '')),
|
|
'zone_en': str(safe_get(row, 'ZONE_EN', '')),
|
|
'property_type': str(safe_get(row, 'PROP_TYPE_EN', '')),
|
|
'transaction_value': safe_decimal(safe_get(row, 'TRANS_VALUE', 0)),
|
|
'property_area': safe_decimal(safe_get(row, 'PROCEDURE_AREA', 0)),
|
|
'price_per_sqft': safe_decimal(safe_get(row, 'PRICE_PER_SQFT', 0)),
|
|
'group': str(safe_get(row, 'GROUP_EN', '')),
|
|
'usage': str(safe_get(row, 'USAGE_EN', '')),
|
|
'master_project': str(safe_get(row, 'MASTER_PROJECT_EN', '')),
|
|
'project': project,
|
|
}
|
|
)
|
|
if created:
|
|
transactions_created += 1
|
|
except Exception as e:
|
|
print(f" Warning: Error creating transaction {safe_get(row, 'TRANSACTION_NUMBER', 'unknown')}: {e}")
|
|
continue
|
|
|
|
print(f" ✅ Created {transactions_created} new transactions")
|
|
return transactions_created
|
|
except Exception as e:
|
|
print(f" ❌ Error loading transactions: {e}")
|
|
return 0
|
|
|
|
def load_valuations(csv_path):
|
|
"""Load valuations data from CSV."""
|
|
print("💰 Loading valuations data...")
|
|
try:
|
|
df = pd.read_csv(csv_path)
|
|
print(f" Found {len(df)} valuation records")
|
|
|
|
valuations_created = 0
|
|
for _, row in df.iterrows():
|
|
try:
|
|
# Create a unique valuation number if not available
|
|
valuation_number = f"VAL_{uuid.uuid4().hex[:8]}"
|
|
|
|
valuation, created = Valuation.objects.get_or_create(
|
|
valuation_number=valuation_number,
|
|
defaults={
|
|
'property_type': str(safe_get(row, 'PROP_TYPE_EN', '')),
|
|
'area_en': str(safe_get(row, 'AREA_EN', '')),
|
|
'zone_en': str(safe_get(row, 'ZONE_EN', '')),
|
|
'valuation_date': safe_datetime(safe_get(row, 'VALUATION_DATE')),
|
|
'property_value': safe_decimal(safe_get(row, 'PROPERTY_VALUE', 0)),
|
|
'land_value': safe_decimal(safe_get(row, 'LAND_VALUE', 0)),
|
|
'building_value': safe_decimal(safe_get(row, 'BUILDING_VALUE', 0)),
|
|
'total_area': safe_decimal(safe_get(row, 'TOTAL_AREA', 0)),
|
|
'land_area': safe_decimal(safe_get(row, 'LAND_AREA', 0)),
|
|
'building_area': safe_decimal(safe_get(row, 'BUILDING_AREA', 0)),
|
|
}
|
|
)
|
|
if created:
|
|
valuations_created += 1
|
|
except Exception as e:
|
|
print(f" Warning: Error creating valuation: {e}")
|
|
continue
|
|
|
|
print(f" ✅ Created {valuations_created} new valuations")
|
|
return valuations_created
|
|
except Exception as e:
|
|
print(f" ❌ Error loading valuations: {e}")
|
|
return 0
|
|
|
|
def create_sample_forecasts():
|
|
"""Create sample forecast data."""
|
|
print("🔮 Creating sample forecasts...")
|
|
try:
|
|
# Get some sample areas and property types from transactions
|
|
areas = Transaction.objects.values_list('area_en', flat=True).distinct()[:5]
|
|
property_types = Transaction.objects.values_list('property_type', flat=True).distinct()[:3]
|
|
|
|
forecasts_created = 0
|
|
for area in areas:
|
|
for prop_type in property_types:
|
|
if not area or not prop_type:
|
|
continue
|
|
|
|
forecast, created = Forecast.objects.get_or_create(
|
|
area_en=area,
|
|
property_type=prop_type,
|
|
defaults={
|
|
'forecast_date': datetime.now().date(),
|
|
'predicted_price': Decimal('1000000.00'),
|
|
'confidence_interval_lower': Decimal('800000.00'),
|
|
'confidence_interval_upper': Decimal('1200000.00'),
|
|
'model_version': '1.0',
|
|
'accuracy_score': Decimal('0.85'),
|
|
'metadata': {'source': 'sample_data', 'model': 'linear_regression'}
|
|
}
|
|
)
|
|
if created:
|
|
forecasts_created += 1
|
|
|
|
print(f" ✅ Created {forecasts_created} sample forecasts")
|
|
return forecasts_created
|
|
except Exception as e:
|
|
print(f" ❌ Error creating forecasts: {e}")
|
|
return 0
|
|
|
|
def setup_rate_limits():
|
|
"""Setup default rate limits for different subscription tiers."""
|
|
print("⚙️ Setting up rate limits...")
|
|
try:
|
|
rate_limits = [
|
|
{'subscription_type': 'free', 'requests_per_minute': 10, 'requests_per_hour': 100, 'requests_per_day': 1000},
|
|
{'subscription_type': 'paid', 'requests_per_minute': 60, 'requests_per_hour': 1000, 'requests_per_day': 10000},
|
|
{'subscription_type': 'premium', 'requests_per_minute': 120, 'requests_per_hour': 2000, 'requests_per_day': 20000},
|
|
]
|
|
|
|
created_count = 0
|
|
for limit_data in rate_limits:
|
|
rate_limit, created = APIRateLimit.objects.get_or_create(
|
|
subscription_type=limit_data['subscription_type'],
|
|
defaults=limit_data
|
|
)
|
|
if created:
|
|
created_count += 1
|
|
|
|
print(f" ✅ Created {created_count} rate limit configurations")
|
|
return created_count
|
|
except Exception as e:
|
|
print(f" ❌ Error setting up rate limits: {e}")
|
|
return 0
|
|
|
|
def main():
|
|
"""Main function to load all CSV data."""
|
|
print("=" * 60)
|
|
print(" Dubai Analytics Platform - CSV Data Loader v2")
|
|
print("=" * 60)
|
|
print()
|
|
|
|
# Check if sample data directory exists
|
|
sample_data_dir = "sample data"
|
|
if not os.path.exists(sample_data_dir):
|
|
print(f"❌ Sample data directory '{sample_data_dir}' not found!")
|
|
print(" Please ensure the CSV files are in the 'sample data' directory.")
|
|
return
|
|
|
|
# Track total records created
|
|
total_created = 0
|
|
|
|
# Load each CSV file
|
|
csv_files = [
|
|
('brokers.csv', load_brokers),
|
|
('projects.csv', load_projects),
|
|
('lands.csv', load_lands),
|
|
('rents.csv', load_rents),
|
|
('transactions.csv', load_transactions),
|
|
('valuations.csv', load_valuations),
|
|
]
|
|
|
|
for csv_file, load_function in csv_files:
|
|
csv_path = os.path.join(sample_data_dir, csv_file)
|
|
if os.path.exists(csv_path):
|
|
created = load_function(csv_path)
|
|
total_created += created
|
|
else:
|
|
print(f"⚠️ File {csv_file} not found, skipping...")
|
|
print()
|
|
|
|
# Create sample forecasts
|
|
forecasts_created = create_sample_forecasts()
|
|
total_created += forecasts_created
|
|
print()
|
|
|
|
# Setup rate limits
|
|
rate_limits_created = setup_rate_limits()
|
|
print()
|
|
|
|
# Summary
|
|
print("=" * 60)
|
|
print(" Data Loading Summary")
|
|
print("=" * 60)
|
|
print(f"📊 Total records created: {total_created}")
|
|
print(f"⚙️ Rate limits configured: {rate_limits_created}")
|
|
print()
|
|
print("✅ Data loading completed successfully!")
|
|
print()
|
|
print("Next steps:")
|
|
print("1. Access Django Admin: http://localhost:8000/admin/")
|
|
print("2. Login with: admin@dubai-analytics.com / admin123")
|
|
print("3. View the loaded data in the admin interface")
|
|
print("4. Test the API endpoints with the sample data")
|
|
|
|
if __name__ == '__main__':
|
|
main()
|