385 lines
15 KiB
Python
Executable File
385 lines
15 KiB
Python
Executable File
#!/usr/bin/env python
|
|
"""
|
|
CSV Data Loading Script for Dubai Analytics Platform
|
|
Loads all CSV data from the sample data directory into the database.
|
|
"""
|
|
import os
|
|
import sys
|
|
import django
|
|
import pandas as pd
|
|
from datetime import datetime
|
|
from decimal import Decimal
|
|
import uuid
|
|
|
|
# Add the project directory to Python path
|
|
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
# Set Django settings
|
|
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'dubai_analytics.settings')
|
|
|
|
# Setup Django
|
|
django.setup()
|
|
|
|
from apps.analytics.models import (
|
|
Broker, Developer, Project, Valuation, Land, Rent, Transaction, Forecast
|
|
)
|
|
from apps.users.models import User
|
|
from apps.core.models import APIRateLimit, SystemConfiguration
|
|
|
|
def load_brokers(csv_path):
|
|
"""Load brokers data from CSV."""
|
|
print("📊 Loading brokers data...")
|
|
try:
|
|
df = pd.read_csv(csv_path)
|
|
print(f" Found {len(df)} broker records")
|
|
|
|
brokers_created = 0
|
|
for _, row in df.iterrows():
|
|
broker, created = Broker.objects.get_or_create(
|
|
broker_number=row['BROKER_NUMBER'],
|
|
defaults={
|
|
'broker_name_en': row['BROKER_EN'],
|
|
'gender': row.get('GENDER_EN', 'male'),
|
|
'license_start_date': pd.to_datetime(row['LICENSE_START_DATE']),
|
|
'license_end_date': pd.to_datetime(row['LICENSE_END_DATE']),
|
|
'webpage': row.get('WEBPAGE', ''),
|
|
'phone': row.get('PHONE', ''),
|
|
'fax': row.get('FAX', ''),
|
|
'real_estate_number': row.get('REAL_ESTATE_NUMBER', ''),
|
|
'real_estate_name_en': row.get('REAL_ESTATE_EN', ''),
|
|
}
|
|
)
|
|
if created:
|
|
brokers_created += 1
|
|
|
|
print(f" ✅ Created {brokers_created} new brokers")
|
|
return brokers_created
|
|
except Exception as e:
|
|
print(f" ❌ Error loading brokers: {e}")
|
|
return 0
|
|
|
|
def load_developers(csv_path):
|
|
"""Load developers data from CSV."""
|
|
print("🏗️ Loading developers data...")
|
|
try:
|
|
df = pd.read_csv(csv_path)
|
|
print(f" Found {len(df)} developer records")
|
|
|
|
developers_created = 0
|
|
for _, row in df.iterrows():
|
|
developer, created = Developer.objects.get_or_create(
|
|
developer_number=row['developer_number'],
|
|
defaults={
|
|
'developer_name_en': row['developer_name_en'],
|
|
}
|
|
)
|
|
if created:
|
|
developers_created += 1
|
|
|
|
print(f" ✅ Created {developers_created} new developers")
|
|
return developers_created
|
|
except Exception as e:
|
|
print(f" ❌ Error loading developers: {e}")
|
|
return 0
|
|
|
|
def load_projects(csv_path):
|
|
"""Load projects data from CSV."""
|
|
print("🏢 Loading projects data...")
|
|
try:
|
|
df = pd.read_csv(csv_path)
|
|
print(f" Found {len(df)} project records")
|
|
|
|
projects_created = 0
|
|
for _, row in df.iterrows():
|
|
# Get or create developer
|
|
developer = None
|
|
if pd.notna(row.get('developer')):
|
|
developer, _ = Developer.objects.get_or_create(
|
|
developer_number=row['developer'],
|
|
defaults={'developer_name_en': row['developer']}
|
|
)
|
|
|
|
project, created = Project.objects.get_or_create(
|
|
project_number=row['project_number'],
|
|
defaults={
|
|
'project_name_en': row['project_name_en'],
|
|
'project_status': row.get('project_status', 'active'),
|
|
'area_en': row.get('area_en', ''),
|
|
'zone_en': row.get('zone_en', ''),
|
|
'developer': developer,
|
|
'total_units': row.get('total_units', 0),
|
|
'completion_percentage': row.get('completion_percentage', 0),
|
|
'launch_date': pd.to_datetime(row['launch_date']) if pd.notna(row.get('launch_date')) else None,
|
|
'completion_date': pd.to_datetime(row['completion_date']) if pd.notna(row.get('completion_date')) else None,
|
|
}
|
|
)
|
|
if created:
|
|
projects_created += 1
|
|
|
|
print(f" ✅ Created {projects_created} new projects")
|
|
return projects_created
|
|
except Exception as e:
|
|
print(f" ❌ Error loading projects: {e}")
|
|
return 0
|
|
|
|
def load_valuations(csv_path):
|
|
"""Load valuations data from CSV."""
|
|
print("💰 Loading valuations data...")
|
|
try:
|
|
df = pd.read_csv(csv_path)
|
|
print(f" Found {len(df)} valuation records")
|
|
|
|
valuations_created = 0
|
|
for _, row in df.iterrows():
|
|
valuation, created = Valuation.objects.get_or_create(
|
|
valuation_number=row['valuation_number'],
|
|
defaults={
|
|
'property_type': row.get('property_type', ''),
|
|
'area_en': row.get('area_en', ''),
|
|
'zone_en': row.get('zone_en', ''),
|
|
'valuation_date': pd.to_datetime(row['valuation_date']),
|
|
'property_value': Decimal(str(row['property_value'])) if pd.notna(row.get('property_value')) else None,
|
|
'land_value': Decimal(str(row['land_value'])) if pd.notna(row.get('land_value')) else None,
|
|
'building_value': Decimal(str(row['building_value'])) if pd.notna(row.get('building_value')) else None,
|
|
'total_area': row.get('total_area', 0),
|
|
'land_area': row.get('land_area', 0),
|
|
'building_area': row.get('building_area', 0),
|
|
}
|
|
)
|
|
if created:
|
|
valuations_created += 1
|
|
|
|
print(f" ✅ Created {valuations_created} new valuations")
|
|
return valuations_created
|
|
except Exception as e:
|
|
print(f" ❌ Error loading valuations: {e}")
|
|
return 0
|
|
|
|
def load_lands(csv_path):
|
|
"""Load lands data from CSV."""
|
|
print("🏞️ Loading lands data...")
|
|
try:
|
|
df = pd.read_csv(csv_path)
|
|
print(f" Found {len(df)} land records")
|
|
|
|
lands_created = 0
|
|
for _, row in df.iterrows():
|
|
land, created = Land.objects.get_or_create(
|
|
land_number=row['land_number'],
|
|
defaults={
|
|
'land_type': row.get('land_type', ''),
|
|
'area_en': row.get('area_en', ''),
|
|
'zone_en': row.get('zone_en', ''),
|
|
'actual_area': Decimal(str(row['actual_area'])) if pd.notna(row.get('actual_area')) else None,
|
|
'is_freehold': row.get('is_freehold', False),
|
|
'land_use': row.get('land_use', ''),
|
|
'plot_number': row.get('plot_number', ''),
|
|
'street_number': row.get('street_number', ''),
|
|
}
|
|
)
|
|
if created:
|
|
lands_created += 1
|
|
|
|
print(f" ✅ Created {lands_created} new lands")
|
|
return lands_created
|
|
except Exception as e:
|
|
print(f" ❌ Error loading lands: {e}")
|
|
return 0
|
|
|
|
def load_rents(csv_path):
|
|
"""Load rents data from CSV."""
|
|
print("🏠 Loading rents data...")
|
|
try:
|
|
df = pd.read_csv(csv_path)
|
|
print(f" Found {len(df)} rent records")
|
|
|
|
rents_created = 0
|
|
for _, row in df.iterrows():
|
|
rent, created = Rent.objects.get_or_create(
|
|
rent_number=row['rent_number'],
|
|
defaults={
|
|
'property_type': row.get('property_type', ''),
|
|
'area_en': row.get('area_en', ''),
|
|
'zone_en': row.get('zone_en', ''),
|
|
'rent_date': pd.to_datetime(row['rent_date']),
|
|
'annual_rent': Decimal(str(row['annual_rent'])) if pd.notna(row.get('annual_rent')) else None,
|
|
'monthly_rent': Decimal(str(row['monthly_rent'])) if pd.notna(row.get('monthly_rent')) else None,
|
|
'property_area': row.get('property_area', 0),
|
|
'rent_per_sqft': Decimal(str(row['rent_per_sqft'])) if pd.notna(row.get('rent_per_sqft')) else None,
|
|
}
|
|
)
|
|
if created:
|
|
rents_created += 1
|
|
|
|
print(f" ✅ Created {rents_created} new rents")
|
|
return rents_created
|
|
except Exception as e:
|
|
print(f" ❌ Error loading rents: {e}")
|
|
return 0
|
|
|
|
def load_transactions(csv_path):
|
|
"""Load transactions data from CSV."""
|
|
print("💼 Loading transactions data...")
|
|
try:
|
|
df = pd.read_csv(csv_path)
|
|
print(f" Found {len(df)} transaction records")
|
|
|
|
transactions_created = 0
|
|
for _, row in df.iterrows():
|
|
# Get or create project
|
|
project = None
|
|
if pd.notna(row.get('project')):
|
|
project, _ = Project.objects.get_or_create(
|
|
project_number=row['project'],
|
|
defaults={'project_name_en': row['project']}
|
|
)
|
|
|
|
transaction, created = Transaction.objects.get_or_create(
|
|
transaction_number=row['transaction_number'],
|
|
defaults={
|
|
'instance_date': pd.to_datetime(row['instance_date']),
|
|
'area_en': row.get('area_en', ''),
|
|
'zone_en': row.get('zone_en', ''),
|
|
'property_type': row.get('property_type', ''),
|
|
'transaction_value': Decimal(str(row['transaction_value'])) if pd.notna(row.get('transaction_value')) else None,
|
|
'property_area': row.get('property_area', 0),
|
|
'price_per_sqft': Decimal(str(row['price_per_sqft'])) if pd.notna(row.get('price_per_sqft')) else None,
|
|
'group': row.get('group', ''),
|
|
'usage': row.get('usage', ''),
|
|
'master_project': row.get('master_project', ''),
|
|
'project': project,
|
|
}
|
|
)
|
|
if created:
|
|
transactions_created += 1
|
|
|
|
print(f" ✅ Created {transactions_created} new transactions")
|
|
return transactions_created
|
|
except Exception as e:
|
|
print(f" ❌ Error loading transactions: {e}")
|
|
return 0
|
|
|
|
def create_sample_forecasts():
|
|
"""Create sample forecast data."""
|
|
print("🔮 Creating sample forecasts...")
|
|
try:
|
|
# Get some sample areas and property types from transactions
|
|
areas = Transaction.objects.values_list('area_en', flat=True).distinct()[:5]
|
|
property_types = Transaction.objects.values_list('property_type', flat=True).distinct()[:3]
|
|
|
|
forecasts_created = 0
|
|
for area in areas:
|
|
for prop_type in property_types:
|
|
if not area or not prop_type:
|
|
continue
|
|
|
|
forecast, created = Forecast.objects.get_or_create(
|
|
area_en=area,
|
|
property_type=prop_type,
|
|
defaults={
|
|
'forecast_date': datetime.now().date(),
|
|
'predicted_price': Decimal('1000000.00'),
|
|
'confidence_interval_lower': Decimal('800000.00'),
|
|
'confidence_interval_upper': Decimal('1200000.00'),
|
|
'model_version': '1.0',
|
|
'accuracy_score': Decimal('0.85'),
|
|
'metadata': {'source': 'sample_data', 'model': 'linear_regression'}
|
|
}
|
|
)
|
|
if created:
|
|
forecasts_created += 1
|
|
|
|
print(f" ✅ Created {forecasts_created} sample forecasts")
|
|
return forecasts_created
|
|
except Exception as e:
|
|
print(f" ❌ Error creating forecasts: {e}")
|
|
return 0
|
|
|
|
def setup_rate_limits():
|
|
"""Setup default rate limits for different subscription tiers."""
|
|
print("⚙️ Setting up rate limits...")
|
|
try:
|
|
rate_limits = [
|
|
{'subscription_type': 'free', 'requests_per_minute': 10, 'requests_per_hour': 100, 'requests_per_day': 1000},
|
|
{'subscription_type': 'paid', 'requests_per_minute': 60, 'requests_per_hour': 1000, 'requests_per_day': 10000},
|
|
{'subscription_type': 'premium', 'requests_per_minute': 120, 'requests_per_hour': 2000, 'requests_per_day': 20000},
|
|
]
|
|
|
|
created_count = 0
|
|
for limit_data in rate_limits:
|
|
rate_limit, created = APIRateLimit.objects.get_or_create(
|
|
subscription_type=limit_data['subscription_type'],
|
|
defaults=limit_data
|
|
)
|
|
if created:
|
|
created_count += 1
|
|
|
|
print(f" ✅ Created {created_count} rate limit configurations")
|
|
return created_count
|
|
except Exception as e:
|
|
print(f" ❌ Error setting up rate limits: {e}")
|
|
return 0
|
|
|
|
def main():
|
|
"""Main function to load all CSV data."""
|
|
print("=" * 60)
|
|
print(" Dubai Analytics Platform - CSV Data Loader")
|
|
print("=" * 60)
|
|
print()
|
|
|
|
# Check if sample data directory exists
|
|
sample_data_dir = "sample data"
|
|
if not os.path.exists(sample_data_dir):
|
|
print(f"❌ Sample data directory '{sample_data_dir}' not found!")
|
|
print(" Please ensure the CSV files are in the 'sample data' directory.")
|
|
return
|
|
|
|
# Track total records created
|
|
total_created = 0
|
|
|
|
# Load each CSV file
|
|
csv_files = [
|
|
('brokers.csv', load_brokers),
|
|
('lands.csv', load_lands),
|
|
('projects.csv', load_projects),
|
|
('rents.csv', load_rents),
|
|
('transactions.csv', load_transactions),
|
|
('valuations.csv', load_valuations),
|
|
]
|
|
|
|
for csv_file, load_function in csv_files:
|
|
csv_path = os.path.join(sample_data_dir, csv_file)
|
|
if os.path.exists(csv_path):
|
|
created = load_function(csv_path)
|
|
total_created += created
|
|
else:
|
|
print(f"⚠️ File {csv_file} not found, skipping...")
|
|
print()
|
|
|
|
# Create sample forecasts
|
|
forecasts_created = create_sample_forecasts()
|
|
total_created += forecasts_created
|
|
print()
|
|
|
|
# Setup rate limits
|
|
rate_limits_created = setup_rate_limits()
|
|
print()
|
|
|
|
# Summary
|
|
print("=" * 60)
|
|
print(" Data Loading Summary")
|
|
print("=" * 60)
|
|
print(f"📊 Total records created: {total_created}")
|
|
print(f"⚙️ Rate limits configured: {rate_limits_created}")
|
|
print()
|
|
print("✅ Data loading completed successfully!")
|
|
print()
|
|
print("Next steps:")
|
|
print("1. Access Django Admin: http://localhost:8000/admin/")
|
|
print("2. Login with: admin@dubai-analytics.com / admin123")
|
|
print("3. View the loaded data in the admin interface")
|
|
print("4. Test the API endpoints with the sample data")
|
|
|
|
if __name__ == '__main__':
|
|
main()
|