#!/usr/bin/env python
|
|
"""
|
|
Load All Data in Chunks - Comprehensive Script
|
|
Loads all CSV data in manageable chunks with progress tracking.
|
|
"""
|
|
import os
|
|
import sys
|
|
import django
|
|
import pandas as pd
|
|
from datetime import datetime
|
|
from decimal import Decimal
|
|
import uuid
|
|
import traceback
|
|
|
|
# Add the project directory to Python path
|
|
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
# Set Django settings
|
|
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'dubai_analytics.settings')
|
|
|
|
# Setup Django
|
|
django.setup()
|
|
|
|
from apps.analytics.models import (
|
|
Broker, Developer, Project, Valuation, Land, Rent, Transaction, Forecast
|
|
)
|
|
from apps.users.models import User
|
|
from apps.core.models import APIRateLimit, SystemConfiguration
|
|
|
|
def safe_get(row, key, default=None):
    """Safely get a value from a pandas row (or dict), treating NaN/empty as missing.

    Args:
        row: Object with a ``.get`` method (pandas Series or dict).
        key: Column/key name to look up.
        default: Value returned when the key is missing or its value is NaN,
            the empty string, or the literal string 'nan'.

    Returns:
        The stored value, or ``default`` when the value is considered missing.
    """
    try:
        value = row.get(key, default)
        # CSV exports frequently encode missing data as NaN, '' or 'nan'.
        if pd.isna(value) or value == '' or str(value).lower() == 'nan':
            return default
        return value
    except Exception:
        # pd.isna()/comparisons can raise on non-scalar values; treat as missing.
        # (Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit propagate.)
        return default
|
|
|
|
def safe_decimal(value, default=0):
    """Safely convert ``value`` to Decimal, using ``default`` for missing/invalid input.

    Args:
        value: Raw CSV cell value.
        default: Fallback used when the value is NaN/empty/unparseable.

    Returns:
        Decimal representation of the value, or of ``default`` on failure.
    """
    try:
        # CSV exports frequently encode missing data as NaN, '' or 'nan'.
        if pd.isna(value) or value == '' or str(value).lower() == 'nan':
            return Decimal(str(default))
        return Decimal(str(value))
    except Exception:
        # Covers decimal.InvalidOperation and pd.isna failures on odd inputs.
        # (Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit propagate.)
        return Decimal(str(default))
|
|
|
|
def safe_datetime(value):
    """Safely convert ``value`` to a pandas Timestamp; return None when not parseable.

    Args:
        value: Raw CSV cell value (date string, timestamp, or missing marker).

    Returns:
        ``pd.Timestamp`` on success, otherwise ``None``.
    """
    try:
        # CSV exports frequently encode missing data as NaN, '' or 'nan'.
        if pd.isna(value) or value == '' or str(value).lower() == 'nan':
            return None
        return pd.to_datetime(value)
    except Exception:
        # pd.to_datetime raises on unparseable input; treat as missing.
        # (Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit propagate.)
        return None
|
|
|
|
def safe_int(value, default=0):
    """Safely convert ``value`` to int, using ``default`` for missing/invalid input.

    Args:
        value: Raw CSV cell value.
        default: Fallback used when the value is NaN/empty/unparseable.

    Returns:
        Truncated integer value, or ``default`` on failure.
    """
    try:
        # CSV exports frequently encode missing data as NaN, '' or 'nan'.
        if pd.isna(value) or value == '' or str(value).lower() == 'nan':
            return default
        # Go through float first so strings like "3.0" convert cleanly.
        return int(float(value))
    except Exception:
        # (Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit propagate.)
        return default
|
|
|
|
def safe_str(value, default=''):
    """Safely convert ``value`` to a stripped string, using ``default`` when missing.

    Args:
        value: Raw CSV cell value.
        default: Fallback used when the value is NaN/empty/'nan'.

    Returns:
        ``str(value).strip()``, or ``default`` when the value is missing.
    """
    try:
        # CSV exports frequently encode missing data as NaN, '' or 'nan'.
        if pd.isna(value) or value == '' or str(value).lower() == 'nan':
            return default
        return str(value).strip()
    except Exception:
        # (Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit propagate.)
        return default
|
|
|
|
def load_brokers_chunks(csv_path, chunk_size=1000, max_chunks=5):
    """Load broker records from a CSV file in chunks.

    Args:
        csv_path: Path to the brokers CSV file.
        chunk_size: Number of rows processed per chunk.
        max_chunks: Upper bound on the number of chunks processed.

    Returns:
        Total number of Broker rows created (0 if the CSV cannot be read).
    """
    print("📊 Loading brokers data in chunks...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} broker records")

        total_created = 0
        total_errors = 0

        # Ceiling division: `len(df) // chunk_size + 1` produced a spurious
        # empty trailing chunk whenever len(df) was an exact multiple of chunk_size.
        num_chunks = min(max_chunks, (len(df) + chunk_size - 1) // chunk_size)

        for chunk_num in range(num_chunks):
            start_idx = chunk_num * chunk_size
            end_idx = min((chunk_num + 1) * chunk_size, len(df))
            chunk_df = df.iloc[start_idx:end_idx]

            print(f" Processing chunk {chunk_num + 1} (rows {start_idx + 1}-{end_idx})...")

            chunk_created = 0
            chunk_errors = 0

            for _, row in chunk_df.iterrows():
                try:
                    broker_number = safe_str(row.get('BROKER_NUMBER', ''))
                    if not broker_number:
                        # A broker without a number cannot be keyed; count and skip.
                        chunk_errors += 1
                        continue

                    broker, created = Broker.objects.get_or_create(
                        broker_number=broker_number,
                        defaults={
                            'broker_name_en': safe_str(row.get('BROKER_EN', '')),
                            'gender': safe_str(row.get('GENDER_EN', 'male')),
                            # Model requires license dates; fall back to "now" when absent.
                            'license_start_date': safe_datetime(row.get('LICENSE_START_DATE')) or datetime.now(),
                            'license_end_date': safe_datetime(row.get('LICENSE_END_DATE')) or datetime.now(),
                            'webpage': safe_str(row.get('WEBPAGE', '')),
                            'phone': safe_str(row.get('PHONE', '')),
                            'fax': safe_str(row.get('FAX', '')),
                            'real_estate_number': safe_str(row.get('REAL_ESTATE_NUMBER', '')),
                            'real_estate_name_en': safe_str(row.get('REAL_ESTATE_EN', '')),
                        }
                    )
                    if created:
                        chunk_created += 1
                except Exception:
                    # Row-level failures (bad data, DB constraint) are counted, not fatal.
                    chunk_errors += 1

            print(f" ✅ Chunk {chunk_num + 1}: {chunk_created} created, {chunk_errors} errors")
            total_created += chunk_created
            total_errors += chunk_errors

        print(f" 📊 Brokers Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" ❌ Error loading brokers: {e}")
        return 0
|
|
|
|
def load_developers_chunks(csv_path, chunk_size=1000, max_chunks=5):
    """Load developer records from a CSV file in chunks.

    Args:
        csv_path: Path to the developers CSV file.
        chunk_size: Number of rows processed per chunk.
        max_chunks: Upper bound on the number of chunks processed.

    Returns:
        Total number of Developer rows created (0 if the CSV cannot be read).
    """
    print("🏗️ Loading developers data in chunks...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} developer records")

        total_created = 0
        total_errors = 0

        # Ceiling division: `len(df) // chunk_size + 1` produced a spurious
        # empty trailing chunk whenever len(df) was an exact multiple of chunk_size.
        num_chunks = min(max_chunks, (len(df) + chunk_size - 1) // chunk_size)

        for chunk_num in range(num_chunks):
            start_idx = chunk_num * chunk_size
            end_idx = min((chunk_num + 1) * chunk_size, len(df))
            chunk_df = df.iloc[start_idx:end_idx]

            print(f" Processing chunk {chunk_num + 1} (rows {start_idx + 1}-{end_idx})...")

            chunk_created = 0
            chunk_errors = 0

            for _, row in chunk_df.iterrows():
                try:
                    developer_number = safe_str(row.get('DEVELOPER_NUMBER', ''))
                    if not developer_number:
                        # A developer without a number cannot be keyed; count and skip.
                        chunk_errors += 1
                        continue

                    developer, created = Developer.objects.get_or_create(
                        developer_number=developer_number,
                        defaults={
                            'developer_name_en': safe_str(row.get('DEVELOPER_EN', '')),
                        }
                    )
                    if created:
                        chunk_created += 1
                except Exception:
                    # Row-level failures (bad data, DB constraint) are counted, not fatal.
                    chunk_errors += 1

            print(f" ✅ Chunk {chunk_num + 1}: {chunk_created} created, {chunk_errors} errors")
            total_created += chunk_created
            total_errors += chunk_errors

        print(f" 🏗️ Developers Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" ❌ Error loading developers: {e}")
        return 0
|
|
|
|
def load_projects_chunks(csv_path, chunk_size=1000, max_chunks=5):
    """Load project records from a CSV file in chunks, linking developers by number.

    Args:
        csv_path: Path to the projects CSV file.
        chunk_size: Number of rows processed per chunk.
        max_chunks: Upper bound on the number of chunks processed.

    Returns:
        Total number of Project rows created (0 if the CSV cannot be read).
    """
    print("🏢 Loading projects data in chunks...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} project records")

        total_created = 0
        total_errors = 0

        # Ceiling division: `len(df) // chunk_size + 1` produced a spurious
        # empty trailing chunk whenever len(df) was an exact multiple of chunk_size.
        num_chunks = min(max_chunks, (len(df) + chunk_size - 1) // chunk_size)

        for chunk_num in range(num_chunks):
            start_idx = chunk_num * chunk_size
            end_idx = min((chunk_num + 1) * chunk_size, len(df))
            chunk_df = df.iloc[start_idx:end_idx]

            print(f" Processing chunk {chunk_num + 1} (rows {start_idx + 1}-{end_idx})...")

            chunk_created = 0
            chunk_errors = 0

            for _, row in chunk_df.iterrows():
                try:
                    project_number = safe_str(row.get('PROJECT_NUMBER', ''))
                    if not project_number:
                        # A project without a number cannot be keyed; count and skip.
                        chunk_errors += 1
                        continue

                    # Resolve (or lazily create) the owning developer when the row has one.
                    developer = None
                    dev_number = safe_str(row.get('DEVELOPER_NUMBER', ''))
                    if dev_number:
                        developer, _ = Developer.objects.get_or_create(
                            developer_number=dev_number,
                            defaults={'developer_name_en': safe_str(row.get('DEVELOPER_EN', ''))}
                        )

                    project, created = Project.objects.get_or_create(
                        project_number=project_number,
                        defaults={
                            'project_name_en': safe_str(row.get('PROJECT_EN', '')),
                            'developer': developer,
                            # Model requires a start date; fall back to "now" when absent.
                            'start_date': safe_datetime(row.get('START_DATE')) or datetime.now(),
                            'end_date': safe_datetime(row.get('END_DATE')),
                            'adoption_date': safe_datetime(row.get('ADOPTION_DATE')),
                            'project_type': safe_str(row.get('PRJ_TYPE_EN', 'Normal')),
                            'project_value': safe_decimal(row.get('PROJECT_VALUE', 0)),
                            'escrow_account_number': safe_str(row.get('ESCROW_ACCOUNT_NUMBER', '')),
                            'project_status': safe_str(row.get('PROJECT_STATUS', 'ACTIVE')),
                            'percent_completed': safe_decimal(row.get('PERCENT_COMPLETED', 0)),
                            'inspection_date': safe_datetime(row.get('INSPECTION_DATE')),
                            'completion_date': safe_datetime(row.get('COMPLETION_DATE')),
                            'description_en': safe_str(row.get('DESCRIPTION_EN', '')),
                            'area_en': safe_str(row.get('AREA_EN', '')),
                            'zone_en': safe_str(row.get('ZONE_EN', '')),
                            'count_land': safe_int(row.get('CNT_LAND', 0)),
                            'count_building': safe_int(row.get('CNT_BUILDING', 0)),
                            'count_villa': safe_int(row.get('CNT_VILLA', 0)),
                            'count_unit': safe_int(row.get('CNT_UNIT', 0)),
                            'master_project_en': safe_str(row.get('MASTER_PROJECT_EN', '')),
                        }
                    )
                    if created:
                        chunk_created += 1
                except Exception:
                    # Row-level failures (bad data, DB constraint) are counted, not fatal.
                    chunk_errors += 1

            print(f" ✅ Chunk {chunk_num + 1}: {chunk_created} created, {chunk_errors} errors")
            total_created += chunk_created
            total_errors += chunk_errors

        print(f" 🏢 Projects Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" ❌ Error loading projects: {e}")
        return 0
|
|
|
|
def load_lands_chunks(csv_path, chunk_size=1000, max_chunks=5):
    """Load land records from a CSV file in chunks.

    NOTE(review): each row is keyed by a freshly generated UUID, so re-running
    this loader creates duplicate Land rows (get_or_create never finds a match).
    The source CSV apparently lacks a stable natural key — confirm before fixing.

    Args:
        csv_path: Path to the lands CSV file.
        chunk_size: Number of rows processed per chunk.
        max_chunks: Upper bound on the number of chunks processed.

    Returns:
        Total number of Land rows created (0 if the CSV cannot be read).
    """
    print("🏞️ Loading lands data in chunks...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} land records")

        total_created = 0
        total_errors = 0

        # Ceiling division: `len(df) // chunk_size + 1` produced a spurious
        # empty trailing chunk whenever len(df) was an exact multiple of chunk_size.
        num_chunks = min(max_chunks, (len(df) + chunk_size - 1) // chunk_size)

        for chunk_num in range(num_chunks):
            start_idx = chunk_num * chunk_size
            end_idx = min((chunk_num + 1) * chunk_size, len(df))
            chunk_df = df.iloc[start_idx:end_idx]

            print(f" Processing chunk {chunk_num + 1} (rows {start_idx + 1}-{end_idx})...")

            chunk_created = 0
            chunk_errors = 0

            for _, row in chunk_df.iterrows():
                try:
                    # Synthetic identifier (see NOTE in docstring about duplicates).
                    land_id = f"LAND_{uuid.uuid4().hex[:8]}"

                    # Link the land to its project when the row names one.
                    project = None
                    project_name = safe_str(row.get('PROJECT_EN', ''))
                    if project_name:
                        project, _ = Project.objects.get_or_create(
                            project_number=safe_str(row.get('PROJECT_NUMBER', '')),
                            defaults={'project_name_en': project_name}
                        )

                    land, created = Land.objects.get_or_create(
                        pre_registration_number=land_id,
                        defaults={
                            'land_type': safe_str(row.get('LAND_TYPE_EN', 'Residential')),
                            'property_sub_type': safe_str(row.get('PROP_SUB_TYPE_EN', '')),
                            'actual_area': safe_decimal(row.get('ACTUAL_AREA', 0)),
                            'is_offplan': safe_str(row.get('IS_OFFPLAN_EN', 'Ready')),
                            'is_freehold': safe_str(row.get('IS_FREE_HOLD_EN', 'Free Hold')),
                            'dm_zip_code': safe_str(row.get('DM_ZIP_CODE', '')),
                            'master_project': safe_str(row.get('MASTER_PROJECT_EN', '')),
                            'project': project,
                            'area_en': safe_str(row.get('AREA_EN', '')),
                            'zone_en': safe_str(row.get('ZONE_EN', '')),
                        }
                    )
                    if created:
                        chunk_created += 1
                except Exception:
                    # Row-level failures (bad data, DB constraint) are counted, not fatal.
                    chunk_errors += 1

            print(f" ✅ Chunk {chunk_num + 1}: {chunk_created} created, {chunk_errors} errors")
            total_created += chunk_created
            total_errors += chunk_errors

        print(f" 🏞️ Lands Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" ❌ Error loading lands: {e}")
        return 0
|
|
|
|
def load_rents_chunks(csv_path, chunk_size=1000, max_chunks=5):
    """Load rent-contract records from a CSV file in chunks.

    Rows are keyed by the (registration_date, start_date, end_date) triple,
    so rows sharing all three dates collapse into one Rent record.

    Args:
        csv_path: Path to the rents CSV file.
        chunk_size: Number of rows processed per chunk.
        max_chunks: Upper bound on the number of chunks processed.

    Returns:
        Total number of Rent rows created (0 if the CSV cannot be read).
    """
    print("🏠 Loading rents data in chunks...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} rent records")

        total_created = 0
        total_errors = 0

        # Ceiling division: `len(df) // chunk_size + 1` produced a spurious
        # empty trailing chunk whenever len(df) was an exact multiple of chunk_size.
        num_chunks = min(max_chunks, (len(df) + chunk_size - 1) // chunk_size)

        for chunk_num in range(num_chunks):
            start_idx = chunk_num * chunk_size
            end_idx = min((chunk_num + 1) * chunk_size, len(df))
            chunk_df = df.iloc[start_idx:end_idx]

            print(f" Processing chunk {chunk_num + 1} (rows {start_idx + 1}-{end_idx})...")

            chunk_created = 0
            chunk_errors = 0

            for _, row in chunk_df.iterrows():
                try:
                    # (Removed an unused `rent_id` UUID local — the lookup key
                    # is the date triple below, not a synthetic identifier.)
                    rent, created = Rent.objects.get_or_create(
                        registration_date=safe_datetime(row.get('REGISTRATION_DATE')) or datetime.now(),
                        start_date=safe_datetime(row.get('START_DATE')) or datetime.now(),
                        end_date=safe_datetime(row.get('END_DATE')) or datetime.now(),
                        defaults={
                            'version': safe_str(row.get('VERSION', 'New')),
                            'area_en': safe_str(row.get('AREA_EN', '')),
                            'contract_amount': safe_decimal(row.get('CONTRACT_AMOUNT', 0)),
                            'annual_amount': safe_decimal(row.get('ANNUAL_AMOUNT', 0)),
                            'is_freehold': safe_str(row.get('IS_FREE_HOLD', 'Free Hold')),
                            'actual_area': safe_decimal(row.get('ACTUAL_AREA', 0)),
                            'property_type': safe_str(row.get('PROPERTY_TYPE', 'Unit')),
                            'property_sub_type': safe_str(row.get('PROPERTY_SUB_TYPE', '')),
                            'rooms': safe_str(row.get('ROOMS', '')),
                            'usage': safe_str(row.get('USAGE', 'Residential')),
                        }
                    )
                    if created:
                        chunk_created += 1
                except Exception:
                    # Row-level failures (bad data, DB constraint) are counted, not fatal.
                    chunk_errors += 1

            print(f" ✅ Chunk {chunk_num + 1}: {chunk_created} created, {chunk_errors} errors")
            total_created += chunk_created
            total_errors += chunk_errors

        print(f" 🏠 Rents Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" ❌ Error loading rents: {e}")
        return 0
|
|
|
|
def load_transactions_chunks(csv_path, chunk_size=1000, max_chunks=5):
    """Load transaction records from a CSV file in chunks.

    Args:
        csv_path: Path to the transactions CSV file.
        chunk_size: Number of rows processed per chunk.
        max_chunks: Upper bound on the number of chunks processed.

    Returns:
        Total number of Transaction rows created (0 if the CSV cannot be read).
    """
    print("💼 Loading transactions data in chunks...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} transaction records")

        total_created = 0
        total_errors = 0

        # Ceiling division: `len(df) // chunk_size + 1` produced a spurious
        # empty trailing chunk whenever len(df) was an exact multiple of chunk_size.
        num_chunks = min(max_chunks, (len(df) + chunk_size - 1) // chunk_size)

        for chunk_num in range(num_chunks):
            start_idx = chunk_num * chunk_size
            end_idx = min((chunk_num + 1) * chunk_size, len(df))
            chunk_df = df.iloc[start_idx:end_idx]

            print(f" Processing chunk {chunk_num + 1} (rows {start_idx + 1}-{end_idx})...")

            chunk_created = 0
            chunk_errors = 0

            for _, row in chunk_df.iterrows():
                try:
                    transaction_number = safe_str(row.get('TRANSACTION_NUMBER', ''))
                    if not transaction_number:
                        # A transaction without a number cannot be keyed; count and skip.
                        chunk_errors += 1
                        continue

                    # Link the transaction to its project, creating a placeholder
                    # project (under a catch-all developer) when needed.
                    project = None
                    project_name = safe_str(row.get('PROJECT_EN', ''))
                    if project_name:
                        default_developer, _ = Developer.objects.get_or_create(
                            developer_number='DEFAULT',
                            defaults={'developer_name_en': 'Default Developer'}
                        )

                        project, _ = Project.objects.get_or_create(
                            project_number=safe_str(row.get('PROJECT_NUMBER', '')),
                            defaults={
                                'project_name_en': project_name,
                                'developer': default_developer,
                                'start_date': datetime.now(),
                                'project_status': 'ACTIVE',
                                'area_en': safe_str(row.get('AREA_EN', '')),
                                'zone_en': safe_str(row.get('ZONE_EN', '')),
                            }
                        )

                    transaction, created = Transaction.objects.get_or_create(
                        transaction_number=transaction_number,
                        defaults={
                            'instance_date': safe_datetime(row.get('INSTANCE_DATE')) or datetime.now(),
                            'group': safe_str(row.get('GROUP_EN', 'Sale')),
                            'procedure': safe_str(row.get('PROCEDURE_EN', '')),
                            'is_offplan': safe_str(row.get('IS_OFFPLAN_EN', 'Ready')),
                            'is_freehold': safe_str(row.get('IS_FREE_HOLD_EN', 'Free Hold')),
                            'usage': safe_str(row.get('USAGE_EN', 'Residential')),
                            'area_en': safe_str(row.get('AREA_EN', '')),
                            'property_type': safe_str(row.get('PROP_TYPE_EN', 'Unit')),
                            'property_sub_type': safe_str(row.get('PROP_SB_TYPE_EN', '')),
                            'transaction_value': safe_decimal(row.get('TRANS_VALUE', 0)),
                            'procedure_area': safe_decimal(row.get('PROCEDURE_AREA', 0)),
                            'actual_area': safe_decimal(row.get('ACTUAL_AREA', 0)),
                            'rooms': safe_str(row.get('ROOMS_EN', '')),
                            # safe_int instead of int(safe_str(...) or 0): the old form
                            # raised ValueError on non-integer strings like "2.5".
                            'parking': safe_int(row.get('PARKING', 0)),
                            'nearest_metro': safe_str(row.get('NEAREST_METRO_EN', '')),
                            'nearest_mall': safe_str(row.get('NEAREST_MALL_EN', '')),
                            'nearest_landmark': safe_str(row.get('NEAREST_LANDMARK_EN', '')),
                            'total_buyer': safe_int(row.get('TOTAL_BUYER', 0)),
                            'total_seller': safe_int(row.get('TOTAL_SELLER', 0)),
                            'master_project': safe_str(row.get('MASTER_PROJECT_EN', '')),
                            'project': project,
                        }
                    )
                    if created:
                        chunk_created += 1
                except Exception:
                    # Row-level failures (bad data, DB constraint) are counted, not fatal.
                    chunk_errors += 1

            print(f" ✅ Chunk {chunk_num + 1}: {chunk_created} created, {chunk_errors} errors")
            total_created += chunk_created
            total_errors += chunk_errors

        print(f" 💼 Transactions Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" ❌ Error loading transactions: {e}")
        return 0
|
|
|
|
def load_valuations_chunks(csv_path, chunk_size=1000, max_chunks=5):
    """Load valuation records from a CSV file in chunks.

    NOTE(review): each row is keyed by a freshly generated UUID, so re-running
    this loader creates duplicate Valuation rows (get_or_create never matches).
    Confirm whether the CSV has a stable procedure number before changing.

    Args:
        csv_path: Path to the valuations CSV file.
        chunk_size: Number of rows processed per chunk.
        max_chunks: Upper bound on the number of chunks processed.

    Returns:
        Total number of Valuation rows created (0 if the CSV cannot be read).
    """
    print("💰 Loading valuations data in chunks...")
    try:
        df = pd.read_csv(csv_path)
        print(f" Found {len(df)} valuation records")

        total_created = 0
        total_errors = 0

        # Ceiling division: `len(df) // chunk_size + 1` produced a spurious
        # empty trailing chunk whenever len(df) was an exact multiple of chunk_size.
        num_chunks = min(max_chunks, (len(df) + chunk_size - 1) // chunk_size)

        for chunk_num in range(num_chunks):
            start_idx = chunk_num * chunk_size
            end_idx = min((chunk_num + 1) * chunk_size, len(df))
            chunk_df = df.iloc[start_idx:end_idx]

            print(f" Processing chunk {chunk_num + 1} (rows {start_idx + 1}-{end_idx})...")

            chunk_created = 0
            chunk_errors = 0

            for _, row in chunk_df.iterrows():
                try:
                    # Synthetic identifier (see NOTE in docstring about duplicates).
                    valuation_id = f"VAL_{uuid.uuid4().hex[:8]}"

                    valuation, created = Valuation.objects.get_or_create(
                        procedure_number=valuation_id,
                        defaults={
                            'property_total_value': safe_decimal(row.get('PROPERTY_TOTAL_VALUE', 0)),
                            'area_en': safe_str(row.get('AREA_EN', '')),
                            'actual_area': safe_decimal(row.get('ACTUAL_AREA', 0)),
                            'procedure_year': safe_int(row.get('PROCEDURE_YEAR', datetime.now().year)),
                            'instance_date': safe_datetime(row.get('INSTANCE_DATE')) or datetime.now(),
                            'actual_worth': safe_decimal(row.get('ACTUAL_WORTH', 0)),
                            'procedure_area': safe_decimal(row.get('PROCEDURE_AREA', 0)),
                            'property_type': safe_str(row.get('PROPERTY_TYPE', 'Unit')),
                            'property_sub_type': safe_str(row.get('PROPERTY_SUB_TYPE', '')),
                        }
                    )
                    if created:
                        chunk_created += 1
                except Exception:
                    # Row-level failures (bad data, DB constraint) are counted, not fatal.
                    chunk_errors += 1

            print(f" ✅ Chunk {chunk_num + 1}: {chunk_created} created, {chunk_errors} errors")
            total_created += chunk_created
            total_errors += chunk_errors

        print(f" 💰 Valuations Summary: {total_created} created, {total_errors} errors")
        return total_created
    except Exception as e:
        print(f" ❌ Error loading valuations: {e}")
        return 0
|
|
|
|
def create_sample_forecasts():
    """Create placeholder Forecast rows for a handful of area/property-type pairs.

    Samples up to 5 distinct areas and 3 distinct property types from existing
    transactions, then creates one fixed-value forecast per combination.

    Returns:
        Number of Forecast rows created (0 on error).
    """
    print("🔮 Creating sample forecasts...")
    try:
        # Pull a small sample of distinct dimensions from loaded transactions.
        sample_areas = Transaction.objects.values_list('area_en', flat=True).distinct()[:5]
        sample_types = Transaction.objects.values_list('property_type', flat=True).distinct()[:3]

        created_count = 0
        for area_name in sample_areas:
            for type_name in sample_types:
                # Skip blank dimension values — they can't form a meaningful key.
                if not area_name or not type_name:
                    continue

                _, was_created = Forecast.objects.get_or_create(
                    area_en=area_name,
                    property_type=type_name,
                    defaults={
                        'forecast_date': datetime.now().date(),
                        'predicted_price': Decimal('1000000.00'),
                        'confidence_interval_lower': Decimal('800000.00'),
                        'confidence_interval_upper': Decimal('1200000.00'),
                        'model_version': '1.0',
                        'accuracy_score': Decimal('0.85'),
                        'metadata': {'source': 'sample_data', 'model': 'linear_regression'}
                    }
                )
                if was_created:
                    created_count += 1

        print(f" ✅ Created {created_count} sample forecasts")
        return created_count
    except Exception as e:
        print(f" ❌ Error creating forecasts: {e}")
        return 0
|
|
|
|
def verify_data_loaded():
    """Print and return the current row count for each analytics model.

    Returns:
        Dict mapping display label -> row count; empty dict on error.
    """
    print("\n🔍 Verifying loaded data...")
    try:
        # Pair each display label with its model, then count in one pass.
        tracked_models = [
            ('Brokers', Broker),
            ('Developers', Developer),
            ('Projects', Project),
            ('Lands', Land),
            ('Rents', Rent),
            ('Transactions', Transaction),
            ('Valuations', Valuation),
            ('Forecasts', Forecast),
        ]
        counts = {label: model.objects.count() for label, model in tracked_models}

        print(" 📊 Current database counts:")
        for model_name, count in counts.items():
            print(f" {model_name}: {count}")

        return counts
    except Exception as e:
        print(f" ❌ Error verifying data: {e}")
        return {}
|
|
|
|
def main():
    """Main function to load all CSV data in chunks.

    Orchestrates the full run: locates the 'sample data' directory, invokes one
    chunked loader per CSV file, creates sample forecasts, verifies counts, and
    prints a summary. Missing CSV files are skipped with a warning.
    """
    print("=" * 80)
    print(" Dubai Analytics Platform - Load All Data in Chunks")
    print(" Loading data in manageable chunks with progress tracking")
    print("=" * 80)
    print()

    # Check if sample data directory exists (path is relative to the CWD,
    # so the script must be run from the project root).
    sample_data_dir = "sample data"
    if not os.path.exists(sample_data_dir):
        print(f"❌ Sample data directory '{sample_data_dir}' not found!")
        print(" Please ensure the CSV files are in the 'sample data' directory.")
        return

    # Configuration
    chunk_size = 1000 # Records per chunk
    max_chunks = 10 # Maximum chunks to process per table

    # Track total records created
    total_created = 0

    # Each entry pairs a CSV filename with the loader that understands its schema.
    # Order matters: developers/projects are loaded before tables that link to them.
    csv_files = [
        ('brokers.csv', load_brokers_chunks),
        ('developers.csv', load_developers_chunks),
        ('projects.csv', load_projects_chunks),
        ('lands.csv', load_lands_chunks),
        ('rents.csv', load_rents_chunks),
        ('transactions.csv', load_transactions_chunks),
        ('valuations.csv', load_valuations_chunks),
    ]

    for csv_file, load_function in csv_files:
        csv_path = os.path.join(sample_data_dir, csv_file)
        if os.path.exists(csv_path):
            print(f"\n📁 Processing {csv_file}...")
            created = load_function(csv_path, chunk_size=chunk_size, max_chunks=max_chunks)
            total_created += created
        else:
            # Missing files are non-fatal; the remaining loaders still run.
            print(f"⚠️ File {csv_file} not found, skipping...")

    # Create sample forecasts
    print(f"\n🔮 Creating sample forecasts...")
    forecasts_created = create_sample_forecasts()
    total_created += forecasts_created

    # Verify data loaded (NOTE: `counts` is only printed inside the helper;
    # the returned dict is not otherwise used here).
    counts = verify_data_loaded()

    # Summary
    print("\n" + "=" * 80)
    print(" Data Loading Summary")
    print("=" * 80)
    print(f"📊 Total records created: {total_created}")
    print()
    # NOTE(review): printed unconditionally — individual loaders report their
    # own errors and return 0 rather than aborting the run.
    print("✅ All data loading completed successfully!")
    print()
    print("Next steps:")
    print("1. Access Django Admin: http://localhost:8000/admin/")
    print("2. Login with: admin@dubai-analytics.com / admin123")
    print("3. View the loaded data in the admin interface")
    print("4. Test the API endpoints with the sample data")
|
|
|
# Script entry point: run the full chunked data load.
if __name__ == '__main__':
    main()
|