{ "file_info": { "file_path": "./buildings_cleaned.csv", "total_rows": 16322, "total_columns": 27, "memory_usage_mb": 11.115302085876465, "total_cells": 440694 }, "structural_info": { "column_names": [ "prop_sub_type_en", "actual_area", "common_area", "actual_common_area", "built_up_area", "bld_levels", "shops", "flats", "offices", "swimming_pools", "elevators", "creation_date", "is_offplan_en", "pre_registration_number", "is_free_hold_en", "is_lease_hold_en", "floors", "rooms_en", "car_parks", "land_number", "land_sub_number", "land_type_en", "master_project_en", "project_number", "project_en", "area_en", "zone_en" ], "dtypes": { "prop_sub_type_en": "object", "actual_area": "float64", "common_area": "float64", "actual_common_area": "float64", "built_up_area": "float64", "bld_levels": "float64", "shops": "float64", "flats": "float64", "offices": "float64", "swimming_pools": "float64", "elevators": "float64", "creation_date": "object", "is_offplan_en": "object", "pre_registration_number": "float64", "is_free_hold_en": "object", "is_lease_hold_en": "object", "floors": "float64", "rooms_en": "object", "car_parks": "float64", "land_number": "int64", "land_sub_number": "float64", "land_type_en": "object", "master_project_en": "float64", "project_number": "float64", "project_en": "object", "area_en": "object", "zone_en": "object" }, "index_info": { "type": "", "is_unique": true, "has_duplicates": "False" } }, "columns_analysis": { "prop_sub_type_en": { "dtype": "object", "non_null_count": 16322, "null_count": 0, "null_percentage": 0.0, "unique_values": 4, "unique_percentage": 0.02, "categorical_info": { "most_frequent": "villa", "most_frequent_count": 16100, "least_frequent": "education", "least_frequent_count": 2, "top_5_values": { "villa": 16100, "building": 213, "factory": 7, "education": 2 } }, "string_length": { "min": 5, "max": 9, "mean": 5.040497488052934, "median": 5.0 } }, "actual_area": { "dtype": "float64", "non_null_count": 16314, "null_count": 8, "null_percentage": 0.05, "unique_values": 7458, "unique_percentage": 45.72, "statistics": { "mean": 440.360630746598, "median": 327.815, "std": 565.7344708785387, "min": 0.0, "max": 48484.88, "q25": 172.43, "q75": 574.2475, "skewness": 41.964631194990496, "kurtosis": 3277.295612239575 }, "outliers": { "count": 653, "percentage": 4.0 } }, "common_area": { "dtype": "float64", "non_null_count": 15789, "null_count": 533, "null_percentage": 3.27, "unique_values": 423, "unique_percentage": 2.68, "statistics": { "mean": 0.6874307745899043, "median": 0.38, "std": 3.8217089807223634, "min": 0.0, "max": 100.0, "q25": 0.21, "q75": 0.51, "skewness": 21.877328100003425, "kurtosis": 537.2257516510371 }, "outliers": { "count": 932, "percentage": 5.71 } }, "actual_common_area": { "dtype": "float64", "non_null_count": 15789, "null_count": 533, "null_percentage": 3.27, "unique_values": 5966, "unique_percentage": 37.79, "statistics": { "mean": 363.5476762049029, "median": 234.84, "std": 412.9982837857376, "min": 0.0, "max": 4158.64, "q25": 109.51, "q75": 479.44, "skewness": 2.306035925075377, "kurtosis": 8.409536733174551 }, "outliers": { "count": 1199, "percentage": 7.35 } }, "built_up_area": { "dtype": "float64", "non_null_count": 16321, "null_count": 1, "null_percentage": 0.01, "unique_values": 837, "unique_percentage": 5.13, "statistics": { "mean": 442.80775136327435, "median": 325.2, "std": 1995.4832633150934, "min": 0.0, "max": 226678.0, "q25": 222.71, "q75": 461.0, "skewness": 94.3067404198699, "kurtosis": 10266.772410938267 }, "outliers": { "count": 1187, "percentage": 7.27 } }, "bld_levels": { "dtype": "float64", "non_null_count": 17, "null_count": 16305, "null_percentage": 99.9, "unique_values": 9, "unique_percentage": 52.94, "statistics": { "mean": 10.823529411764707, "median": 6.0, "std": 16.17959862804717, "min": 1.0, "max": 65.0, "q25": 1.0, "q75": 13.0, "skewness": 2.6726359781758635, "kurtosis": 8.054097688638388 }, "outliers": { "count": 1, "percentage": 0.01 } }, "shops": { "dtype": "float64", "non_null_count": 8, "null_count": 16314, "null_percentage": 99.95, "unique_values": 4, "unique_percentage": 50.0, "statistics": { "mean": 4.875, "median": 5.0, "std": 1.8850918886280925, "min": 3.0, "max": 8.0, "q25": 3.0, "q75": 5.5, "skewness": 0.578461064887467, "kurtosis": -0.7503244867553853 }, "outliers": { "count": 0, "percentage": 0.0 } }, "flats": { "dtype": "float64", "non_null_count": 0, "null_count": 16322, "null_percentage": 100.0, "unique_values": 0, "unique_percentage": 0, "statistics": { "mean": null, "median": null, "std": null, "min": null, "max": null, "q25": null, "q75": null, "skewness": null, "kurtosis": null } }, "offices": { "dtype": "float64", "non_null_count": 8, "null_count": 16314, "null_percentage": 99.95, "unique_values": 2, "unique_percentage": 25.0, "statistics": { "mean": 3.25, "median": 0.0, "std": 9.192388155425117, "min": 0.0, "max": 26.0, "q25": 0.0, "q75": 0.0, "skewness": 2.8284271247461903, "kurtosis": 8.0 }, "outliers": { "count": 1, "percentage": 0.01 } }, "swimming_pools": { "dtype": "float64", "non_null_count": 8, "null_count": 16314, "null_percentage": 99.95, "unique_values": 2, "unique_percentage": 25.0, "statistics": { "mean": 0.5, "median": 0.5, "std": 0.5345224838248488, "min": 0.0, "max": 1.0, "q25": 0.0, "q75": 1.0, "skewness": 0.0, "kurtosis": -2.8000000000000003 }, "outliers": { "count": 0, "percentage": 0.0 } }, "elevators": { "dtype": "float64", "non_null_count": 8, "null_count": 16314, "null_percentage": 99.95, "unique_values": 5, "unique_percentage": 62.5, "statistics": { "mean": 3.875, "median": 3.0, "std": 2.799872446074234, "min": 1.0, "max": 9.0, "q25": 2.5, "q75": 4.75, "skewness": 1.003137139473818, "kurtosis": 0.25309125627202 }, "outliers": { "count": 1, "percentage": 0.01 } }, "creation_date": { "dtype": "object", "non_null_count": 16322, "null_count": 0, "null_percentage": 0.0, "unique_values": 1017, "unique_percentage": 6.23, "categorical_info": { "most_frequent": "2025-10-09 08:37:52", "most_frequent_count": 101, "least_frequent": "2025-01-29 10:30:55", "least_frequent_count": 1, "top_5_values": { "2025-10-09 08:37:52": 101, "2025-04-23 12:11:46": 55, "2025-02-23 20:42:57": 54, "2025-03-14 15:08:55": 51, "2025-02-23 20:42:56": 50 } }, "string_length": { "min": 19, "max": 19, "mean": 19.0, "median": 19.0 } }, "is_offplan_en": { "dtype": "object", "non_null_count": 16322, "null_count": 0, "null_percentage": 0.0, "unique_values": 2, "unique_percentage": 0.01, "categorical_info": { "most_frequent": "ready", "most_frequent_count": 13400, "least_frequent": "off-plan", "least_frequent_count": 2922, "top_5_values": { "ready": 13400, "off-plan": 2922 } }, "string_length": { "min": 5, "max": 8, "mean": 5.53706653596373, "median": 5.0 } }, "pre_registration_number": { "dtype": "float64", "non_null_count": 0, "null_count": 16322, "null_percentage": 100.0, "unique_values": 0, "unique_percentage": 0, "statistics": { "mean": null, "median": null, "std": null, "min": null, "max": null, "q25": null, "q75": null, "skewness": null, "kurtosis": null } }, "is_free_hold_en": { "dtype": "object", "non_null_count": 16322, "null_count": 0, "null_percentage": 0.0, "unique_values": 1, "unique_percentage": 0.01, "categorical_info": { "most_frequent": "free hold", "most_frequent_count": 16322, "least_frequent": "free hold", "least_frequent_count": 16322, "top_5_values": { "free hold": 16322 } }, "string_length": { "min": 9, "max": 9, "mean": 9.0, "median": 9.0 } }, "is_lease_hold_en": { "dtype": "object", "non_null_count": 16322, "null_count": 0, "null_percentage": 0.0, "unique_values": 2, "unique_percentage": 0.01, "categorical_info": { "most_frequent": "no", "most_frequent_count": 16282, "least_frequent": "yes", "least_frequent_count": 40, "top_5_values": { "no": 16282, "yes": 40 } }, "string_length": { "min": 2, "max": 3, "mean": 2.002450680063718, "median": 2.0 } }, "floors": { "dtype": "float64", "non_null_count": 16117, "null_count": 205, "null_percentage": 1.26, "unique_values": 12, "unique_percentage": 0.07, "statistics": { "mean": 2.549109635788298, "median": 2.0, "std": 0.8014777445666159, "min": 0.0, "max": 63.0, "q25": 2.0, "q75": 3.0, "skewness": 29.521296273600083, "kurtosis": 2079.797677523565 }, "outliers": { "count": 13, "percentage": 0.08 } }, "rooms_en": { "dtype": "object", "non_null_count": 16100, "null_count": 222, "null_percentage": 1.36, "unique_values": 8, "unique_percentage": 0.05, "categorical_info": { "most_frequent": "4 b/r", "most_frequent_count": 8233, "least_frequent": "9 b/r", "least_frequent_count": 4, "top_5_values": { "4 b/r": 8233, "5 b/r": 4593, "3 b/r": 2064, "6 b/r": 1005, "7 b/r": 139 } }, "string_length": { "min": 5, "max": 5, "mean": 5.0, "median": 5.0 } }, "car_parks": { "dtype": "float64", "non_null_count": 16110, "null_count": 212, "null_percentage": 1.3, "unique_values": 19, "unique_percentage": 0.12, "statistics": { "mean": 2.4911235257603974, "median": 2.0, "std": 12.203512118671139, "min": 0.0, "max": 1000.0, "q25": 2.0, "q75": 2.0, "skewness": 71.00007591541561, "kurtosis": 5433.072219709761 }, "outliers": { "count": 4266, "percentage": 26.14 } }, "land_number": { "dtype": "int64", "non_null_count": 16322, "null_count": 0, "null_percentage": 0.0, "unique_values": 10209, "unique_percentage": 62.55, "statistics": { "mean": 7312.259833353755, "median": 6677.5, "std": 4874.308474708385, "min": 6.0, "max": 32233.0, "q25": 3949.25, "q75": 9360.75, "skewness": 1.7372716408578281, "kurtosis": 5.35564277646592 }, "outliers": { "count": 632, "percentage": 3.87 } }, "land_sub_number": { "dtype": "float64", "non_null_count": 3964, "null_count": 12358, "null_percentage": 75.71, "unique_values": 1, "unique_percentage": 0.03, "statistics": { "mean": 0.0, "median": 0.0, "std": 0.0, "min": 0.0, "max": 0.0, "q25": 0.0, "q75": 0.0, "skewness": 0.0, "kurtosis": 0.0 }, "outliers": { "count": 0, "percentage": 0.0 } }, "land_type_en": { "dtype": "object", "non_null_count": 10211, "null_count": 6111, "null_percentage": 37.44, "unique_values": 5, "unique_percentage": 0.05, "categorical_info": { "most_frequent": "commercial", "most_frequent_count": 6352, "least_frequent": "public facilities", "least_frequent_count": 8, "top_5_values": { "commercial": 6352, "industrial": 2037, "residential": 1001, "government authorities": 813, "public facilities": 8 } }, "string_length": { "min": 10, "max": 22, "mean": 11.058956027813142, "median": 10.0 } }, "master_project_en": { "dtype": "float64", "non_null_count": 0, "null_count": 16322, "null_percentage": 100.0, "unique_values": 0, "unique_percentage": 0, "statistics": { "mean": null, "median": null, "std": null, "min": null, "max": null, "q25": null, "q75": null, "skewness": null, "kurtosis": null } }, "project_number": { "dtype": "float64", "non_null_count": 15732, "null_count": 590, "null_percentage": 3.61, "unique_values": 119, "unique_percentage": 0.76, "statistics": { "mean": 3297.36683193491, "median": 3514.0, "std": 631.9558419147014, "min": -3625.0, "max": 4128.0, "q25": 3145.0, "q75": 3700.0, "skewness": -1.2459462283443374, "kurtosis": 1.4853274860563177 }, "outliers": { "count": 2063, "percentage": 12.64 } }, "project_en": { "dtype": "object", "non_null_count": 15732, "null_count": 590, "null_percentage": 3.61, "unique_values": 119, "unique_percentage": 0.76, "categorical_info": { "most_frequent": "the oasis - palace villas - ostra", "most_frequent_count": 526, "least_frequent": "dubai hills - hills grove community", "least_frequent_count": 1, "top_5_values": { "the oasis - palace villas - ostra": 526, "harmony": 514, "the valley - vindera": 499, "the oasis - address villas - tierra": 487, "damac islands - seychelles 2": 474 } }, "string_length": { "min": 5, "max": 58, "mean": 22.230358504958048, "median": 22.0 } }, "area_en": { "dtype": "object", "non_null_count": 16322, "null_count": 0, "null_percentage": 0.0, "unique_values": 61, "unique_percentage": 0.37, "categorical_info": { "most_frequent": "al yelayiss 1", "most_frequent_count": 3931, "least_frequent": "al barsha second", "least_frequent_count": 1, "top_5_values": { "al yelayiss 1": 3931, "dubai investment park second": 1977, "wadi al safa 3": 1089, "madinat al mataar": 1056, "al hebiah fourth": 1023 } }, "string_length": { "min": 7, "max": 33, "mean": 16.426785933096433, "median": 14.0 } }, "zone_en": { "dtype": "object", "non_null_count": 16322, "null_count": 0, "null_percentage": 0.0, "unique_values": 2, "unique_percentage": 0.01, "categorical_info": { "most_frequent": "dubai", "most_frequent_count": 16295, "least_frequent": "deira", "least_frequent_count": 27, "top_5_values": { "dubai": 16295, "deira": 27 } }, "string_length": { "min": 5, "max": 5, "mean": 5.0, "median": 5.0 } } }, "data_quality": { "total_missing_values": 151890, "missing_percentage": 34.47, "columns_with_missing": { "actual_area": 8, "common_area": 533, "actual_common_area": 533, "built_up_area": 1, "bld_levels": 16305, "shops": 16314, "flats": 16322, "offices": 16314, "swimming_pools": 16314, "elevators": 16314, "pre_registration_number": 16322, "floors": 205, "rooms_en": 222, "car_parks": 212, "land_sub_number": 12358, "land_type_en": 6111, "master_project_en": 16322, "project_number": 590, "project_en": 590 }, "missing_percentage_by_column": { "actual_area": 0.05, "common_area": 3.27, "actual_common_area": 3.27, "built_up_area": 0.01, "bld_levels": 99.9, "shops": 99.95, "flats": 100.0, "offices": 99.95, "swimming_pools": 99.95, "elevators": 99.95, "pre_registration_number": 100.0, "floors": 1.26, "rooms_en": 1.36, "car_parks": 1.3, "land_sub_number": 75.71, "land_type_en": 37.44, "master_project_en": 100.0, "project_number": 3.61, "project_en": 3.61 }, "duplicate_rows": 0, "duplicate_percentage": 0.0 }, "statistical_summary": { "numeric_columns_count": 17, "categorical_columns_count": 10, "datetime_columns_count": 0, "data_types_distribution": { "float64": 16, "object": 10, "int64": 1 }, "potential_identifier_columns": [] } }