dld_backend/buildings_cleaned.json
2025-10-30 12:13:02 +05:30

770 lines
19 KiB
JSON

{
"file_info": {
"file_path": "./buildings_cleaned.csv",
"total_rows": 16322,
"total_columns": 27,
"memory_usage_mb": 11.115302085876465,
"total_cells": 440694
},
"structural_info": {
"column_names": [
"prop_sub_type_en",
"actual_area",
"common_area",
"actual_common_area",
"built_up_area",
"bld_levels",
"shops",
"flats",
"offices",
"swimming_pools",
"elevators",
"creation_date",
"is_offplan_en",
"pre_registration_number",
"is_free_hold_en",
"is_lease_hold_en",
"floors",
"rooms_en",
"car_parks",
"land_number",
"land_sub_number",
"land_type_en",
"master_project_en",
"project_number",
"project_en",
"area_en",
"zone_en"
],
"dtypes": {
"prop_sub_type_en": "object",
"actual_area": "float64",
"common_area": "float64",
"actual_common_area": "float64",
"built_up_area": "float64",
"bld_levels": "float64",
"shops": "float64",
"flats": "float64",
"offices": "float64",
"swimming_pools": "float64",
"elevators": "float64",
"creation_date": "object",
"is_offplan_en": "object",
"pre_registration_number": "float64",
"is_free_hold_en": "object",
"is_lease_hold_en": "object",
"floors": "float64",
"rooms_en": "object",
"car_parks": "float64",
"land_number": "int64",
"land_sub_number": "float64",
"land_type_en": "object",
"master_project_en": "float64",
"project_number": "float64",
"project_en": "object",
"area_en": "object",
"zone_en": "object"
},
"index_info": {
"type": "<class 'pandas.core.indexes.range.RangeIndex'>",
"is_unique": true,
"has_duplicates": false
}
},
"columns_analysis": {
"prop_sub_type_en": {
"dtype": "object",
"non_null_count": 16322,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 4,
"unique_percentage": 0.02,
"categorical_info": {
"most_frequent": "villa",
"most_frequent_count": 16100,
"least_frequent": "education",
"least_frequent_count": 2,
"top_5_values": {
"villa": 16100,
"building": 213,
"factory": 7,
"education": 2
}
},
"string_length": {
"min": 5,
"max": 9,
"mean": 5.040497488052934,
"median": 5.0
}
},
"actual_area": {
"dtype": "float64",
"non_null_count": 16314,
"null_count": 8,
"null_percentage": 0.05,
"unique_values": 7458,
"unique_percentage": 45.72,
"statistics": {
"mean": 440.360630746598,
"median": 327.815,
"std": 565.7344708785387,
"min": 0.0,
"max": 48484.88,
"q25": 172.43,
"q75": 574.2475,
"skewness": 41.964631194990496,
"kurtosis": 3277.295612239575
},
"outliers": {
"count": 653,
"percentage": 4.0
}
},
"common_area": {
"dtype": "float64",
"non_null_count": 15789,
"null_count": 533,
"null_percentage": 3.27,
"unique_values": 423,
"unique_percentage": 2.68,
"statistics": {
"mean": 0.6874307745899043,
"median": 0.38,
"std": 3.8217089807223634,
"min": 0.0,
"max": 100.0,
"q25": 0.21,
"q75": 0.51,
"skewness": 21.877328100003425,
"kurtosis": 537.2257516510371
},
"outliers": {
"count": 932,
"percentage": 5.71
}
},
"actual_common_area": {
"dtype": "float64",
"non_null_count": 15789,
"null_count": 533,
"null_percentage": 3.27,
"unique_values": 5966,
"unique_percentage": 37.79,
"statistics": {
"mean": 363.5476762049029,
"median": 234.84,
"std": 412.9982837857376,
"min": 0.0,
"max": 4158.64,
"q25": 109.51,
"q75": 479.44,
"skewness": 2.306035925075377,
"kurtosis": 8.409536733174551
},
"outliers": {
"count": 1199,
"percentage": 7.35
}
},
"built_up_area": {
"dtype": "float64",
"non_null_count": 16321,
"null_count": 1,
"null_percentage": 0.01,
"unique_values": 837,
"unique_percentage": 5.13,
"statistics": {
"mean": 442.80775136327435,
"median": 325.2,
"std": 1995.4832633150934,
"min": 0.0,
"max": 226678.0,
"q25": 222.71,
"q75": 461.0,
"skewness": 94.3067404198699,
"kurtosis": 10266.772410938267
},
"outliers": {
"count": 1187,
"percentage": 7.27
}
},
"bld_levels": {
"dtype": "float64",
"non_null_count": 17,
"null_count": 16305,
"null_percentage": 99.9,
"unique_values": 9,
"unique_percentage": 52.94,
"statistics": {
"mean": 10.823529411764707,
"median": 6.0,
"std": 16.17959862804717,
"min": 1.0,
"max": 65.0,
"q25": 1.0,
"q75": 13.0,
"skewness": 2.6726359781758635,
"kurtosis": 8.054097688638388
},
"outliers": {
"count": 1,
"percentage": 0.01
}
},
"shops": {
"dtype": "float64",
"non_null_count": 8,
"null_count": 16314,
"null_percentage": 99.95,
"unique_values": 4,
"unique_percentage": 50.0,
"statistics": {
"mean": 4.875,
"median": 5.0,
"std": 1.8850918886280925,
"min": 3.0,
"max": 8.0,
"q25": 3.0,
"q75": 5.5,
"skewness": 0.578461064887467,
"kurtosis": -0.7503244867553853
},
"outliers": {
"count": 0,
"percentage": 0.0
}
},
"flats": {
"dtype": "float64",
"non_null_count": 0,
"null_count": 16322,
"null_percentage": 100.0,
"unique_values": 0,
"unique_percentage": 0,
"statistics": {
"mean": null,
"median": null,
"std": null,
"min": null,
"max": null,
"q25": null,
"q75": null,
"skewness": null,
"kurtosis": null
}
},
"offices": {
"dtype": "float64",
"non_null_count": 8,
"null_count": 16314,
"null_percentage": 99.95,
"unique_values": 2,
"unique_percentage": 25.0,
"statistics": {
"mean": 3.25,
"median": 0.0,
"std": 9.192388155425117,
"min": 0.0,
"max": 26.0,
"q25": 0.0,
"q75": 0.0,
"skewness": 2.8284271247461903,
"kurtosis": 8.0
},
"outliers": {
"count": 1,
"percentage": 0.01
}
},
"swimming_pools": {
"dtype": "float64",
"non_null_count": 8,
"null_count": 16314,
"null_percentage": 99.95,
"unique_values": 2,
"unique_percentage": 25.0,
"statistics": {
"mean": 0.5,
"median": 0.5,
"std": 0.5345224838248488,
"min": 0.0,
"max": 1.0,
"q25": 0.0,
"q75": 1.0,
"skewness": 0.0,
"kurtosis": -2.8000000000000003
},
"outliers": {
"count": 0,
"percentage": 0.0
}
},
"elevators": {
"dtype": "float64",
"non_null_count": 8,
"null_count": 16314,
"null_percentage": 99.95,
"unique_values": 5,
"unique_percentage": 62.5,
"statistics": {
"mean": 3.875,
"median": 3.0,
"std": 2.799872446074234,
"min": 1.0,
"max": 9.0,
"q25": 2.5,
"q75": 4.75,
"skewness": 1.003137139473818,
"kurtosis": 0.25309125627202
},
"outliers": {
"count": 1,
"percentage": 0.01
}
},
"creation_date": {
"dtype": "object",
"non_null_count": 16322,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 1017,
"unique_percentage": 6.23,
"categorical_info": {
"most_frequent": "2025-10-09 08:37:52",
"most_frequent_count": 101,
"least_frequent": "2025-01-29 10:30:55",
"least_frequent_count": 1,
"top_5_values": {
"2025-10-09 08:37:52": 101,
"2025-04-23 12:11:46": 55,
"2025-02-23 20:42:57": 54,
"2025-03-14 15:08:55": 51,
"2025-02-23 20:42:56": 50
}
},
"string_length": {
"min": 19,
"max": 19,
"mean": 19.0,
"median": 19.0
}
},
"is_offplan_en": {
"dtype": "object",
"non_null_count": 16322,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 2,
"unique_percentage": 0.01,
"categorical_info": {
"most_frequent": "ready",
"most_frequent_count": 13400,
"least_frequent": "off-plan",
"least_frequent_count": 2922,
"top_5_values": {
"ready": 13400,
"off-plan": 2922
}
},
"string_length": {
"min": 5,
"max": 8,
"mean": 5.53706653596373,
"median": 5.0
}
},
"pre_registration_number": {
"dtype": "float64",
"non_null_count": 0,
"null_count": 16322,
"null_percentage": 100.0,
"unique_values": 0,
"unique_percentage": 0,
"statistics": {
"mean": null,
"median": null,
"std": null,
"min": null,
"max": null,
"q25": null,
"q75": null,
"skewness": null,
"kurtosis": null
}
},
"is_free_hold_en": {
"dtype": "object",
"non_null_count": 16322,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 1,
"unique_percentage": 0.01,
"categorical_info": {
"most_frequent": "free hold",
"most_frequent_count": 16322,
"least_frequent": "free hold",
"least_frequent_count": 16322,
"top_5_values": {
"free hold": 16322
}
},
"string_length": {
"min": 9,
"max": 9,
"mean": 9.0,
"median": 9.0
}
},
"is_lease_hold_en": {
"dtype": "object",
"non_null_count": 16322,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 2,
"unique_percentage": 0.01,
"categorical_info": {
"most_frequent": "no",
"most_frequent_count": 16282,
"least_frequent": "yes",
"least_frequent_count": 40,
"top_5_values": {
"no": 16282,
"yes": 40
}
},
"string_length": {
"min": 2,
"max": 3,
"mean": 2.002450680063718,
"median": 2.0
}
},
"floors": {
"dtype": "float64",
"non_null_count": 16117,
"null_count": 205,
"null_percentage": 1.26,
"unique_values": 12,
"unique_percentage": 0.07,
"statistics": {
"mean": 2.549109635788298,
"median": 2.0,
"std": 0.8014777445666159,
"min": 0.0,
"max": 63.0,
"q25": 2.0,
"q75": 3.0,
"skewness": 29.521296273600083,
"kurtosis": 2079.797677523565
},
"outliers": {
"count": 13,
"percentage": 0.08
}
},
"rooms_en": {
"dtype": "object",
"non_null_count": 16100,
"null_count": 222,
"null_percentage": 1.36,
"unique_values": 8,
"unique_percentage": 0.05,
"categorical_info": {
"most_frequent": "4 b/r",
"most_frequent_count": 8233,
"least_frequent": "9 b/r",
"least_frequent_count": 4,
"top_5_values": {
"4 b/r": 8233,
"5 b/r": 4593,
"3 b/r": 2064,
"6 b/r": 1005,
"7 b/r": 139
}
},
"string_length": {
"min": 5,
"max": 5,
"mean": 5.0,
"median": 5.0
}
},
"car_parks": {
"dtype": "float64",
"non_null_count": 16110,
"null_count": 212,
"null_percentage": 1.3,
"unique_values": 19,
"unique_percentage": 0.12,
"statistics": {
"mean": 2.4911235257603974,
"median": 2.0,
"std": 12.203512118671139,
"min": 0.0,
"max": 1000.0,
"q25": 2.0,
"q75": 2.0,
"skewness": 71.00007591541561,
"kurtosis": 5433.072219709761
},
"outliers": {
"count": 4266,
"percentage": 26.14
}
},
"land_number": {
"dtype": "int64",
"non_null_count": 16322,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 10209,
"unique_percentage": 62.55,
"statistics": {
"mean": 7312.259833353755,
"median": 6677.5,
"std": 4874.308474708385,
"min": 6.0,
"max": 32233.0,
"q25": 3949.25,
"q75": 9360.75,
"skewness": 1.7372716408578281,
"kurtosis": 5.35564277646592
},
"outliers": {
"count": 632,
"percentage": 3.87
}
},
"land_sub_number": {
"dtype": "float64",
"non_null_count": 3964,
"null_count": 12358,
"null_percentage": 75.71,
"unique_values": 1,
"unique_percentage": 0.03,
"statistics": {
"mean": 0.0,
"median": 0.0,
"std": 0.0,
"min": 0.0,
"max": 0.0,
"q25": 0.0,
"q75": 0.0,
"skewness": 0.0,
"kurtosis": 0.0
},
"outliers": {
"count": 0,
"percentage": 0.0
}
},
"land_type_en": {
"dtype": "object",
"non_null_count": 10211,
"null_count": 6111,
"null_percentage": 37.44,
"unique_values": 5,
"unique_percentage": 0.05,
"categorical_info": {
"most_frequent": "commercial",
"most_frequent_count": 6352,
"least_frequent": "public facilities",
"least_frequent_count": 8,
"top_5_values": {
"commercial": 6352,
"industrial": 2037,
"residential": 1001,
"government authorities": 813,
"public facilities": 8
}
},
"string_length": {
"min": 10,
"max": 22,
"mean": 11.058956027813142,
"median": 10.0
}
},
"master_project_en": {
"dtype": "float64",
"non_null_count": 0,
"null_count": 16322,
"null_percentage": 100.0,
"unique_values": 0,
"unique_percentage": 0,
"statistics": {
"mean": null,
"median": null,
"std": null,
"min": null,
"max": null,
"q25": null,
"q75": null,
"skewness": null,
"kurtosis": null
}
},
"project_number": {
"dtype": "float64",
"non_null_count": 15732,
"null_count": 590,
"null_percentage": 3.61,
"unique_values": 119,
"unique_percentage": 0.76,
"statistics": {
"mean": 3297.36683193491,
"median": 3514.0,
"std": 631.9558419147014,
"min": -3625.0,
"max": 4128.0,
"q25": 3145.0,
"q75": 3700.0,
"skewness": -1.2459462283443374,
"kurtosis": 1.4853274860563177
},
"outliers": {
"count": 2063,
"percentage": 12.64
}
},
"project_en": {
"dtype": "object",
"non_null_count": 15732,
"null_count": 590,
"null_percentage": 3.61,
"unique_values": 119,
"unique_percentage": 0.76,
"categorical_info": {
"most_frequent": "the oasis - palace villas - ostra",
"most_frequent_count": 526,
"least_frequent": "dubai hills - hills grove community",
"least_frequent_count": 1,
"top_5_values": {
"the oasis - palace villas - ostra": 526,
"harmony": 514,
"the valley - vindera": 499,
"the oasis - address villas - tierra": 487,
"damac islands - seychelles 2": 474
}
},
"string_length": {
"min": 5,
"max": 58,
"mean": 22.230358504958048,
"median": 22.0
}
},
"area_en": {
"dtype": "object",
"non_null_count": 16322,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 61,
"unique_percentage": 0.37,
"categorical_info": {
"most_frequent": "al yelayiss 1",
"most_frequent_count": 3931,
"least_frequent": "al barsha second",
"least_frequent_count": 1,
"top_5_values": {
"al yelayiss 1": 3931,
"dubai investment park second": 1977,
"wadi al safa 3": 1089,
"madinat al mataar": 1056,
"al hebiah fourth": 1023
}
},
"string_length": {
"min": 7,
"max": 33,
"mean": 16.426785933096433,
"median": 14.0
}
},
"zone_en": {
"dtype": "object",
"non_null_count": 16322,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 2,
"unique_percentage": 0.01,
"categorical_info": {
"most_frequent": "dubai",
"most_frequent_count": 16295,
"least_frequent": "deira",
"least_frequent_count": 27,
"top_5_values": {
"dubai": 16295,
"deira": 27
}
},
"string_length": {
"min": 5,
"max": 5,
"mean": 5.0,
"median": 5.0
}
}
},
"data_quality": {
"total_missing_values": 151890,
"missing_percentage": 34.47,
"columns_with_missing": {
"actual_area": 8,
"common_area": 533,
"actual_common_area": 533,
"built_up_area": 1,
"bld_levels": 16305,
"shops": 16314,
"flats": 16322,
"offices": 16314,
"swimming_pools": 16314,
"elevators": 16314,
"pre_registration_number": 16322,
"floors": 205,
"rooms_en": 222,
"car_parks": 212,
"land_sub_number": 12358,
"land_type_en": 6111,
"master_project_en": 16322,
"project_number": 590,
"project_en": 590
},
"missing_percentage_by_column": {
"actual_area": 0.05,
"common_area": 3.27,
"actual_common_area": 3.27,
"built_up_area": 0.01,
"bld_levels": 99.9,
"shops": 99.95,
"flats": 100.0,
"offices": 99.95,
"swimming_pools": 99.95,
"elevators": 99.95,
"pre_registration_number": 100.0,
"floors": 1.26,
"rooms_en": 1.36,
"car_parks": 1.3,
"land_sub_number": 75.71,
"land_type_en": 37.44,
"master_project_en": 100.0,
"project_number": 3.61,
"project_en": 3.61
},
"duplicate_rows": 0,
"duplicate_percentage": 0.0
},
"statistical_summary": {
"numeric_columns_count": 17,
"categorical_columns_count": 10,
"datetime_columns_count": 0,
"data_types_distribution": {
"float64": 16,
"object": 10,
"int64": 1
},
"potential_identifier_columns": []
}
}