384 lines
10 KiB
JSON
384 lines
10 KiB
JSON
{
|
|
"file_info": {
|
|
"file_path": "./lands_cleaned.csv",
|
|
"total_rows": 186796,
|
|
"total_columns": 12,
|
|
"memory_usage_mb": 88.59803771972656,
|
|
"total_cells": 2241552
|
|
},
|
|
"structural_info": {
|
|
"column_names": [
|
|
"land_type_en",
|
|
"prop_sub_type_en",
|
|
"actual_area",
|
|
"is_offplan_en",
|
|
"pre_registration_number",
|
|
"is_free_hold_en",
|
|
"dm_zip_code",
|
|
"master_project_en",
|
|
"project_number",
|
|
"project_en",
|
|
"area_en",
|
|
"zone_en"
|
|
],
|
|
"dtypes": {
|
|
"land_type_en": "object",
|
|
"prop_sub_type_en": "object",
|
|
"actual_area": "float64",
|
|
"is_offplan_en": "object",
|
|
"pre_registration_number": "object",
|
|
"is_free_hold_en": "object",
|
|
"dm_zip_code": "int64",
|
|
"master_project_en": "object",
|
|
"project_number": "float64",
|
|
"project_en": "object",
|
|
"area_en": "object",
|
|
"zone_en": "object"
|
|
},
|
|
"index_info": {
|
|
"type": "<class 'pandas.core.indexes.range.RangeIndex'>",
|
|
"is_unique": true,
|
|
"has_duplicates": "False"
|
|
}
|
|
},
|
|
"columns_analysis": {
|
|
"land_type_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 167926,
|
|
"null_count": 18870,
|
|
"null_percentage": 10.1,
|
|
"unique_values": 18,
|
|
"unique_percentage": 0.01,
|
|
"categorical_info": {
|
|
"most_frequent": "commercial",
|
|
"most_frequent_count": 116708,
|
|
"least_frequent": "healthcare",
|
|
"least_frequent_count": 5,
|
|
"top_5_values": {
|
|
"commercial": 116708,
|
|
"residential": 24891,
|
|
"industrial": 9006,
|
|
"government authorities": 5417,
|
|
"public facilities": 3389
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 7,
|
|
"max": 22,
|
|
"mean": 10.689023736645904,
|
|
"median": 10.0
|
|
}
|
|
},
|
|
"prop_sub_type_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 151257,
|
|
"null_count": 35539,
|
|
"null_percentage": 19.03,
|
|
"unique_values": 60,
|
|
"unique_percentage": 0.04,
|
|
"categorical_info": {
|
|
"most_frequent": "residential",
|
|
"most_frequent_count": 79630,
|
|
"least_frequent": "grave yard",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"residential": 79630,
|
|
"commercial": 38947,
|
|
"industrial": 5497,
|
|
"government housing": 4591,
|
|
"land": 4117
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 4,
|
|
"max": 36,
|
|
"mean": 10.733559438571438,
|
|
"median": 11.0
|
|
}
|
|
},
|
|
"actual_area": {
|
|
"dtype": "float64",
|
|
"non_null_count": 185195,
|
|
"null_count": 1601,
|
|
"null_percentage": 0.86,
|
|
"unique_values": 90900,
|
|
"unique_percentage": 49.08,
|
|
"statistics": {
|
|
"mean": 37851.55568400876,
|
|
"median": 702.0,
|
|
"std": 6360119.290112485,
|
|
"min": 0.0,
|
|
"max": 2631387198.25,
|
|
"q25": 254.45,
|
|
"q75": 1311.915,
|
|
"skewness": 387.0212453904429,
|
|
"kurtosis": 158550.08551622913
|
|
},
|
|
"outliers": {
|
|
"count": 22129,
|
|
"percentage": 11.85
|
|
}
|
|
},
|
|
"is_offplan_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 186796,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 2,
|
|
"unique_percentage": 0.0,
|
|
"categorical_info": {
|
|
"most_frequent": "ready",
|
|
"most_frequent_count": 177444,
|
|
"least_frequent": "off-plan",
|
|
"least_frequent_count": 9352,
|
|
"top_5_values": {
|
|
"ready": 177444,
|
|
"off-plan": 9352
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 5,
|
|
"max": 8,
|
|
"mean": 5.150195935673141,
|
|
"median": 5.0
|
|
}
|
|
},
|
|
"pre_registration_number": {
|
|
"dtype": "object",
|
|
"non_null_count": 119446,
|
|
"null_count": 67350,
|
|
"null_percentage": 36.06,
|
|
"unique_values": 116141,
|
|
"unique_percentage": 97.23,
|
|
"categorical_info": {
|
|
"most_frequent": "b.016",
|
|
"most_frequent_count": 12,
|
|
"least_frequent": "900-1183-b",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"b.016": 12,
|
|
"od": 10,
|
|
"dubai maritime city main land": 9,
|
|
"pa_plot2": 7,
|
|
"jge-rc-a-q001": 5
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 1,
|
|
"max": 46,
|
|
"mean": 9.969626442074242,
|
|
"median": 10.0
|
|
}
|
|
},
|
|
"is_free_hold_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 186796,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 2,
|
|
"unique_percentage": 0.0,
|
|
"categorical_info": {
|
|
"most_frequent": "free hold",
|
|
"most_frequent_count": 121347,
|
|
"least_frequent": "non free hold",
|
|
"least_frequent_count": 65449,
|
|
"top_5_values": {
|
|
"free hold": 121347,
|
|
"non free hold": 65449
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 9,
|
|
"max": 13,
|
|
"mean": 10.401507526927771,
|
|
"median": 9.0
|
|
}
|
|
},
|
|
"dm_zip_code": {
|
|
"dtype": "int64",
|
|
"non_null_count": 186796,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 225,
|
|
"unique_percentage": 0.12,
|
|
"statistics": {
|
|
"mean": 566.9359086918349,
|
|
"median": 618.0,
|
|
"std": 222.0272899204335,
|
|
"min": 0.0,
|
|
"max": 991.0,
|
|
"q25": 381.0,
|
|
"q75": 683.0,
|
|
"skewness": -0.18975758304103113,
|
|
"kurtosis": -0.6482563478613983
|
|
},
|
|
"outliers": {
|
|
"count": 0,
|
|
"percentage": 0.0
|
|
}
|
|
},
|
|
"master_project_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 12,
|
|
"null_count": 186784,
|
|
"null_percentage": 99.99,
|
|
"unique_values": 10,
|
|
"unique_percentage": 83.33,
|
|
"categorical_info": {
|
|
"most_frequent": "hills park",
|
|
"most_frequent_count": 2,
|
|
"least_frequent": "mohammed bin rashid al maktoum city -district one west - phase 2",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"hills park": 2,
|
|
"elysee heights": 2,
|
|
"jannat": 1,
|
|
"remraam - al ramth": 1,
|
|
"maison elysee iii by pantheon": 1
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 6,
|
|
"max": 64,
|
|
"mean": 20.833333333333332,
|
|
"median": 15.0
|
|
}
|
|
},
|
|
"project_number": {
|
|
"dtype": "float64",
|
|
"non_null_count": 80859,
|
|
"null_count": 105937,
|
|
"null_percentage": 56.71,
|
|
"unique_values": 3595,
|
|
"unique_percentage": 4.45,
|
|
"statistics": {
|
|
"mean": 2369.7444935010326,
|
|
"median": 2336.0,
|
|
"std": 851.6698321798689,
|
|
"min": -2180.0,
|
|
"max": 4174.0,
|
|
"q25": 1648.0,
|
|
"q75": 3126.0,
|
|
"skewness": -0.02167819427179996,
|
|
"kurtosis": -0.4971078659064898
|
|
},
|
|
"outliers": {
|
|
"count": 34,
|
|
"percentage": 0.02
|
|
}
|
|
},
|
|
"project_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 80859,
|
|
"null_count": 105937,
|
|
"null_percentage": 56.71,
|
|
"unique_values": 3573,
|
|
"unique_percentage": 4.42,
|
|
"categorical_info": {
|
|
"most_frequent": "damac lagoons - portofino",
|
|
"most_frequent_count": 874,
|
|
"least_frequent": "azizi riviera 7",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"damac lagoons - portofino": 874,
|
|
"damac lagoons - ibiza": 824,
|
|
"damac hills (2) - violet 4": 803,
|
|
"damac sun city": 770,
|
|
"damac lagoons - malta (1)": 760
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 2,
|
|
"max": 77,
|
|
"mean": 22.367899677215895,
|
|
"median": 24.0
|
|
}
|
|
},
|
|
"area_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 186796,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 258,
|
|
"unique_percentage": 0.14,
|
|
"categorical_info": {
|
|
"most_frequent": "madinat hind 4",
|
|
"most_frequent_count": 14330,
|
|
"least_frequent": "al wajehah al bhariyah",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"madinat hind 4": 14330,
|
|
"al hebiah fifth": 10282,
|
|
"jabal ali first": 7206,
|
|
"al yelayiss 1": 6844,
|
|
"wadi al safa 5": 6102
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 4,
|
|
"max": 33,
|
|
"mean": 15.13695689415191,
|
|
"median": 14.0
|
|
}
|
|
},
|
|
"zone_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 186796,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 2,
|
|
"unique_percentage": 0.0,
|
|
"categorical_info": {
|
|
"most_frequent": "dubai",
|
|
"most_frequent_count": 154922,
|
|
"least_frequent": "deira",
|
|
"least_frequent_count": 31874,
|
|
"top_5_values": {
|
|
"dubai": 154922,
|
|
"deira": 31874
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 5,
|
|
"max": 5,
|
|
"mean": 5.0,
|
|
"median": 5.0
|
|
}
|
|
}
|
|
},
|
|
"data_quality": {
|
|
"total_missing_values": 522018,
|
|
"missing_percentage": 23.29,
|
|
"columns_with_missing": {
|
|
"land_type_en": 18870,
|
|
"prop_sub_type_en": 35539,
|
|
"actual_area": 1601,
|
|
"pre_registration_number": 67350,
|
|
"master_project_en": 186784,
|
|
"project_number": 105937,
|
|
"project_en": 105937
|
|
},
|
|
"missing_percentage_by_column": {
|
|
"land_type_en": 10.1,
|
|
"prop_sub_type_en": 19.03,
|
|
"actual_area": 0.86,
|
|
"pre_registration_number": 36.06,
|
|
"master_project_en": 99.99,
|
|
"project_number": 56.71,
|
|
"project_en": 56.71
|
|
},
|
|
"duplicate_rows": 7,
|
|
"duplicate_percentage": 0.0
|
|
},
|
|
"statistical_summary": {
|
|
"numeric_columns_count": 3,
|
|
"categorical_columns_count": 9,
|
|
"datetime_columns_count": 0,
|
|
"data_types_distribution": {
|
|
"object": 9,
|
|
"float64": 2,
|
|
"int64": 1
|
|
},
|
|
"potential_identifier_columns": []
|
|
}
|
|
} |