311 lines
8.0 KiB
JSON
311 lines
8.0 KiB
JSON
{
|
|
"file_info": {
|
|
"file_path": "./valuations_cleaned.csv",
|
|
"total_rows": 3560,
|
|
"total_columns": 10,
|
|
"memory_usage_mb": 0.9797344207763672,
|
|
"total_cells": 35600
|
|
},
|
|
"structural_info": {
|
|
"column_names": [
|
|
"property_total_value",
|
|
"area_en",
|
|
"actual_area",
|
|
"procedure_year",
|
|
"procedure_number",
|
|
"instance_date",
|
|
"actual_worth",
|
|
"procedure_area",
|
|
"property_type_en",
|
|
"prop_sub_type_en"
|
|
],
|
|
"dtypes": {
|
|
"property_total_value": "int64",
|
|
"area_en": "object",
|
|
"actual_area": "float64",
|
|
"procedure_year": "int64",
|
|
"procedure_number": "int64",
|
|
"instance_date": "object",
|
|
"actual_worth": "float64",
|
|
"procedure_area": "float64",
|
|
"property_type_en": "object",
|
|
"prop_sub_type_en": "object"
|
|
},
|
|
"index_info": {
|
|
"type": "<class 'pandas.core.indexes.range.RangeIndex'>",
|
|
"is_unique": true,
|
|
"has_duplicates": "False"
|
|
}
|
|
},
|
|
"columns_analysis": {
|
|
"property_total_value": {
|
|
"dtype": "int64",
|
|
"non_null_count": 3560,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 2678,
|
|
"unique_percentage": 75.22,
|
|
"statistics": {
|
|
"mean": 38248395.0511236,
|
|
"median": 6000000.0,
|
|
"std": 169640700.59881753,
|
|
"min": 929.0,
|
|
"max": 3537281982.0,
|
|
"q25": 2209194.5,
|
|
"q75": 20055641.5,
|
|
"skewness": 12.568309798480032,
|
|
"kurtosis": 200.3268145928862
|
|
},
|
|
"outliers": {
|
|
"count": 455,
|
|
"percentage": 12.78
|
|
}
|
|
},
|
|
"area_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 3560,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 187,
|
|
"unique_percentage": 5.25,
|
|
"categorical_info": {
|
|
"most_frequent": "marsa dubai",
|
|
"most_frequent_count": 142,
|
|
"least_frequent": "jabal ali third",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"marsa dubai": 142,
|
|
"world islands": 124,
|
|
"business bay": 106,
|
|
"al thanyah fifth": 95,
|
|
"hadaeq sheikh mohammed bin rashid": 94
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 4,
|
|
"max": 33,
|
|
"mean": 14.510393258426966,
|
|
"median": 14.0
|
|
}
|
|
},
|
|
"actual_area": {
|
|
"dtype": "float64",
|
|
"non_null_count": 3560,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 2908,
|
|
"unique_percentage": 81.69,
|
|
"statistics": {
|
|
"mean": 11457.755162921349,
|
|
"median": 747.46,
|
|
"std": 137240.71723399212,
|
|
"min": 18.19,
|
|
"max": 4265460.92,
|
|
"q25": 174.6925,
|
|
"q75": 2183.6800000000003,
|
|
"skewness": 23.14761437162824,
|
|
"kurtosis": 592.3560030328136
|
|
},
|
|
"outliers": {
|
|
"count": 419,
|
|
"percentage": 11.77
|
|
}
|
|
},
|
|
"procedure_year": {
|
|
"dtype": "int64",
|
|
"non_null_count": 3560,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 1,
|
|
"unique_percentage": 0.03,
|
|
"statistics": {
|
|
"mean": 2025.0,
|
|
"median": 2025.0,
|
|
"std": 0.0,
|
|
"min": 2025.0,
|
|
"max": 2025.0,
|
|
"q25": 2025.0,
|
|
"q75": 2025.0,
|
|
"skewness": 0.0,
|
|
"kurtosis": 0.0
|
|
},
|
|
"outliers": {
|
|
"count": 0,
|
|
"percentage": 0.0
|
|
}
|
|
},
|
|
"procedure_number": {
|
|
"dtype": "int64",
|
|
"non_null_count": 3560,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 3560,
|
|
"unique_percentage": 100.0,
|
|
"statistics": {
|
|
"mean": 4656.197471910113,
|
|
"median": 4661.5,
|
|
"std": 1249.719490658678,
|
|
"min": 2527.0,
|
|
"max": 6869.0,
|
|
"q25": 3574.75,
|
|
"q75": 5736.25,
|
|
"skewness": 0.00766239403115241,
|
|
"kurtosis": -1.2077887720785234
|
|
},
|
|
"outliers": {
|
|
"count": 0,
|
|
"percentage": 0.0
|
|
}
|
|
},
|
|
"instance_date": {
|
|
"dtype": "object",
|
|
"non_null_count": 3560,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 3557,
|
|
"unique_percentage": 99.92,
|
|
"categorical_info": {
|
|
"most_frequent": "2025-09-25 12:55:40",
|
|
"most_frequent_count": 2,
|
|
"least_frequent": "2025-07-02 13:39:57",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"2025-09-25 12:55:40": 2,
|
|
"2025-05-13 09:41:19": 2,
|
|
"2025-07-09 12:10:13": 2,
|
|
"2025-09-26 08:03:52": 1,
|
|
"2025-07-29 11:11:36": 1
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 19,
|
|
"max": 19,
|
|
"mean": 19.0,
|
|
"median": 19.0
|
|
}
|
|
},
|
|
"actual_worth": {
|
|
"dtype": "float64",
|
|
"non_null_count": 3560,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 2678,
|
|
"unique_percentage": 75.22,
|
|
"statistics": {
|
|
"mean": 38248395.05212079,
|
|
"median": 6000000.0,
|
|
"std": 169640700.59859532,
|
|
"min": 929.0,
|
|
"max": 3537281982.0,
|
|
"q25": 2209194.2474999996,
|
|
"q75": 20055641.5,
|
|
"skewness": 12.568309798512665,
|
|
"kurtosis": 200.3268145936555
|
|
},
|
|
"outliers": {
|
|
"count": 455,
|
|
"percentage": 12.78
|
|
}
|
|
},
|
|
"procedure_area": {
|
|
"dtype": "float64",
|
|
"non_null_count": 3560,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 2902,
|
|
"unique_percentage": 81.52,
|
|
"statistics": {
|
|
"mean": 11516.71595786517,
|
|
"median": 749.96,
|
|
"std": 137393.5118824958,
|
|
"min": 18.19,
|
|
"max": 4265460.92,
|
|
"q25": 174.6925,
|
|
"q75": 2202.1725,
|
|
"skewness": 23.082118078880825,
|
|
"kurtosis": 589.7441977518072
|
|
},
|
|
"outliers": {
|
|
"count": 419,
|
|
"percentage": 11.77
|
|
}
|
|
},
|
|
"property_type_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 3560,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 3,
|
|
"unique_percentage": 0.08,
|
|
"categorical_info": {
|
|
"most_frequent": "land",
|
|
"most_frequent_count": 2343,
|
|
"least_frequent": "building",
|
|
"least_frequent_count": 177,
|
|
"top_5_values": {
|
|
"land": 2343,
|
|
"unit": 1040,
|
|
"building": 177
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 4,
|
|
"max": 8,
|
|
"mean": 4.198876404494382,
|
|
"median": 4.0
|
|
}
|
|
},
|
|
"prop_sub_type_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 3292,
|
|
"null_count": 268,
|
|
"null_percentage": 7.53,
|
|
"unique_values": 43,
|
|
"unique_percentage": 1.31,
|
|
"categorical_info": {
|
|
"most_frequent": "commercial",
|
|
"most_frequent_count": 794,
|
|
"least_frequent": "railway station",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"commercial": 794,
|
|
"flat": 794,
|
|
"residential": 746,
|
|
"villa": 200,
|
|
"office": 112
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 4,
|
|
"max": 34,
|
|
"mean": 8.768226002430133,
|
|
"median": 10.0
|
|
}
|
|
}
|
|
},
|
|
"data_quality": {
|
|
"total_missing_values": 268,
|
|
"missing_percentage": 0.75,
|
|
"columns_with_missing": {
|
|
"prop_sub_type_en": 268
|
|
},
|
|
"missing_percentage_by_column": {
|
|
"prop_sub_type_en": 7.53
|
|
},
|
|
"duplicate_rows": 0,
|
|
"duplicate_percentage": 0.0
|
|
},
|
|
"statistical_summary": {
|
|
"numeric_columns_count": 6,
|
|
"categorical_columns_count": 4,
|
|
"datetime_columns_count": 0,
|
|
"data_types_distribution": {
|
|
"object": 4,
|
|
"int64": 3,
|
|
"float64": 3
|
|
},
|
|
"potential_identifier_columns": [
|
|
"procedure_number"
|
|
]
|
|
}
|
|
} |