dld_backend/valuations_cleaned.json
2025-10-30 12:13:02 +05:30

311 lines
8.0 KiB
JSON

{
"file_info": {
"file_path": "./valuations_cleaned.csv",
"total_rows": 3560,
"total_columns": 10,
"memory_usage_mb": 0.9797344207763672,
"total_cells": 35600
},
"structural_info": {
"column_names": [
"property_total_value",
"area_en",
"actual_area",
"procedure_year",
"procedure_number",
"instance_date",
"actual_worth",
"procedure_area",
"property_type_en",
"prop_sub_type_en"
],
"dtypes": {
"property_total_value": "int64",
"area_en": "object",
"actual_area": "float64",
"procedure_year": "int64",
"procedure_number": "int64",
"instance_date": "object",
"actual_worth": "float64",
"procedure_area": "float64",
"property_type_en": "object",
"prop_sub_type_en": "object"
},
"index_info": {
"type": "<class 'pandas.core.indexes.range.RangeIndex'>",
"is_unique": true,
"has_duplicates": false
}
},
"columns_analysis": {
"property_total_value": {
"dtype": "int64",
"non_null_count": 3560,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 2678,
"unique_percentage": 75.22,
"statistics": {
"mean": 38248395.0511236,
"median": 6000000.0,
"std": 169640700.59881753,
"min": 929.0,
"max": 3537281982.0,
"q25": 2209194.5,
"q75": 20055641.5,
"skewness": 12.568309798480032,
"kurtosis": 200.3268145928862
},
"outliers": {
"count": 455,
"percentage": 12.78
}
},
"area_en": {
"dtype": "object",
"non_null_count": 3560,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 187,
"unique_percentage": 5.25,
"categorical_info": {
"most_frequent": "marsa dubai",
"most_frequent_count": 142,
"least_frequent": "jabal ali third",
"least_frequent_count": 1,
"top_5_values": {
"marsa dubai": 142,
"world islands": 124,
"business bay": 106,
"al thanyah fifth": 95,
"hadaeq sheikh mohammed bin rashid": 94
}
},
"string_length": {
"min": 4,
"max": 33,
"mean": 14.510393258426966,
"median": 14.0
}
},
"actual_area": {
"dtype": "float64",
"non_null_count": 3560,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 2908,
"unique_percentage": 81.69,
"statistics": {
"mean": 11457.755162921349,
"median": 747.46,
"std": 137240.71723399212,
"min": 18.19,
"max": 4265460.92,
"q25": 174.6925,
"q75": 2183.6800000000003,
"skewness": 23.14761437162824,
"kurtosis": 592.3560030328136
},
"outliers": {
"count": 419,
"percentage": 11.77
}
},
"procedure_year": {
"dtype": "int64",
"non_null_count": 3560,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 1,
"unique_percentage": 0.03,
"statistics": {
"mean": 2025.0,
"median": 2025.0,
"std": 0.0,
"min": 2025.0,
"max": 2025.0,
"q25": 2025.0,
"q75": 2025.0,
"skewness": 0.0,
"kurtosis": 0.0
},
"outliers": {
"count": 0,
"percentage": 0.0
}
},
"procedure_number": {
"dtype": "int64",
"non_null_count": 3560,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 3560,
"unique_percentage": 100.0,
"statistics": {
"mean": 4656.197471910113,
"median": 4661.5,
"std": 1249.719490658678,
"min": 2527.0,
"max": 6869.0,
"q25": 3574.75,
"q75": 5736.25,
"skewness": 0.00766239403115241,
"kurtosis": -1.2077887720785234
},
"outliers": {
"count": 0,
"percentage": 0.0
}
},
"instance_date": {
"dtype": "object",
"non_null_count": 3560,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 3557,
"unique_percentage": 99.92,
"categorical_info": {
"most_frequent": "2025-09-25 12:55:40",
"most_frequent_count": 2,
"least_frequent": "2025-07-02 13:39:57",
"least_frequent_count": 1,
"top_5_values": {
"2025-09-25 12:55:40": 2,
"2025-05-13 09:41:19": 2,
"2025-07-09 12:10:13": 2,
"2025-09-26 08:03:52": 1,
"2025-07-29 11:11:36": 1
}
},
"string_length": {
"min": 19,
"max": 19,
"mean": 19.0,
"median": 19.0
}
},
"actual_worth": {
"dtype": "float64",
"non_null_count": 3560,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 2678,
"unique_percentage": 75.22,
"statistics": {
"mean": 38248395.05212079,
"median": 6000000.0,
"std": 169640700.59859532,
"min": 929.0,
"max": 3537281982.0,
"q25": 2209194.2474999996,
"q75": 20055641.5,
"skewness": 12.568309798512665,
"kurtosis": 200.3268145936555
},
"outliers": {
"count": 455,
"percentage": 12.78
}
},
"procedure_area": {
"dtype": "float64",
"non_null_count": 3560,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 2902,
"unique_percentage": 81.52,
"statistics": {
"mean": 11516.71595786517,
"median": 749.96,
"std": 137393.5118824958,
"min": 18.19,
"max": 4265460.92,
"q25": 174.6925,
"q75": 2202.1725,
"skewness": 23.082118078880825,
"kurtosis": 589.7441977518072
},
"outliers": {
"count": 419,
"percentage": 11.77
}
},
"property_type_en": {
"dtype": "object",
"non_null_count": 3560,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 3,
"unique_percentage": 0.08,
"categorical_info": {
"most_frequent": "land",
"most_frequent_count": 2343,
"least_frequent": "building",
"least_frequent_count": 177,
"top_5_values": {
"land": 2343,
"unit": 1040,
"building": 177
}
},
"string_length": {
"min": 4,
"max": 8,
"mean": 4.198876404494382,
"median": 4.0
}
},
"prop_sub_type_en": {
"dtype": "object",
"non_null_count": 3292,
"null_count": 268,
"null_percentage": 7.53,
"unique_values": 43,
"unique_percentage": 1.31,
"categorical_info": {
"most_frequent": "commercial",
"most_frequent_count": 794,
"least_frequent": "railway station",
"least_frequent_count": 1,
"top_5_values": {
"commercial": 794,
"flat": 794,
"residential": 746,
"villa": 200,
"office": 112
}
},
"string_length": {
"min": 4,
"max": 34,
"mean": 8.768226002430133,
"median": 10.0
}
}
},
"data_quality": {
"total_missing_values": 268,
"missing_percentage": 0.75,
"columns_with_missing": {
"prop_sub_type_en": 268
},
"missing_percentage_by_column": {
"prop_sub_type_en": 7.53
},
"duplicate_rows": 0,
"duplicate_percentage": 0.0
},
"statistical_summary": {
"numeric_columns_count": 6,
"categorical_columns_count": 4,
"datetime_columns_count": 0,
"data_types_distribution": {
"object": 4,
"int64": 3,
"float64": 3
},
"potential_identifier_columns": [
"procedure_number"
]
}
}