662 lines
18 KiB
JSON
662 lines
18 KiB
JSON
{
|
|
"file_info": {
|
|
"file_path": "./transactions_cleaned.csv",
|
|
"total_rows": 218518,
|
|
"total_columns": 22,
|
|
"memory_usage_mb": 197.48046779632568,
|
|
"total_cells": 4807396
|
|
},
|
|
"structural_info": {
|
|
"column_names": [
|
|
"transaction_number",
|
|
"instance_date",
|
|
"group_en",
|
|
"procedure_en",
|
|
"is_offplan_en",
|
|
"is_free_hold_en",
|
|
"usage_en",
|
|
"area_en",
|
|
"prop_type_en",
|
|
"prop_sb_type_en",
|
|
"trans_value",
|
|
"procedure_area",
|
|
"actual_area",
|
|
"rooms_en",
|
|
"parking",
|
|
"nearest_metro_en",
|
|
"nearest_mall_en",
|
|
"nearest_landmark_en",
|
|
"total_buyer",
|
|
"total_seller",
|
|
"master_project_en",
|
|
"project_en"
|
|
],
|
|
"dtypes": {
|
|
"transaction_number": "object",
|
|
"instance_date": "object",
|
|
"group_en": "object",
|
|
"procedure_en": "object",
|
|
"is_offplan_en": "object",
|
|
"is_free_hold_en": "object",
|
|
"usage_en": "object",
|
|
"area_en": "object",
|
|
"prop_type_en": "object",
|
|
"prop_sb_type_en": "object",
|
|
"trans_value": "float64",
|
|
"procedure_area": "float64",
|
|
"actual_area": "float64",
|
|
"rooms_en": "object",
|
|
"parking": "float64",
|
|
"nearest_metro_en": "object",
|
|
"nearest_mall_en": "object",
|
|
"nearest_landmark_en": "object",
|
|
"total_buyer": "int64",
|
|
"total_seller": "int64",
|
|
"master_project_en": "object",
|
|
"project_en": "object"
|
|
},
|
|
"index_info": {
|
|
"type": "<class 'pandas.core.indexes.range.RangeIndex'>",
|
|
"is_unique": true,
|
|
"has_duplicates": "False"
|
|
}
|
|
},
|
|
"columns_analysis": {
|
|
"transaction_number": {
|
|
"dtype": "object",
|
|
"non_null_count": 218518,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 211404,
|
|
"unique_percentage": 96.74,
|
|
"categorical_info": {
|
|
"most_frequent": "43-202-2025",
|
|
"most_frequent_count": 311,
|
|
"least_frequent": "41-13774-2025",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"43-202-2025": 311,
|
|
"49-57-2025": 273,
|
|
"43-21-2025": 217,
|
|
"43-234-2025": 190,
|
|
"49-83-2025": 189
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 8,
|
|
"max": 15,
|
|
"mean": 13.11555112164673,
|
|
"median": 13.0
|
|
}
|
|
},
|
|
"instance_date": {
|
|
"dtype": "object",
|
|
"non_null_count": 218518,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 208067,
|
|
"unique_percentage": 95.22,
|
|
"categorical_info": {
|
|
"most_frequent": "2025-07-10 17:03:09",
|
|
"most_frequent_count": 311,
|
|
"least_frequent": "2025-04-30 17:48:48",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"2025-07-10 17:03:09": 311,
|
|
"2025-05-30 14:37:12": 273,
|
|
"2025-01-29 15:49:05": 217,
|
|
"2025-08-07 12:45:12": 190,
|
|
"2025-08-18 16:12:54": 189
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 19,
|
|
"max": 19,
|
|
"mean": 19.0,
|
|
"median": 19.0
|
|
}
|
|
},
|
|
"group_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 218518,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 3,
|
|
"unique_percentage": 0.0,
|
|
"categorical_info": {
|
|
"most_frequent": "sales",
|
|
"most_frequent_count": 170352,
|
|
"least_frequent": "gifts",
|
|
"least_frequent_count": 7735,
|
|
"top_5_values": {
|
|
"sales": 170352,
|
|
"mortgage": 40431,
|
|
"gifts": 7735
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 5,
|
|
"max": 8,
|
|
"mean": 5.555070978134524,
|
|
"median": 5.0
|
|
}
|
|
},
|
|
"procedure_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 218518,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 37,
|
|
"unique_percentage": 0.02,
|
|
"categorical_info": {
|
|
"most_frequent": "sell - pre registration",
|
|
"most_frequent_count": 103781,
|
|
"least_frequent": "modify delayed mortgage",
|
|
"least_frequent_count": 3,
|
|
"top_5_values": {
|
|
"sell - pre registration": 103781,
|
|
"sale": 42941,
|
|
"mortgage registration": 26972,
|
|
"delayed sell": 20249,
|
|
"grant": 6321
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 4,
|
|
"max": 48,
|
|
"mean": 17.5632304890215,
|
|
"median": 23.0
|
|
}
|
|
},
|
|
"is_offplan_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 218518,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 2,
|
|
"unique_percentage": 0.0,
|
|
"categorical_info": {
|
|
"most_frequent": "ready",
|
|
"most_frequent_count": 113341,
|
|
"least_frequent": "off-plan",
|
|
"least_frequent_count": 105177,
|
|
"top_5_values": {
|
|
"ready": 113341,
|
|
"off-plan": 105177
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 5,
|
|
"max": 8,
|
|
"mean": 6.443958850071848,
|
|
"median": 5.0
|
|
}
|
|
},
|
|
"is_free_hold_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 218518,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 2,
|
|
"unique_percentage": 0.0,
|
|
"categorical_info": {
|
|
"most_frequent": "free hold",
|
|
"most_frequent_count": 209097,
|
|
"least_frequent": "non free hold",
|
|
"least_frequent_count": 9421,
|
|
"top_5_values": {
|
|
"free hold": 209097,
|
|
"non free hold": 9421
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 9,
|
|
"max": 13,
|
|
"mean": 9.17245261259942,
|
|
"median": 9.0
|
|
}
|
|
},
|
|
"usage_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 218518,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 2,
|
|
"unique_percentage": 0.0,
|
|
"categorical_info": {
|
|
"most_frequent": "residential",
|
|
"most_frequent_count": 213004,
|
|
"least_frequent": "commercial",
|
|
"least_frequent_count": 5514,
|
|
"top_5_values": {
|
|
"residential": 213004,
|
|
"commercial": 5514
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 10,
|
|
"max": 11,
|
|
"mean": 10.974766380801581,
|
|
"median": 11.0
|
|
}
|
|
},
|
|
"area_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 218518,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 268,
|
|
"unique_percentage": 0.12,
|
|
"categorical_info": {
|
|
"most_frequent": "jumeirah village circle",
|
|
"most_frequent_count": 18939,
|
|
"least_frequent": "dubai international airport",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"jumeirah village circle": 18939,
|
|
"business bay": 14511,
|
|
"dubai land residence complex": 7341,
|
|
"al yelayiss 1": 5812,
|
|
"madinat al mataar": 5787
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 4,
|
|
"max": 33,
|
|
"mean": 15.837455038028903,
|
|
"median": 14.0
|
|
}
|
|
},
|
|
"prop_type_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 218518,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 3,
|
|
"unique_percentage": 0.0,
|
|
"categorical_info": {
|
|
"most_frequent": "unit",
|
|
"most_frequent_count": 169915,
|
|
"least_frequent": "building",
|
|
"least_frequent_count": 15967,
|
|
"top_5_values": {
|
|
"unit": 169915,
|
|
"land": 32636,
|
|
"building": 15967
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 4,
|
|
"max": 8,
|
|
"mean": 4.292277981676567,
|
|
"median": 4.0
|
|
}
|
|
},
|
|
"prop_sb_type_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 208951,
|
|
"null_count": 9567,
|
|
"null_percentage": 4.38,
|
|
"unique_values": 42,
|
|
"unique_percentage": 0.02,
|
|
"categorical_info": {
|
|
"most_frequent": "flat",
|
|
"most_frequent_count": 158612,
|
|
"least_frequent": "health club",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"flat": 158612,
|
|
"villa": 16313,
|
|
"residential": 13429,
|
|
"commercial": 5148,
|
|
"office": 5020
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 4,
|
|
"max": 50,
|
|
"mean": 5.052806638877057,
|
|
"median": 4.0
|
|
}
|
|
},
|
|
"trans_value": {
|
|
"dtype": "float64",
|
|
"non_null_count": 218518,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 73759,
|
|
"unique_percentage": 33.75,
|
|
"statistics": {
|
|
"mean": 3283859.255751517,
|
|
"median": 1500000.0,
|
|
"std": 15313415.930326678,
|
|
"min": 10.77,
|
|
"max": 1578530000.0,
|
|
"q25": 910000.0,
|
|
"q75": 2750000.0,
|
|
"skewness": 46.95032216760977,
|
|
"kurtosis": 3329.9980178193723
|
|
},
|
|
"outliers": {
|
|
"count": 19301,
|
|
"percentage": 8.83
|
|
}
|
|
},
|
|
"procedure_area": {
|
|
"dtype": "float64",
|
|
"non_null_count": 218466,
|
|
"null_count": 52,
|
|
"null_percentage": 0.02,
|
|
"unique_values": 35074,
|
|
"unique_percentage": 16.05,
|
|
"statistics": {
|
|
"mean": 261.3489334267117,
|
|
"median": 90.36,
|
|
"std": 5415.7054677162405,
|
|
"min": 0.0,
|
|
"max": 1715752.11,
|
|
"q25": 64.22,
|
|
"q75": 152.31,
|
|
"skewness": 222.57109352903205,
|
|
"kurtosis": 61279.78606538791
|
|
},
|
|
"outliers": {
|
|
"count": 26361,
|
|
"percentage": 12.06
|
|
}
|
|
},
|
|
"actual_area": {
|
|
"dtype": "float64",
|
|
"non_null_count": 218518,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 34584,
|
|
"unique_percentage": 15.83,
|
|
"statistics": {
|
|
"mean": 262.78477356556436,
|
|
"median": 91.08,
|
|
"std": 5382.863713033483,
|
|
"min": 5.17,
|
|
"max": 1715752.11,
|
|
"q25": 65.06,
|
|
"q75": 153.48,
|
|
"skewness": 225.535006380266,
|
|
"kurtosis": 62535.69611765087
|
|
},
|
|
"outliers": {
|
|
"count": 26523,
|
|
"percentage": 12.14
|
|
}
|
|
},
|
|
"rooms_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 183145,
|
|
"null_count": 35373,
|
|
"null_percentage": 16.19,
|
|
"unique_values": 14,
|
|
"unique_percentage": 0.01,
|
|
"categorical_info": {
|
|
"most_frequent": "1 b/r",
|
|
"most_frequent_count": 71525,
|
|
"least_frequent": "10 b/r",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"1 b/r": 71525,
|
|
"2 b/r": 42780,
|
|
"studio": 37722,
|
|
"3 b/r": 18375,
|
|
"4 b/r": 7022
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 3,
|
|
"max": 11,
|
|
"mean": 5.224144803297933,
|
|
"median": 5.0
|
|
}
|
|
},
|
|
"parking": {
|
|
"dtype": "float64",
|
|
"non_null_count": 95220,
|
|
"null_count": 123298,
|
|
"null_percentage": 56.42,
|
|
"unique_values": 558,
|
|
"unique_percentage": 0.59,
|
|
"statistics": {
|
|
"mean": 22.358372916115005,
|
|
"median": 1.0,
|
|
"std": 1592.0959438077602,
|
|
"min": -1.0,
|
|
"max": 297298.0,
|
|
"q25": 1.0,
|
|
"q75": 1.0,
|
|
"skewness": 144.8271736848027,
|
|
"kurtosis": 24036.929143027846
|
|
},
|
|
"outliers": {
|
|
"count": 7633,
|
|
"percentage": 3.49
|
|
}
|
|
},
|
|
"nearest_metro_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 120617,
|
|
"null_count": 97901,
|
|
"null_percentage": 44.8,
|
|
"unique_values": 55,
|
|
"unique_percentage": 0.05,
|
|
"categorical_info": {
|
|
"most_frequent": "dubai internet city",
|
|
"most_frequent_count": 16919,
|
|
"least_frequent": "ggico metro station",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"dubai internet city": 16919,
|
|
"business bay metro station": 13248,
|
|
"nakheel metro station": 11501,
|
|
"buj khalifa dubai mall metro station": 10814,
|
|
"sharaf dg metro station": 7911
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 9,
|
|
"max": 36,
|
|
"mean": 23.059717950206025,
|
|
"median": 23.0
|
|
}
|
|
},
|
|
"nearest_mall_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 118121,
|
|
"null_count": 100397,
|
|
"null_percentage": 45.94,
|
|
"unique_values": 5,
|
|
"unique_percentage": 0.0,
|
|
"categorical_info": {
|
|
"most_frequent": "marina mall",
|
|
"most_frequent_count": 38565,
|
|
"least_frequent": "city centre mirdif",
|
|
"least_frequent_count": 8743,
|
|
"top_5_values": {
|
|
"marina mall": 38565,
|
|
"dubai mall": 32510,
|
|
"mall of the emirates": 27759,
|
|
"ibn-e-battuta mall": 10544,
|
|
"city centre mirdif": 8743
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 10,
|
|
"max": 20,
|
|
"mean": 13.982788835177487,
|
|
"median": 11.0
|
|
}
|
|
},
|
|
"nearest_landmark_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 145904,
|
|
"null_count": 72614,
|
|
"null_percentage": 33.23,
|
|
"unique_values": 14,
|
|
"unique_percentage": 0.01,
|
|
"categorical_info": {
|
|
"most_frequent": "sports city swimming academy",
|
|
"most_frequent_count": 42232,
|
|
"least_frequent": "jabel ali",
|
|
"least_frequent_count": 3,
|
|
"top_5_values": {
|
|
"sports city swimming academy": 42232,
|
|
"downtown dubai": 22411,
|
|
"motor city": 18649,
|
|
"img world adventures": 18410,
|
|
"burj al arab": 14198
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 9,
|
|
"max": 33,
|
|
"mean": 18.984201941002304,
|
|
"median": 20.0
|
|
}
|
|
},
|
|
"total_buyer": {
|
|
"dtype": "int64",
|
|
"non_null_count": 218518,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 1,
|
|
"unique_percentage": 0.0,
|
|
"statistics": {
|
|
"mean": 0.0,
|
|
"median": 0.0,
|
|
"std": 0.0,
|
|
"min": 0.0,
|
|
"max": 0.0,
|
|
"q25": 0.0,
|
|
"q75": 0.0,
|
|
"skewness": 0.0,
|
|
"kurtosis": 0.0
|
|
},
|
|
"outliers": {
|
|
"count": 0,
|
|
"percentage": 0.0
|
|
}
|
|
},
|
|
"total_seller": {
|
|
"dtype": "int64",
|
|
"non_null_count": 218518,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 1,
|
|
"unique_percentage": 0.0,
|
|
"statistics": {
|
|
"mean": 0.0,
|
|
"median": 0.0,
|
|
"std": 0.0,
|
|
"min": 0.0,
|
|
"max": 0.0,
|
|
"q25": 0.0,
|
|
"q75": 0.0,
|
|
"skewness": 0.0,
|
|
"kurtosis": 0.0
|
|
},
|
|
"outliers": {
|
|
"count": 0,
|
|
"percentage": 0.0
|
|
}
|
|
},
|
|
"master_project_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 502,
|
|
"null_count": 218016,
|
|
"null_percentage": 99.77,
|
|
"unique_values": 8,
|
|
"unique_percentage": 1.59,
|
|
"categorical_info": {
|
|
"most_frequent": "maison elysee ii",
|
|
"most_frequent_count": 171,
|
|
"least_frequent": "hills park",
|
|
"least_frequent_count": 6,
|
|
"top_5_values": {
|
|
"maison elysee ii": 171,
|
|
"elysee heights": 153,
|
|
"jebel ali village townhouses- phase 1": 45,
|
|
"remraam - al ramth": 45,
|
|
"maison elysee iii by pantheon": 37
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 6,
|
|
"max": 37,
|
|
"mean": 17.44223107569721,
|
|
"median": 16.0
|
|
}
|
|
},
|
|
"project_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 187284,
|
|
"null_count": 31234,
|
|
"null_percentage": 14.29,
|
|
"unique_values": 2690,
|
|
"unique_percentage": 1.44,
|
|
"categorical_info": {
|
|
"most_frequent": "binghatti skyrise",
|
|
"most_frequent_count": 2438,
|
|
"least_frequent": "sondos rosa",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"binghatti skyrise": 2438,
|
|
"sobha solis": 1687,
|
|
"binghatti elite": 1640,
|
|
"skyvue": 1480,
|
|
"sobha orbis": 1383
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 2,
|
|
"max": 77,
|
|
"mean": 17.992311142436087,
|
|
"median": 16.0
|
|
}
|
|
}
|
|
},
|
|
"data_quality": {
|
|
"total_missing_values": 688452,
|
|
"missing_percentage": 14.32,
|
|
"columns_with_missing": {
|
|
"prop_sb_type_en": 9567,
|
|
"procedure_area": 52,
|
|
"rooms_en": 35373,
|
|
"parking": 123298,
|
|
"nearest_metro_en": 97901,
|
|
"nearest_mall_en": 100397,
|
|
"nearest_landmark_en": 72614,
|
|
"master_project_en": 218016,
|
|
"project_en": 31234
|
|
},
|
|
"missing_percentage_by_column": {
|
|
"prop_sb_type_en": 4.38,
|
|
"procedure_area": 0.02,
|
|
"rooms_en": 16.19,
|
|
"parking": 56.42,
|
|
"nearest_metro_en": 44.8,
|
|
"nearest_mall_en": 45.94,
|
|
"nearest_landmark_en": 33.23,
|
|
"master_project_en": 99.77,
|
|
"project_en": 14.29
|
|
},
|
|
"duplicate_rows": 2704,
|
|
"duplicate_percentage": 1.24
|
|
},
|
|
"statistical_summary": {
|
|
"numeric_columns_count": 6,
|
|
"categorical_columns_count": 16,
|
|
"datetime_columns_count": 0,
|
|
"data_types_distribution": {
|
|
"object": 16,
|
|
"float64": 4,
|
|
"int64": 2
|
|
},
|
|
"potential_identifier_columns": []
|
|
}
|
|
} |