forked from im-anishraj/arnio
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathauto_clean_tutorial.py
More file actions
48 lines (34 loc) · 1.13 KB
/
auto_clean_tutorial.py
File metadata and controls
48 lines (34 loc) · 1.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
"""
Beginner-friendly data quality walkthrough for Arnio.
This example shows a small messy input, the profiling signals Arnio reports,
the suggested cleaning steps, and the difference between safe and strict
auto-cleaning.
"""
import pandas as pd
import arnio as ar
def main():
    """Run the walkthrough end to end, printing each stage to stdout.

    Stages, in order:
      1. Build a small pandas DataFrame with a repeated order row and
         whitespace-padded text values, and hand it to ``ar.from_pandas``.
      2. Print the raw input.
      3. Compute ``ar.profile(...).summary()`` and ``ar.suggest_cleaning(...)``
         and print both.
      4. Call ``ar.auto_clean`` once with its defaults (labelled ``'safe'`` in
         the printed heading) and once with ``mode="strict"``, printing each
         result after converting it back with ``ar.to_pandas``.
    """
    messy_columns = {
        "order_id": [1001, 1002, 1002, 1003, 1004],
        "customer": [" Ishan ", " Prasoon ", " Prasoon ", " Pranay ", " Dhruv "],
        "city": [" Paris ", "London", "London", " New York ", " Tokyo "],
    }
    raw_df = pd.DataFrame(messy_columns)
    arnio_frame = ar.from_pandas(raw_df)

    print("--- Messy Input ---")
    print(raw_df)

    # Both diagnostics are computed before anything is printed about them.
    profile_summary = ar.profile(arnio_frame).summary()
    cleaning_hints = ar.suggest_cleaning(arnio_frame)
    diagnostic_sections = (
        ("\n--- Profiling Summary ---", profile_summary),
        ("\n--- Suggested Cleaning Steps ---", cleaning_hints),
    )
    for heading, payload in diagnostic_sections:
        print(heading)
        print(payload)

    # Each entry pairs a heading with the keyword arguments for auto_clean;
    # the empty dict means "call with library defaults".
    cleaning_runs = (
        ("\n--- auto_clean(mode='safe') ---", {}),
        ("\n--- auto_clean(mode='strict') ---", {"mode": "strict"}),
    )
    for heading, options in cleaning_runs:
        cleaned = ar.auto_clean(arnio_frame, **options)
        print(heading)
        print(ar.to_pandas(cleaned))
# Run the walkthrough only when this file is executed directly as a script,
# so importing the module does not trigger any printing.
if __name__ == "__main__":
    main()