Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions CHAT_FORMAT_README
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
This README explains how to use chat_formatting.py.

How to use:

python3 chat_formatting.py input_file_name output_file_name input_date_format

Explanation:

input_file_name: The raw exported WhatsApp chat file.

output_file_name: This will be your formatted chat file after you execute the code.

input_date_format: Every device exports the chat a little differently, and the date format in particular tends to conflict. The code uses the mmddyy format, but several exports use ddmmyy instead. It would be tiresome to make the code detect all of them, so you pass in the format your export uses (ddmmyy or mmddyy) and the code converts it into the accepted format. Right now it only accepts these two formats, but it isn't difficult to add more.

OUTPUT:

Once it runs successfully, it generates two output files: one with the name you provided as "output_file_name" and another named "formatted_time.txt". The second one uses the fix provided in the repo, which changed the time from the original 24hr format to the 12hr format.

So basically the code without the suggested fix should use output_file_name as the chat file and if the fix is approved then you should use formatted_time.txt.
156 changes: 156 additions & 0 deletions chat_formatting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import re
from datetime import datetime
import sys

#-----------------------
n = len(sys.argv)
print("Total arguments passed:", n)

# Three positional arguments are required after the script name. Without
# this check a missing argument surfaces as a bare IndexError traceback;
# exit with a usage message instead (sys.exit with a string prints it to
# stderr and sets a non-zero exit status).
if n < 4:
    sys.exit(
        "usage: python3 chat_formatting.py "
        "input_file_name output_file_name input_date_format"
    )

input_file_name = sys.argv[1]     # path of the raw exported chat
output_file_name = sys.argv[2]    # path of the date-normalised output
input_date_format = sys.argv[3]   # key understood by format_date, e.g. "ddmmyy"

# Function to convert a 12-hour am/pm time to 24-hour format
def convert24(str1):
    """Convert a zero-padded 12-hour clock time to 24-hour form.

    Parameters:
        str1: Time text such as "01:30 pm" -- a two-digit hour, a colon,
            a two-digit minute and a trailing am/pm marker.  The marker
            may be in either letter case.

    Returns:
        The 24-hour time with the marker removed.  For am times and for
        12 pm only the two marker characters are cut off, so whatever
        preceded them (typically one space) is still present and callers
        are expected to strip the result.  Other pm times come back as
        exactly "HH:MM".

    Raises:
        ValueError: if the final branch is reached and the first two
            characters are not an integer (e.g. an unpadded "1:30 pm").
    """
    # Compare the marker case-insensitively: an upper-case "AM" used to
    # fall through to the pm branch and gain 12 hours.
    marker = str1[-2:].lower()
    starts_with_12 = str1[:2] == "12"

    if marker == "am":
        if starts_with_12:
            # 12 am is midnight, i.e. hour 00.
            return "00" + str1[2:-2]
        # Any other am hour is already correct; just drop the marker.
        return str1[:-2]

    if marker == "pm" and starts_with_12:
        # 12 pm is noon and stays 12.
        return str1[:-2]

    # Remaining pm hours (01-11) shift to 13-23.  NOTE: text with no am/pm
    # marker at all also ends up here and is treated as pm.
    return str(int(str1[:2]) + 12) + str1[2:5]

#---------------------------------------------------

def format_input_file(input_file_name):
    """Return the lines of an exported chat that contain a message header.

    A line is kept when it contains, in order, a date ("1/2/21" or
    "1-2-21"), ", ", a 12-hour time with an AM/PM marker, " - ", a user
    name terminated by ":" and then the message text.  Everything else
    (continuation lines of multi-line messages, blank lines, ...) is
    dropped.  This needs more work: dropped continuation lines are lost.

    Parameters:
        input_file_name: Path of the raw exported chat file.

    Returns:
        A list of the matching lines, unchanged and in file order, each
        still carrying its line terminator.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    WUser = r'(- (?P<username>[^:]*):)'  # To get the user's name
    WDate = r'(?P<date>(?P<month>[0-9]{1,2})[-\/]{1}(?P<day>[0-9]{1,2})[-\/]{1}(?P<year>[0-9]{2}))'
    WTime = r'(, (?P<time>(?P<hour>[0-9]{1,2}):(?P<minute>[0-9]{2}) (?P<ampm>AM|PM|am|pm)) )'
    # to get the parsed message
    WMsg = WDate + WTime + WUser + r'(?P<message>.*)'

    # Compile once instead of handing the pattern string to re on every line.
    header_pattern = re.compile(WMsg)

    # Decode explicitly as UTF-8 rather than with the platform's locale
    # encoding: chat text routinely contains emoji and other non-ASCII
    # characters (assumes the export is UTF-8 -- TODO confirm for all
    # export sources).
    with open(input_file_name, "r", encoding="utf-8") as file_input:
        return [line for line in file_input if header_pattern.search(line)]

#-----------------------------------------------

def format_date(formatted_input_list, input_date_format):  # ddmmyy
    """Rewrite the first date in every line as zero-padded mm/dd/yy.

    Parameters:
        formatted_input_list: Chat lines, each expected to contain a date
            such as "25/12/21" or "25-12-21".
        input_date_format: Field order of the dates in the input, either
            "ddmmyy" or "mmddyy".

    Returns:
        A new list with one entry per input line, in the same order.  In
        each line the first date is replaced by its "%m/%d/%y" rendering;
        the rest of the line, including any later copy of the same date
        text, is left untouched.  A line with no date is passed through
        unchanged.

    Raises:
        KeyError: if input_date_format is not one of the supported keys.
        ValueError: if a matched date is not valid for the given format
            (for example "25/12/21" read as mmddyy).
    """
    dict_date = {"ddmmyy": "%d/%m/%y", "mmddyy": "%m/%d/%y"}
    WDate = r'(?P<date>(?P<day>[0-9]{1,2})[-\/]{1}(?P<month>[0-9]{1,2})[-\/]{1}(?P<year>[0-9]{2}))'

    # Resolve the format and compile the pattern once, outside the loop.
    source_format = dict_date[input_date_format]
    date_pattern = re.compile(WDate)

    new_full_string = []
    # Iterate over every line; the previous range(len(...) - 1) silently
    # dropped the last message of the chat.
    for full_string in formatted_input_list:
        match = date_pattern.search(full_string)
        if match is None:
            new_full_string.append(full_string)
            continue

        old_format = match.group()
        # The pattern accepts "-" as well as "/" between the fields, but the
        # strptime formats only know "/", so normalise before parsing.
        datetimeobject = datetime.strptime(
            old_format.replace("-", "/"), source_format
        )
        new_format = datetimeobject.strftime("%m/%d/%y")

        # Splice at the matched span so that an identical date inside the
        # message body is not rewritten as well.
        start, end = match.span()
        new_full_string.append(
            full_string[:start] + new_format + full_string[end:]
        )
    return new_full_string


def format_time_string(string):
    """Zero-pad a single-digit hour so the time starts with two digits.

    This does not convert between 12-hour and 24-hour time; it only
    normalises the width of the hour field, e.g. "1:30 pm" becomes
    "01:30 pm".

    Parameters:
        string: Time text whose hour is the part before the first ":".

    Returns:
        The input with "0" prepended when the hour is exactly one digit;
        otherwise the input unchanged.  A string is always returned: an
        hour that is already padded (such as "01:30 pm") used to fall
        through every branch and yield None.
    """
    hour = string.partition(":")[0]
    if len(hour) == 1 and hour.isdigit():
        return "0" + string
    return string

#-----------------------------------------------------

def format_time(new_full_string):
    """Rewrite the timestamp of every line from 12-hour to 24-hour time.

    The time is located purely by position: characters 10 through 17 of
    each line, stripped of surrounding whitespace.  That matches lines
    that begin with an eight-character date followed by ", " (presumably
    the output of format_date -- verify against the caller).

    Parameters:
        new_full_string: Chat lines carrying a 12-hour am/pm time at the
            position described above.

    Returns:
        A new list with one entry per input line, where the first
        occurrence of the extracted time text is replaced by its 24-hour
        equivalent as produced by format_time_string and convert24.
    """
    new_full_string_time = []
    for full_string in new_full_string:
        time_string = full_string[10:18].strip()
        formatted_time_string = format_time_string(time_string)
        new_time_string = convert24(formatted_time_string).strip()
        # Replace only the first occurrence: the same time text may also
        # appear inside the message body, which must stay as written.
        new_full_string_time.append(
            full_string.replace(time_string, new_time_string, 1)
        )
    return new_full_string_time

#-------------------------------------------------------





def save_file(output_file_name, lines=None):
    """Write chat lines to output_file_name, replacing any existing file.

    Parameters:
        output_file_name: Path of the file to create or overwrite.
        lines: Iterable of strings to write verbatim; no line terminators
            are added.  When omitted, the module-level new_full_string
            list is written, which is what the function always did before
            this parameter existed.

    Raises:
        NameError: if lines is omitted and new_full_string has not been
            defined at module level yet.
        OSError: if the file cannot be opened for writing.
    """
    if lines is None:
        lines = new_full_string

    # Encode explicitly as UTF-8 so non-ASCII chat text is written the
    # same way regardless of the platform's locale encoding.
    with open(output_file_name, "w", encoding="utf-8") as output:
        output.writelines(lines)


def save_file_original(lines=None, output_file_name='formatted_time.txt'):
    """Write the time-converted chat lines to a file.

    Parameters:
        lines: Iterable of strings to write verbatim; no line terminators
            are added.  When omitted, the module-level
            new_full_string_time list is written, which is what the
            function always did before this parameter existed.
        output_file_name: Path of the file to create or overwrite.
            Defaults to 'formatted_time.txt' in the current directory.

    Raises:
        NameError: if lines is omitted and new_full_string_time has not
            been defined at module level yet.
        OSError: if the file cannot be opened for writing.
    """
    if lines is None:
        lines = new_full_string_time

    # Encode explicitly as UTF-8 so non-ASCII chat text is written the
    # same way regardless of the platform's locale encoding.
    with open(output_file_name, "w", encoding="utf-8") as output:
        output.writelines(lines)


# Run the conversion pipeline only when the file is executed as a script,
# not as a side effect of importing it.
if __name__ == "__main__":
    # 1. Keep only the lines that carry a message header.
    formatted_input_list = format_input_file(input_file_name)
    # 2. Normalise the dates.
    new_full_string = format_date(formatted_input_list, input_date_format)
    # 3. Convert the 12-hour times to 24-hour times.
    new_full_string_time = format_time(new_full_string)
    # The save helpers pick up new_full_string / new_full_string_time from
    # module scope, so these names must remain module-level globals.
    save_file(output_file_name)
    save_file_original()