# Name: Vineet Dcunha
# "I have not given or received any unauthorized assistance on this assignment."
"""Load the public chauffeurs CSV into the DRIVER_DTL table and print counts.

The script (re)creates DRIVER_DTL in ``dsc450.db``, cleans every CSV row
(missing markers become SQL NULL, dates are normalised to ISO text), bulk
inserts the rows and prints three summary counts.
"""

import csv
import sqlite3
import os
import ast
import datetime as dt

DB_PATH = 'dsc450.db'
DATA_DIR = "C:/Users/USER/Desktop/DSC/DSC_450 Database For Analytics/Assignment/Assignment_5"
CSV_NAME = 'Public_Chauffeurs_Short_hw3.csv'

# Number of CSV fields consumed per row; matches the DRIVER_DTL column count.
COLUMN_COUNT = 12

# Zero-based CSV positions, in DRIVER_DTL column order.
RENEWED_IDX = 1
STATUS_DATE_IDX = 3
LICENSE_TYPE_IDX = 5
ORIGINAL_ISSUE_DATE_IDX = 6

CREATE_DRIVER_TABLE = """
CREATE TABLE IF NOT EXISTS DRIVER_DTL (
    LICENSE_NUMBER NUMBER(20),
    RENEWED VARCHAR2(10),
    STATUS VARCHAR2(20),
    STATUS_DATE DATE,
    DRIVER_TYPE VARCHAR2(20),
    LICENSE_TYPE VARCHAR2(20),
    ORIGINAL_ISSUE_DATE DATE,
    NAME VARCHAR2(100),
    SEX VARCHAR2(8),
    CHAUFFEUR_CITY VARCHAR2(25),
    CHAUFFEUR_STATE VARCHAR2(5),
    RECORD_NUMBER VARCHAR2(20)
);
"""

INSERT_DRIVER_ROW = 'INSERT INTO DRIVER_DTL VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )'

# (label printed to stdout, query returning a single count)
REPORT_QUERIES = (
    # total number of rows loaded
    ('Total number of records:',
     'SELECT COUNT(1) FROM DRIVER_DTL'),
    # number of distinct LICENSE_NUMBER values
    ('Total distinct number of LICENSE_NUMBER:',
     'SELECT COUNT(DISTINCT LICENSE_NUMBER) FROM DRIVER_DTL'),
    # number of rows whose RENEWED value is NULL
    ('Total number of NULL RENEWED record:',
     'SELECT COUNT(1) FROM DRIVER_DTL WHERE RENEWED IS NULL'),
)


def _is_missing(value):
    """Return True when *value* is None, empty, or contains 'null' (any case)."""
    return value is None or value == "" or 'null' in value.lower()


def _clean_date(value):
    """Return *value* (m/d/Y or m-d-Y) as ISO 'YYYY-MM-DD' text, or None if missing.

    Raises:
        ValueError: if a non-missing value is not a valid month-day-year date.
    """
    if _is_missing(value):
        return None
    # Accept both '/' and '-' separators by normalising to '-' before parsing.
    parsed = dt.datetime.strptime(value.replace('/', '-'), "%m-%d-%Y").date()
    # Bind ISO text explicitly: this is what sqlite3's default date adapter
    # stores, but that adapter is deprecated as of Python 3.12.
    return parsed.isoformat()


def clean_row(row):
    """Return the first 12 fields of a CSV *row*, cleaned for insertion.

    * RENEWED: missing -> None, otherwise '-' is replaced by '/'.
    * STATUS_DATE / ORIGINAL_ISSUE_DATE: missing -> None, otherwise ISO date text.
    * LICENSE_TYPE: missing -> None, otherwise unchanged.
    * All other fields are passed through untouched.

    Missing values are stored as real NULLs (None) in every cleaned column so
    that ``IS NULL`` queries behave the same for all of them.

    Raises:
        IndexError: if *row* has fewer than 12 fields.
        ValueError: if a date field cannot be parsed.
    """
    if len(row) < COLUMN_COUNT:
        raise IndexError(
            f"expected at least {COLUMN_COUNT} columns, got {len(row)}"
        )
    cleaned = list(row[:COLUMN_COUNT])

    renewed = cleaned[RENEWED_IDX]
    cleaned[RENEWED_IDX] = (
        None if _is_missing(renewed) else renewed.replace('-', '/')
    )
    cleaned[STATUS_DATE_IDX] = _clean_date(cleaned[STATUS_DATE_IDX])
    if _is_missing(cleaned[LICENSE_TYPE_IDX]):
        cleaned[LICENSE_TYPE_IDX] = None
    cleaned[ORIGINAL_ISSUE_DATE_IDX] = _clean_date(
        cleaned[ORIGINAL_ISSUE_DATE_IDX]
    )
    return cleaned


def load_rows(csv_path):
    """Read *csv_path*, skip its header line and return the cleaned data rows.

    Completely blank lines (which csv.reader yields as empty lists) are skipped.
    """
    # newline='' is what the csv module requires for correct newline handling.
    with open(csv_path, 'r', newline='') as handle:
        reader = csv.reader(handle)
        next(reader, None)  # skip header; tolerate an empty file
        return [clean_row(row) for row in reader if row]


def main():
    """Rebuild DRIVER_DTL from the CSV file and print the summary counts."""
    # The database is opened before changing directory, so dsc450.db is
    # created relative to the directory the script was launched from.
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()
        cursor.execute('DROP TABLE IF EXISTS DRIVER_DTL;')
        cursor.execute(CREATE_DRIVER_TABLE)  # create the DRIVER_DTL table

        os.chdir(DATA_DIR)
        cursor.executemany(INSERT_DRIVER_ROW, load_rows(CSV_NAME))
        conn.commit()  # finalize inserted data

        for label, query in REPORT_QUERIES:
            print(label, cursor.execute(query).fetchone()[0])
    finally:
        conn.close()  # always release the connection, even on failure


if __name__ == "__main__":
    main()