bobbycar/logdata_visualization/logfix.py

import argparse
import sys
import time
from datetime import datetime

import numpy as np

# Command-line interface: one or more input logfiles and an optional output file.
# argparse.FileType opens the given paths while the arguments are parsed.
parser = argparse.ArgumentParser(
    description='Copys, renames and fixes logfiles written by bobbycar sd logger.',
)
parser.add_argument(
    '--input',
    nargs='+',
    type=argparse.FileType('r'),
)
parser.add_argument(
    '--output',
    nargs='?',
    type=argparse.FileType('w'),
)
args = parser.parse_args()

# Global flag that is checked before the output file gets written.
ok = True

def getTimestamp(plines):
    """Return the timestamp stored in the first line containing 'TIMESTAMP:'.

    Args:
        plines: Iterable of text lines (list or numpy array of strings).

    Returns:
        The integer that follows the 'TIMESTAMP:' marker on the first line
        that contains the marker.

    Raises:
        SystemExit: If no line contains the 'TIMESTAMP:' marker.
        ValueError: If the text after the marker is not a valid integer.
    """
    for line in plines:
        if 'TIMESTAMP:' in line:
            return int(line.split('TIMESTAMP:')[1]) #timestamp when file was created

    # Reached only when the marker is missing. The previous index-based scan
    # ran past the end of the input (IndexError) before its own error check
    # could ever trigger.
    raise SystemExit("Error: Timestamp not found!")


def filterLines(plines,plinesStarttime=None):
    """Split raw logfile lines into comments, header and good/bad data lines.

    Lines starting with '#' are comments. The first non-comment line is the
    header. A data line is accepted when it has as many comma separated
    fields as the header; repeated header lines are dropped from the
    accepted lines.

    Args:
        plines: Sequence of text lines; trailing newlines are stripped.
        plinesStarttime: Optional sequence (list or numpy array) with one
            entry per line in ``plines``. It is filtered exactly like the
            accepted data lines so both stay aligned.

    Returns:
        Tuple ``(plines, pheader, pcommentlinesMask, pdatalines,
        pdatalinesFail, pdatalinesOK, pheaderSize, plinesOK,
        plinesStarttime)``:
        plines is the array of stripped lines, pcommentlinesMask the boolean
        comment mask over plines, pdatalines all non-comment lines,
        pdatalinesFail the non-comment lines with a wrong field count,
        pdatalinesOK the list of accepted lines without header lines,
        pheaderSize the field count of the header, plinesOK a boolean mask
        over plines of lines with the header's field count, and
        plinesStarttime the list of entries matching pdatalinesOK (None when
        no start times were given).

    Raises:
        ValueError: If the input contains no non-comment line, so no header
            can be determined.
    """
    pstripped = [x.rstrip("\n") for x in plines] #remove \n
    pcommentlinesMask = np.array([x.startswith('#') for x in pstripped], dtype=bool) #mask for lines with comments
    plines = np.array(pstripped, dtype=str)

    pdataMask = ~pcommentlinesMask
    pdatalines = plines[pdataMask] #get lines with data
    if pdatalines.size == 0:
        raise ValueError("No data lines found (input is empty or contains only comments)")

    pheader = pdatalines[0] #header is the first non comment line
    pheaderSize = len(pheader.split(',')) #how many elements are expected per line

    #mask over pdatalines: True where the field count matches the header
    psizeOK = np.array([len(x.split(',')) for x in pdatalines]) == pheaderSize
    pdatalinesFail = pdatalines[~psizeOK]
    pcandidates = pdatalines[psizeOK]

    #header lines can repeat (e.g. concatenated files); keep only real data
    pnotHeader = [x != pheader for x in pcandidates]
    pdatalinesOK = [x for x,keep in zip(pcandidates,pnotHeader) if keep]

    if plinesStarttime is not None: #if starttimelist given, match with pdatalinesOK
        #asarray allows plain lists; boolean masks only work on arrays
        pstart = np.asarray(plinesStarttime)[pdataMask][psizeOK]
        plinesStarttime = [t for t,keep in zip(pstart,pnotHeader) if keep]

    plinesSize = [len(x.split(',')) for x in plines] #count arraysize for every line
    plinesOK = np.array(plinesSize)==pheaderSize #mask for okay lines (valid for data lines)

    return plines,pheader,pcommentlinesMask,pdatalines,pdatalinesFail,pdatalinesOK,pheaderSize,plinesOK,plinesStarttime


# ---------------------------------------------------------------------------
# Main flow: read all input logfiles, keep the valid data lines, prepend an
# absolute timestamp column and write everything into one csv file.
# ---------------------------------------------------------------------------

if not args.input: #--input omitted: args.input is None
    parser.error("at least one --input file is required")

inputFilenames=[x.name for x in args.input]

outputFilename=None
if args.output is not None:
    outputFilename=args.output.name

#argparse.FileType already opened every given file (the output one in 'w'
#mode). The files are re-opened by name below, so release those handles.
for _handle in list(args.input)+[args.output]:
    if _handle is None or _handle is sys.stdin or _handle is sys.stdout:
        continue #never close the standard streams ('-' arguments)
    _handle.close()


lines=[]
linesStarttime=[] #offset for every line with timestamp. will be combined to new column
header=None #None = no file read yet (an empty header string is a valid value)
for inputFilename in inputFilenames:
    print("Reading "+str(inputFilename))
    with open(inputFilename, 'r') as reader:
        inputlines = reader.readlines()

    lines+=inputlines

    #Check Headers
    _lines,_header,_,_,_,_,_,_,_=filterLines(inputlines)

    if header is None: #is first header
        header=_header

    #explicit check instead of assert: asserts are removed when running with -O
    if header!=_header:
        raise SystemExit("Header is different! File: "+str(inputFilename))

    _timestamp=getTimestamp(_lines)
    print("Timestamp="+str(_timestamp))

    #one start timestamp entry per line of the current file
    linesStarttime+=[_timestamp]*len(inputlines)

    print("Line in file="+str(len(inputlines)))

#internal invariant of the loop above, not input validation
assert len(lines)==len(linesStarttime), "Length of lines and linesStarttime does not match"

linesStarttime=np.array(linesStarttime)
lines,header,commentlinesMask,datalines,datalinesFail,datalinesOK,headerSize,linesOK,linesStarttime=filterLines(lines,linesStarttime)

print("Found "+str(len(lines))+" lines")
print(str(np.sum(commentlinesMask))+" comments")
print(str(len(datalinesFail))+" Datalines Failed")
print(str(len(datalinesOK))+" Datalines OK")
print("Header Size is "+str(headerSize))

#the first timestamp of all concatenated files names the output file
timestamp=getTimestamp(lines)
filetime = time.strftime('%Y%m%d_%H%M%S', time.localtime(timestamp))

if outputFilename is None:
    outputFilename = filetime+".csv"

print("Timestamp:"+str(timestamp)+" -> "+str(filetime))
#time.gmtime replaces the deprecated datetime.utcfromtimestamp, same output
print("UTC: "+ time.strftime('%A, %Y-%m-%d %H:%M:%S', time.gmtime(timestamp)))
print("Local Time:"+time.strftime('%A, %Y-%m-%d %H:%M:%S', time.localtime(timestamp)))


print("Writing to: "+str(outputFilename))

print("Size lines="+str(len(lines)))
print("Size commentlinesMask="+str(len(commentlinesMask)))
print("Size datalines="+str(len(datalines)))
print("Size linesOK="+str(len(linesOK)))

header="timestamp,"+header #add timestamp column

#add file timestamp to line time (first column) and add it as new column.
#A line with a non-numeric first column is skipped instead of aborting the run.
writelines=[]
linesSkipped=0
for _starttime,_line in zip(linesStarttime,datalinesOK):
    try:
        _linetime=float(_line.split(',')[0])
    except ValueError:
        linesSkipped+=1
        continue
    writelines.append(str(_starttime+_linetime)+","+_line)

if linesSkipped:
    print(str(linesSkipped)+" Datalines skipped (time column not numeric)")

linesWritten = 0
if ok:
    with open(outputFilename, 'w') as writer:
        writer.write(header+"\n") #write header
        for _line in writelines:
            writer.write(_line+"\n")
            linesWritten+=1

    print(str(linesWritten)+" lines written to "+str(outputFilename))
else:
    print("Failed!")
add rtc to teensy and add python logfile renamer and fixer 2021-10-04 16:41:16 +00:00			`import numpy as np`
			`from datetime import datetime`
			`import time`
add arguments for input and output file 2021-10-04 16:53:08 +00:00			`import argparse`

			`parser = argparse.ArgumentParser(description='Copys, renames and fixes logfiles written by bobbycar sd logger.')`
add logfile concatenation and timestamp column 2023-07-12 18:17:27 +00:00			`parser.add_argument('--input', type=argparse.FileType('r'), nargs='+')`
			`parser.add_argument('--output', nargs='?', type=argparse.FileType('w'))`
add arguments for input and output file 2021-10-04 16:53:08 +00:00			`args = parser.parse_args()`
add rtc to teensy and add python logfile renamer and fixer 2021-10-04 16:41:16 +00:00
			`ok=True`

add logfile concatenation and timestamp column 2023-07-12 18:17:27 +00:00			`def getTimestamp(plines):`
			`timestampline=-1`
			`timestampfound=False`
			`while not timestampfound:`
			`timestampline+=1`
			`timestampfound = (plines[timestampline].find('TIMESTAMP:')!=-1)`

			`timestamp=int(plines[timestampline].split('TIMESTAMP:')[1]) #timestamp when file was created`

			`if (timestampline==-1):`
			`print("Error: Timestamp not found!")`
			`exit()`

			`return timestamp`


			`def filterLines(plines,plinesStarttime=None):`

			`plines = [x.rstrip("\n") for x in plines] #remove \n`
			`pcommentlinesMask = [True if x.startswith('#') else False for x in plines] #generate mask for lines with comments`




			`plines=np.array(plines)`
			`pcommentlinesMask=np.array(pcommentlinesMask)`

			`if (plinesStarttime is not None): #if starttimelist given, match with pdatalinesOK`
			`plinesStarttime = plinesStarttime[pcommentlinesMask==False] #get lines with data`

			`pdatalines = plines[pcommentlinesMask==False] #get lines with data`



			`pheader = pdatalines[0] #header is the first non comment line`

			`pheaderSize = len(pheader.split(',')) #how many elements are expected per line`
			`pdatalinesSize = [len(x.split(',')) for x in pdatalines] #count arraysize for every dataline`

			`if (plinesStarttime is not None): #if starttimelist given, match with pdatalinesOK`
			`plinesStarttime=plinesStarttime[np.array(pdatalinesSize)==pheaderSize]`

			`pdatalinesOK = pdatalines[np.array(pdatalinesSize)==pheaderSize]`

			`if (plinesStarttime is not None): #if starttimelist given, match with pdatalinesOK`
			`plinesStarttime = [plinesStarttime[i] for i,x in enumerate(pdatalinesOK) if x != pheader]`

			`pdatalinesOK = [x for x in pdatalinesOK if x != pheader] #exclude header from data lines`




			`pdatalinesFail = pdatalines[np.array(pdatalinesSize)!=pheaderSize]`


			`plinesSize = [len(x.split(',')) for x in plines] #count arraysize for every dataline`
			`plinesOK = np.array(plinesSize)==pheaderSize #mask for okay lines (valid for data lines)`

			`return plines,pheader,pcommentlinesMask,pdatalines,pdatalinesFail,pdatalinesOK,pheaderSize,plinesOK,plinesStarttime`



			`inputFilenames=[x.name for x in args.input]`

add arguments for input and output file 2021-10-04 16:53:08 +00:00			`outputFilename=None`
			`if args.output is not None:`
			`outputFilename=args.output.name`
add rtc to teensy and add python logfile renamer and fixer 2021-10-04 16:41:16 +00:00



add logfile concatenation and timestamp column 2023-07-12 18:17:27 +00:00			`lines=[]`
			`linesStarttime=[] #offset for every line with timestamp. will be combined to new column`
			`header=""`
			`for inputFilename in inputFilenames:`
			`print("Reading "+str(inputFilename))`
			`inputlines=[]`
			`with open(inputFilename, 'r') as reader:`
			`inputlines = reader.readlines()`
add rtc to teensy and add python logfile renamer and fixer 2021-10-04 16:41:16 +00:00
add logfile concatenation and timestamp column 2023-07-12 18:17:27 +00:00			`lines+=inputlines`

			`#Check Headers`
			`_lines,_header,_,_,_,_,_,_,_=filterLines(inputlines)`

			`if (header==""): #is first header`
			`header=_header`
add rtc to teensy and add python logfile renamer and fixer 2021-10-04 16:41:16 +00:00
add logfile concatenation and timestamp column 2023-07-12 18:17:27 +00:00			`assert header==_header, "Header is different!"`
add rtc to teensy and add python logfile renamer and fixer 2021-10-04 16:41:16 +00:00
add logfile concatenation and timestamp column 2023-07-12 18:17:27 +00:00			`_timestamp=getTimestamp(_lines)`
			`print("Timestamp="+str(_timestamp))`
			`_linesStarttime=[_timestamp for x in inputlines] #create as many entries with start timestamp as there are lines in the current file`
add rtc to teensy and add python logfile renamer and fixer 2021-10-04 16:41:16 +00:00
add logfile concatenation and timestamp column 2023-07-12 18:17:27 +00:00			`linesStarttime+=_linesStarttime`

			`print("Line in file="+str(len(inputlines)))`
add rtc to teensy and add python logfile renamer and fixer 2021-10-04 16:41:16 +00:00
add logfile concatenation and timestamp column 2023-07-12 18:17:27 +00:00			`assert len(lines)==len(linesStarttime), "Length of lines and linesStarttime does not match"`

			`linesStarttime=np.array(linesStarttime)`
			`lines,header,commentlinesMask,datalines,datalinesFail,datalinesOK,headerSize,linesOK,linesStarttime=filterLines(lines,linesStarttime)`
add rtc to teensy and add python logfile renamer and fixer 2021-10-04 16:41:16 +00:00
			`print("Found "+str(len(lines))+" lines")`
update logfix for new logformat and only output csv lines 2023-07-12 16:39:46 +00:00			`print(str(np.sum(commentlinesMask))+" comments")`
add rtc to teensy and add python logfile renamer and fixer 2021-10-04 16:41:16 +00:00			`print(str(len(datalinesFail))+" Datalines Failed")`
			`print(str(len(datalinesOK))+" Datalines OK")`
			`print("Header Size is "+str(headerSize))`

add logfile concatenation and timestamp column 2023-07-12 18:17:27 +00:00			`timestamp=getTimestamp(lines)`
add rtc to teensy and add python logfile renamer and fixer 2021-10-04 16:41:16 +00:00			`filetime = time.strftime('%Y%m%d_%H%M%S', time.localtime(timestamp))`
add logfile concatenation and timestamp column 2023-07-12 18:17:27 +00:00
add arguments for input and output file 2021-10-04 16:53:08 +00:00			`if outputFilename is None:`
			`outputFilename = filetime+".csv"`
add rtc to teensy and add python logfile renamer and fixer 2021-10-04 16:41:16 +00:00
			`#is_dst(datetime(2019, 4, 1), timezone="US/Pacific")`
			`print("Timestamp:"+str(timestamp)+" -> "+str(filetime))`
			`print("UTC: "+ datetime.utcfromtimestamp(timestamp).strftime('%A, %Y-%m-%d %H:%M:%S'))`
			`print("Local Time:"+time.strftime('%A, %Y-%m-%d %H:%M:%S', time.localtime(timestamp)))`


			`print("Writing to: "+str(outputFilename))`

update logfix for new logformat and only output csv lines 2023-07-12 16:39:46 +00:00			`print("Size lines="+str(len(lines)))`
			`print("Size commentlinesMask="+str(len(commentlinesMask)))`
			`print("Size datalines="+str(len(datalines)))`
			`print("Size linesOK="+str(len(linesOK)))`

add logfile concatenation and timestamp column 2023-07-12 18:17:27 +00:00			`header="timestamp,"+header #add timestamp column`

			`writelines = [str(linesStarttime[i]+float(x.split(',')[0]))+","+x for i,x in enumerate(datalinesOK)] #add file timestamp to line time and add column to data`
update logfix for new logformat and only output csv lines 2023-07-12 16:39:46 +00:00
add rtc to teensy and add python logfile renamer and fixer 2021-10-04 16:41:16 +00:00			`linesWritten = 0`
add arguments for input and output file 2021-10-04 16:53:08 +00:00			`if ok:`
			`with open(outputFilename, 'w') as writer:`
add logfile concatenation and timestamp column 2023-07-12 18:17:27 +00:00			`writer.write(header+"\n") #write header`
			`for i,line in enumerate(writelines):`
update logfix for new logformat and only output csv lines 2023-07-12 16:39:46 +00:00			`writer.write(line+"\n")`
			`linesWritten+=1`
add arguments for input and output file 2021-10-04 16:53:08 +00:00
			`print(str(linesWritten)+" lines written to "+str(outputFilename))`
			`else:`
			`print("Failed!")`
add rtc to teensy and add python logfile renamer and fixer 2021-10-04 16:41:16 +00:00