# bobbycar/logdata/logfix.py
# 2025-07-17 17:35:02 +02:00
import argparse
import os.path
import re
import time
from datetime import datetime, timezone

import numpy as np
import pytz
# Command line interface:
#   -i/--input        one or more log files written by the bobbycar SD logger
#   -o/--output       optional suffix inserted into the generated csv filename
#   -c/--consecutive  follow numbered files (log001, log002, ...) from one input
#   -t/--time         gap (minutes) after which a new output csv is started
parser = argparse.ArgumentParser(description='Copys, renames and fixes logfiles written by bobbycar sd logger.')
parser.add_argument('-i', '--input', type=argparse.FileType('r'), nargs='+', required=True, help="list of input log files")
parser.add_argument('-o', '--output', nargs='?', type=str, help="output filename")
parser.add_argument('-c','--consecutive', action="store_true", help="add consecutive files to input. If the input file ends with a number the following logfiles will be added.")
parser.add_argument("-t", "--time", nargs='?', type=int, help="Create new csv file after time without data. In Minutes.")
args = parser.parse_args()
# Write-enable flag for the output stage; never set to False anywhere in this
# script, so output is always written.
ok=True
def getTimestamp(plines):
    """Return the unix timestamp recorded in the first 'TIMESTAMP:' line.

    The logger writes a comment line like ``#TIMESTAMP:1600000000`` when a
    file is created. Prints an error and exits if no such line exists — the
    original code raised an IndexError in that case (its ``timestampline==-1``
    check was dead code, since the index was incremented before being tested).

    :param plines: iterable of raw log lines
    :return: timestamp (int, seconds) when the file was created
    """
    for line in plines:
        if 'TIMESTAMP:' in line:
            # int() tolerates surrounding whitespace such as a trailing newline
            return int(line.split('TIMESTAMP:')[1])
    print("Error: Timestamp not found!")
    exit()
def filterLines(plines, plinesStarttime=None):
    """Split raw log lines into header / data / comment / failure groups.

    The first non-comment line is taken as the CSV header. A data line is
    "OK" when it has the same number of comma-separated fields as the header;
    repeated header lines (from concatenated files) are removed from the OK
    set. If ``plinesStarttime`` (np.ndarray, one entry per input line) is
    given, it is filtered in lockstep with the OK data lines.

    :param plines: list of raw lines (trailing newlines are stripped here)
    :param plinesStarttime: optional per-line start-timestamp array
    :return: (plines, pheader, pcommentlinesMask, pdatalines, pdatalinesFail,
              pdatalinesOK, pheaderSize, plinesOK, plinesStarttime)
    """
    plines = np.array([x.rstrip("\n") for x in plines])
    pcommentlinesMask = np.array([x.startswith('#') for x in plines])
    datamask = ~pcommentlinesMask  # non-comment lines carry data
    if plinesStarttime is not None:  # keep start times aligned with data lines
        plinesStarttime = plinesStarttime[datamask]
    pdatalines = plines[datamask]
    pheader = pdatalines[0]  # header is the first non-comment line
    pheaderSize = len(pheader.split(','))  # expected field count per line
    # Mask of data lines whose field count matches the header.
    sizeOKMask = np.array([len(x.split(',')) for x in pdatalines]) == pheaderSize
    if plinesStarttime is not None:
        plinesStarttime = plinesStarttime[sizeOKMask]
    pdatalinesOK = pdatalines[sizeOKMask]
    pdatalinesFail = pdatalines[~sizeOKMask]
    # Drop repeated header lines (concatenated files) from the OK set,
    # keeping plinesStarttime in lockstep; both become plain lists here,
    # which downstream slicing relies on.
    if plinesStarttime is not None:
        plinesStarttime = [plinesStarttime[i] for i, x in enumerate(pdatalinesOK) if x != pheader]
    pdatalinesOK = [x for x in pdatalinesOK if x != pheader]
    # Per-line OK mask over ALL lines (only meaningful for data lines).
    plinesOK = np.array([len(x.split(',')) for x in plines]) == pheaderSize
    return plines, pheader, pcommentlinesMask, pdatalines, pdatalinesFail, pdatalinesOK, pheaderSize, plinesOK, plinesStarttime
# Build the list of input file names. In consecutive mode the trailing number
# in the single input filename is incremented until a file does not exist.
inputFilenames = []
if (args.consecutive):
    if (len(args.input) != 1):
        # parser.error() prints the message and exits with status 2, so the
        # old extra exit() call after it was dead code. ("consequtive" typo
        # in the message fixed as well.)
        parser.error("in consecutive mode exactly one input file is required")
    nextFilename = args.input[0].name
    while os.path.isfile(nextFilename):
        print(nextFilename+" exists")
        inputFilenames.append(nextFilename)
        # Locate the LAST run of digits in the name (e.g. "log003.csv") and
        # increment it, preserving its zero padding.
        match = re.search(r'(\d+)(?=\D*$)', nextFilename)
        if match is None:
            # No number in the name: nothing to increment. The original
            # scanning loop crashed with int('') here; stop cleanly instead.
            break
        width = match.end(1) - match.start(1)
        number = int(match.group(1)) + 1
        nextFilename = nextFilename[:match.start(1)] + str(number).zfill(width) + nextFilename[match.end(1):]
else:
    inputFilenames = [x.name for x in args.input]
# Read all input files, verify they share the same CSV header, and remember
# each file's creation timestamp once per line of that file.
lines = []
linesStarttime = []  # per-line file start timestamp; later combined into a new column
header = ""
for inputFilename in inputFilenames:
    print("Reading "+str(inputFilename))
    with open(inputFilename, 'r') as reader:
        inputlines = reader.readlines()
    lines += inputlines
    # Check that this file's header matches the first file's header.
    _lines, _header, _, _, _, _, _, _, _ = filterLines(inputlines)
    if header == "":  # first file defines the expected header
        header = _header
    if header != _header:
        # explicit check instead of `assert` so validation survives `python -O`
        raise AssertionError("Header is different!")
    _timestamp = getTimestamp(_lines)
    print("Timestamp="+str(_timestamp))
    # every line of this file shares the file's start timestamp
    linesStarttime += [_timestamp] * len(inputlines)
    print("Lines in file="+str(len(inputlines)))
if len(lines) != len(linesStarttime):
    raise AssertionError("Length of lines and linesStarttime does not match")
linesStarttime = np.array(linesStarttime)
lines, header, commentlinesMask, datalines, datalinesFail, datalinesOK, headerSize, linesOK, linesStarttime = filterLines(lines, linesStarttime)
print("Found "+str(len(lines))+" lines")
print(str(np.sum(commentlinesMask))+" comments")
print(str(len(datalinesFail))+" Datalines Failed")
print(str(len(datalinesOK))+" Datalines OK")
print("Header Size is "+str(headerSize))
timestamp = getTimestamp(lines)  # first timestamp in the combined log
# Divide the OK data lines into sets (one output file each); a new set starts
# whenever the gap between consecutive log timestamps exceeds -t/--time.
last_timestamp = timestamp
next_timestamp = timestamp
set_datalinesOK = []
set_linesStarttime = []
last_idata = 0
if args.time is not None:
    # --time is documented in minutes while the timestamps below are unix
    # seconds, so convert the threshold to seconds (the original compared
    # minutes against seconds directly).
    newfile_timestamp_difference = args.time * 60
    for idata, data in enumerate(datalinesOK):
        last_timestamp = next_timestamp
        # absolute line time = file start timestamp + first CSV column
        next_timestamp = linesStarttime[idata] + float(data.split(',')[0])
        # Gap too large: close the current set and start a new one. The
        # idata != last_idata guard prevents an empty first set when the
        # very first line already exceeds the threshold.
        if next_timestamp - last_timestamp > newfile_timestamp_difference and idata != last_idata:
            set_datalinesOK.append(datalinesOK[last_idata:idata])
            set_linesStarttime.append(linesStarttime[last_idata:idata])
            last_idata = idata
set_datalinesOK.append(datalinesOK[last_idata:])  # remainder forms the last set
set_linesStarttime.append(linesStarttime[last_idata:])
# Write one CSV file per set, named after the set's start time.
for iset, current_datalinesOK in enumerate(set_datalinesOK):
    print("Creating Output for set "+str(iset))
    current_linesStarttime = set_linesStarttime[iset]
    if len(current_linesStarttime) == 0:
        # Guard against an empty set (e.g. no data lines at all); the
        # original crashed with an IndexError below in that case.
        print("Skipping empty set "+str(iset))
        continue
    timestamp = current_linesStarttime[0]  # start timestamp of this set's first line
    # Use an aware UTC datetime. The previous naive
    # datetime.utcfromtimestamp(ts).astimezone(Berlin) depended on the
    # machine's local timezone (and utcfromtimestamp is deprecated).
    # NOTE(review): this names files by UTC wall time, which matches the old
    # behaviour on a machine running in Europe/Berlin — confirm that Berlin
    # local time naming is not wanted instead.
    filetime = datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime('%Y%m%d_%H%M%S')
    outputFilename = ""
    if args.output is not None:
        outputFilename = "_"+args.output
    outputFilename = filetime+outputFilename+".csv"
    print("Timestamp:"+str(timestamp)+" -> "+str(filetime))
    print("UTC: "+ datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime('%A, %Y-%m-%d %H:%M:%S'))
    print("Writing to: "+str(outputFilename))
    # Build the output header ONCE PER FILE from the shared input header. The
    # original executed header="timestamp,"+header inside this loop, so every
    # set after the first got "timestamp," prepended repeatedly.
    outheader = "timestamp,"+header
    # prepend an absolute-timestamp column: file start timestamp + line time (col 0)
    writelines = [str(current_linesStarttime[i]+float(x.split(',')[0]))+","+x for i, x in enumerate(current_datalinesOK)]
    linesWritten = 0
    if ok:  # module-level write-enable flag (always True in current code)
        with open(outputFilename, 'w') as writer:
            writer.write(outheader+"\n")  # write header
            for line in writelines:
                writer.write(line+"\n")
                linesWritten += 1
        print(str(linesWritten)+" lines written to "+str(outputFilename))
    else:
        print("Failed!")