summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tobias Klauser <tobias.klauser@uzh.ch> 2013-02-12 10:38:14 +0100
committer: Tobias Klauser <tobias.klauser@uzh.ch> 2013-02-12 10:38:14 +0100
commit: 262b6d03536e3fa08a169be53e232dde3d1c517a (patch)
tree: f90790016f24b46e5a7109b286ce29205a99c7b3
parent: ae37406edd3da83d6817b8bef88cc43191875fb5 (diff)
scripts: Add data preparation (interpolation & sampling) script
-rwxr-xr-x scripts/prepare-data.py 122
1 files changed, 122 insertions, 0 deletions
diff --git a/scripts/prepare-data.py b/scripts/prepare-data.py
new file mode 100755
index 0000000..7250bde
--- /dev/null
+++ b/scripts/prepare-data.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python
+
+import csv
+import getopt
+import os, sys
+import scipy.interpolate
+import numpy as np
+import matplotlib.pyplot as plt
+
+INTERP_METHODS = [ 'cubic', 'linear' ]  # values accepted by -i; the first entry is the default
+DEFAULT_N = 100  # number of output data points when -N is not given
+
+def usage():
+ print("""usage: {} [OPTION...] CSV-FILE...
+
+ -d DIR use DIR as output directory (default: same as input file)
+ -f force overwrite of existing files
+ -i INTERP use interpolation method INTERP. Must be one of: {} (default: {})
+ -N N produce N data points in the output (default: {})
+ -p plot result (default: off)
+ -h show this help and exit""".format(os.path.basename(sys.argv[0]),
+ ','.join(INTERP_METHODS), INTERP_METHODS[0], DEFAULT_N))
+
+def main():
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "d:fi:N:ph")
+ except getopt.GetoptError, err:
+ print(str(err))
+ usage()
+ sys.exit(-1)
+
+ if len(args) < 1:
+ usage()
+ sys.exit(-1)
+
+ output_dir = None
+ force = False
+ interp = INTERP_METHODS[0]
+ N = DEFAULT_N
+ do_plot = False
+ x_max = 100
+ y_max = 50
+ for o, a in opts:
+ if o == '-d':
+ if os.path.exists(a):
+ output_dir = a
+ else:
+ print("Error: output directory {} is not a valid path".format(a))
+ sys.exit(-1)
+ elif o == '-f':
+ force = True
+ elif o == '-i':
+ if a in INTERP_METHODS:
+ interp = a
+ else:
+ print("Error: invalid interpolation method: {}".format(a))
+ usage()
+ sys.exit(-1)
+ elif o == '-N':
+ try:
+ N = int(a)
+ except ValueError:
+ print("Error: invalid number of data points: {}".format(N))
+ usage()
+ sys.exit(-1)
+ elif o == '-p':
+ do_plot = True
+ elif o == '-h':
+ usage()
+ sys.exit(0)
+ else:
+ assert False, "unhandled option"
+
+ for csv_file in args:
+ if not os.path.exists(csv_file):
+ print("Error: File {} not found, skipping".format(csv_file))
+ continue
+
+ with open(csv_file, 'r') as cf:
+ dialect = csv.Sniffer().sniff(cf.read(1024))
+ cf.seek(0)
+ csv_reader = csv.reader(cf, dialect)
+ x_name, y_name = csv_reader.next() # header line
+
+ X = np.array([[float(_x), float(_y)] for _x, _y in csv_reader ])
+ x = X[:,0]
+ y = X[:,1]
+
+ # TODO: maybe smoothen data first?
+
+ # interpolate data points
+ xnew = np.linspace(min(x), max(x), N)
+ f = scipy.interpolate.interp1d(x, y, kind=interp)
+ ynew = f(xnew)
+
+ if do_plot:
+ plt.plot(x, y, 'x')
+ plt.plot(xnew, ynew, '-')
+ plt.legend([ 'data', "{} interpolation".format(interp) ], loc='best')
+ plt.axis([ 0, x_max, 0, y_max ], 'equal')
+ plt.xlabel(x_name)
+ plt.ylabel(y_name)
+ plt.title(csv_file)
+ plt.grid(True)
+
+ plt.show()
+
+ Y = np.transpose(np.array([ xnew, ynew ]))
+
+ if output_dir:
+ od = output_dir
+ else:
+ od = os.path.dirname(csv_file)
+ out_file = os.path.join(od, os.path.basename(csv_file) + '.interp')
+ if os.path.exists(out_file) and not force:
+ print("Error: file {} already exists, not overwriting".format(out_file))
+ else:
+ csv_writer = csv.writer(open(out_file, 'w'), dialect)
+ csv_writer.writerow([ x_name, y_name ])
+ csv_writer.writerows(Y)
+if __name__ == '__main__':  # run main() only when executed as a script, not on import
+ main()