scripts: Add data preparation (interpolation & sampling) script

author: Tobias Klauser <tobias.klauser@uzh.ch> 2013-02-12 10:38:14 +0100
committer: Tobias Klauser <tobias.klauser@uzh.ch> 2013-02-12 10:38:14 +0100
commit: 262b6d03536e3fa08a169be53e232dde3d1c517a (patch)
tree: f90790016f24b46e5a7109b286ce29205a99c7b3
parent: ae37406edd3da83d6817b8bef88cc43191875fb5 (diff)
1 files changed, 122 insertions, 0 deletions
diff --git a/scripts/prepare-data.py b/scripts/prepare-data.py
new file mode 100755
index 0000000..7250bde
--- /dev/null
+++ b/scripts/prepare-data.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python
+
+import csv
+import getopt
+import os, sys
+import scipy.interpolate
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Interpolation methods accepted by the -i option; the first entry is the
+# default used when -i is not given.
+INTERP_METHODS = [ 'cubic', 'linear' ]
+# Default number of data points in the output (overridden by the -N option).
+DEFAULT_N = 100
+
+def usage():
+    """Print the command line help text to standard output."""
+    program = os.path.basename(sys.argv[0])
+    methods = ','.join(INTERP_METHODS)
+    default_method = INTERP_METHODS[0]
+    help_text = """usage: {} [OPTION...] CSV-FILE...
+
+    -d DIR     use DIR as output directory (default: same as input file)
+    -f         force overwrite of existing files
+    -i INTERP  use interpolation method INTERP. Must be one of: {} (default: {})
+    -N N       produce N data points in the output (default: {})
+    -p         plot result (default: off)
+    -h         show this help and exit"""
+    print(help_text.format(program, methods, default_method, DEFAULT_N))
+
+def main():
+    """Interpolate and resample each CSV file named on the command line.
+
+    Parses the options described in usage(). For every CSV-FILE argument the
+    file is read as a header row followed by two-column numeric rows, y is
+    interpolated over N evenly spaced x values spanning the input x range,
+    the result is optionally plotted, and it is written to
+    '<input name>.interp' using the CSV dialect detected in the input.
+
+    Input files that do not exist are reported and skipped. Existing output
+    files are only overwritten when -f is given.
+
+    Exits with status -1 on invalid options or missing file arguments, and
+    with status 0 after printing the help text for -h.
+    """
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "d:fi:N:ph")
+    except getopt.GetoptError as err:
+        print(str(err))
+        usage()
+        sys.exit(-1)
+
+    output_dir = None
+    force = False
+    interp = INTERP_METHODS[0]
+    N = DEFAULT_N
+    do_plot = False
+    # Fixed axis limits used when plotting.
+    x_max = 100
+    y_max = 50
+    for o, a in opts:
+        if o == '-d':
+            # Output files are created inside this path, so it has to be an
+            # existing directory rather than just any existing path.
+            if os.path.isdir(a):
+                output_dir = a
+            else:
+                print("Error: output directory {} is not a valid path".format(a))
+                sys.exit(-1)
+        elif o == '-f':
+            force = True
+        elif o == '-i':
+            if a in INTERP_METHODS:
+                interp = a
+            else:
+                print("Error: invalid interpolation method: {}".format(a))
+                usage()
+                sys.exit(-1)
+        elif o == '-N':
+            try:
+                N = int(a)
+                # A sample count below one cannot produce any output points.
+                if N < 1:
+                    raise ValueError(a)
+            except ValueError:
+                # Report the rejected argument, not the current value of N.
+                print("Error: invalid number of data points: {}".format(a))
+                usage()
+                sys.exit(-1)
+        elif o == '-p':
+            do_plot = True
+        elif o == '-h':
+            usage()
+            sys.exit(0)
+        else:
+            assert False, "unhandled option"
+
+    # Checked after option handling so that a lone -h prints the help text
+    # and exits successfully instead of being treated as a usage error.
+    if not args:
+        usage()
+        sys.exit(-1)
+
+    for csv_file in args:
+        if not os.path.exists(csv_file):
+            print("Error: File {} not found, skipping".format(csv_file))
+            continue
+
+        # Keep the input file open only while reading it.
+        with open(csv_file, 'r') as cf:
+            # Detect the CSV dialect from the first 1024 characters, then
+            # rewind so the reader starts at the header line again.
+            dialect = csv.Sniffer().sniff(cf.read(1024))
+            cf.seek(0)
+            csv_reader = csv.reader(cf, dialect)
+            x_name, y_name = next(csv_reader)  # header line
+
+            X = np.array([[float(_x), float(_y)] for _x, _y in csv_reader ])
+
+        x = X[:,0]
+        y = X[:,1]
+
+        # TODO: maybe smoothen data first?
+
+        # interpolate data points
+        xnew = np.linspace(min(x), max(x), N)
+        f = scipy.interpolate.interp1d(x, y, kind=interp)
+        ynew = f(xnew)
+
+        if do_plot:
+            plt.plot(x, y, 'x')
+            plt.plot(xnew, ynew, '-')
+            plt.legend([ 'data', "{} interpolation".format(interp) ], loc='best')
+            plt.axis([ 0, x_max, 0, y_max ], 'equal')
+            plt.xlabel(x_name)
+            plt.ylabel(y_name)
+            plt.title(csv_file)
+            plt.grid(True)
+
+            plt.show()
+
+        # One output row per interpolated (x, y) pair.
+        Y = np.transpose(np.array([ xnew, ynew ]))
+
+        if output_dir:
+            od = output_dir
+        else:
+            od = os.path.dirname(csv_file)
+        out_file = os.path.join(od, os.path.basename(csv_file) + '.interp')
+        if os.path.exists(out_file) and not force:
+            print("Error: file {} already exists, not overwriting".format(out_file))
+            continue
+
+        # The context manager guarantees the output is flushed and closed.
+        with open(out_file, 'w') as of:
+            csv_writer = csv.writer(of, dialect)
+            csv_writer.writerow([ x_name, y_name ])
+            csv_writer.writerows(Y)
+# Run the command line interface only when this file is executed as a script.
+if __name__ == '__main__':
+    main()
author	Tobias Klauser <tobias.klauser@uzh.ch>	2013-02-12 10:38:14 +0100
committer	Tobias Klauser <tobias.klauser@uzh.ch>	2013-02-12 10:38:14 +0100
commit	262b6d03536e3fa08a169be53e232dde3d1c517a (patch)
tree	f90790016f24b46e5a7109b286ce29205a99c7b3
parent	ae37406edd3da83d6817b8bef88cc43191875fb5 (diff)