Cool little python script for merging / swapping fields in a CSV file

  • Posted on December 18, 2010
  • Tagged python

I had tons of fun writing a little python script to extract fields from CSV files the other day. I’ve only used python once before during a CS olympiad, so feel free to critique!

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import sys
import getopt
import re
import csv

def help_message():
    print '''awesome_csv.py -- 
Options: -h     -- displays this help message
         -i     -- input file
         -o     -- output file
         -f     -- fields (merge fields with: \%,1,2:\ 3,4,5 where "\%" represents a counter). 
         -s     -- number of lines to skip'''
    sys.exit(0)

skip_lines = 0

try:
    options, xarguments = getopt.getopt(sys.argv[1:], 'hi:o:f:s:')
except getopt.error:
    print 'Error: You tried to use an unknown option or the argument for an option that is required was missing. Try using -h'
    sys.exit(0)

for a in options[:]:
    if a[0] == '-h':
        help_message()

for a in options[:]:
    if a[0] == '-i' and a[1] != '':
        infile = a[1]
        break
    elif a[0] == '-i' and a[1] == '':
        print '-i expects an input file name as argument'
        sys.exit(0)

for a in options[:]:
    if a[0] == '-o' and a[1] != '':
        outfile = a[1]
        break
    elif a[0] == '-o' and a[1] == '':
        print '-o expects an output file name as argument'
        sys.exit(0)

for a in options[:]:
    if a[0] == '-f' and a[1] != '':
        fields = a[1]
        break
    elif a[0] == '-f' and a[1] == '':
        print '-f expects a list of output fields as argument'
        sys.exit(0)

for a in options[:]:
    if a[0] == '-s' and a[1] != '':
        skip_lines = int(a[1])
        break
    elif a[0] == '-s' and a[1] == '':
        print '-s expects the number of lines to skip as argument'
        sys.exit(0)



ifile  = open(infile, "rb")
reader = csv.reader(ifile)

ofile  = open(outfile, "wb")
writer = csv.writer(ofile, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)

lines_skipped = 0
line_number   = 0

def repl(matchobj):
    return row[int(matchobj.group(0)) - 1]

for row in reader:
    if lines_skipped < skip_lines:
        lines_skipped += 1
    else:
        line_number += 1
        new_row = []

        for field in fields.split(","):
            new_field = re.sub(r'\d+', repl, field)
            new_field = re.sub(r'\%', str(line_number), new_field)

            new_row.append(new_field)

        writer.writerow(new_row)

ifile.close()
ofile.close()

print "Success! Python is awesome."

Used as:

1
2
3
4
5
6
7

echo "one,two,three" > test.csv

python parse.py -i test.csv -o result_with_merged_fields.csv -s 1 -f 1,2,3,2:\ 3,1:\ 2

cat result_with_merged_fields.csv
one,two,three,two: three,one: two

Happy!