aboutsummaryrefslogtreecommitdiff
path: root/json-format.pl
blob: feeb77fa09dc7331aec9451d3169b806dc9acfe3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env perl
# To speed up processing of large files, GNU parallel can be used:
#   $ parallel --pipe -k json-format.pl INPUT
use strict;
use warnings;
use Term::ANSIColor;
use Getopt::Long;

my $reset = color('reset');
my %format = (
  FIELD  => color('bold'),
  NULL   => color('cyan'),
  BOOL   => color('red'),
  NUMBER => color('magenta'),
  STRING => color('blue'),
  ERROR  => color('bold white on_red'),
);

my $color = 'auto';
my $help;
if (!GetOptions('color=s' => \$color, 'help' => \$help) || $help) {
  print STDERR
    "Usage: $0 [OPTION...] [FILE...]\n" .
	"Pretty-print and colorify JSON\n" .
    "\n" .
    "  --help         print this help\n" .
    "  --color=COLOR  'always', 'never' or 'auto' (the default)\n";
  exit 2;
}

%format = ()
  if $color eq 'never' || $color eq 'auto' && !-t STDOUT;

# Hash lookup is the fastest way to qualify tokens, however it cannot be used
# for everything and we need to fall back to regular expressions
my %lookup = (
  '[' => 'LBRACKET', '{' => 'LBRACE',
  ']' => 'RBRACKET', '}' => 'RBRACE',
  ':' => 'COLON', ',' => 'COMMA',
  'true' => 'BOOL', 'false' => 'BOOL', 'null' => 'NULL',
);
my @pats = (
  ['"(?:[^\\\\"]*|\\\\(?:u[\da-f]{4}|["\\\\/bfnrt]))*"' => 'STRING'],
  ['-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?' => 'NUMBER'],
  ['[ \t\r\n]+' => 'WS'],
);
my @tokens = map {[qr/^$_->[0]$/s, $_->[1]]} @pats;

# m//g is the fastest way to explode text into tokens in the first place
# and we need to construct an all-encompassing regular expression for it
my @all_pats = map {$_->[0]} @pats;
push @all_pats, quotemeta for keys %lookup;
my $any_token = qr/\G(${\join '|', @all_pats})/;

# FIXME: this probably shouldn't be a global variable
my $indent = 0;

sub nexttoken ($) {
  my $ref = shift;
  return unless @$ref;
  my $text = shift @$ref;
  if (my $s = $lookup{$text}) {
    return $s, $text;
  }
  for my $s (@tokens) {
    return $s->[1], $text if $text =~ $s->[0];
  }
  return 'ERROR', $text;
}

sub printindent () {
  print "\n";
  print '  ' x $indent;
}

sub do_value ($$$);
sub do_object ($) {
  my $json = shift;
  my $in_field_name = 1;
  while (my ($token, $text) = nexttoken $json) {
    if ($token eq 'COLON') {
      $in_field_name = 0;
    } elsif ($token eq 'COMMA') {
      $in_field_name = 1;
    } elsif ($token eq 'STRING') {
      $token = 'FIELD' if $in_field_name;
    }
    if ($token eq 'RBRACE') {
      $indent--;
      printindent;
    }
    do_value $token, $text, $json;
    return if $token eq 'RBRACE';
  }
}

sub do_array ($) {
  my $json = shift;
  while (my ($token, $text) = nexttoken $json) {
    if ($token eq 'RBRACKET') {
      $indent--;
      printindent;
    }
    do_value $token, $text, $json;
    return if $token eq 'RBRACKET';
  }
}

sub do_value ($$$) {
  my ($token, $text, $json) = @_;
  if (my $format = $format{$token}) {
    print $format, $text, $reset;
  } else {
    print $text;
  }
  if ($token eq 'LBRACE') {
    $indent++;
    printindent;
    do_object $json;
  } elsif ($token eq 'LBRACKET') {
    $indent++;
    printindent;
    do_array $json;
  } elsif ($token eq 'COMMA') {
    printindent;
  } elsif ($token eq 'COLON') {
    print ' ';
  }
}

while (<>) {
    my $json = $_;

    my @matches = $json =~ /$any_token/gsc;
    push @matches, substr $json, pos $json
      if pos $json != length $json;
    while (my ($token, $text) = nexttoken \@matches) {
      next if $token eq 'WS';
      do_value $token, $text, \@matches;
    }
    print "\n";
}