aboutsummaryrefslogtreecommitdiff
path: root/json-format.pl
blob: 571e89e3fb725ab83f56697d7e1d768eb9e35f78 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/usr/bin/env perl
use strict;
use warnings;
use Term::ANSIColor;
use Getopt::Long;

my $reset = color('reset');
my %format = (
  FIELD  => color('bold'),
  NULL   => color('cyan'),
  BOOL   => color('red'),
  NUMBER => color('magenta'),
  STRING => color('blue'),
  ERROR  => color('bold white on_red'),
);

my ($color, $keep_ws, $help) = 'auto';
if (!GetOptions('color=s' => \$color, 'keep-ws' => \$keep_ws, 'help' => \$help)
  || $help) {
  print STDERR
    "Usage: $0 [OPTION...] [FILE...]\n" .
    "Pretty-print and colorify JSON\n" .
    "\n" .
    "  --help         print this help\n" .
    "  --keep-ws      retain all original whitespace\n" .
    "  --color=COLOR  'always', 'never' or 'auto' (the default)\n";
  exit 2;
}

%format = ()
  if $color eq 'never' || $color eq 'auto' && !-t STDOUT;

# Hash lookup is the fastest way to qualify tokens, however it cannot be used
# for everything and we need to fall back to regular expressions
my %lookup = (
  '[' => 'LBRACKET', '{' => 'LBRACE',
  ']' => 'RBRACKET', '}' => 'RBRACE',
  ':' => 'COLON', ',' => 'COMMA',
  'true' => 'BOOL', 'false' => 'BOOL', 'null' => 'NULL',
);
my @pats = (
  ['"(?:[^\\\\"]*|\\\\(?:u[\da-f]{4}|["\\\\/bfnrt]))*"' => 'STRING'],
  ['-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?' => 'NUMBER'],
  ['[ \t\r\n]+' => 'WS'],
);
my @tokens = map {[qr/^$_->[0]$/s, $_->[1]]} @pats;

# m//g is the fastest way to explode text into tokens in the first place
# and we need to construct an all-encompassing regular expression for it
my @all_pats = map {$_->[0]} @pats;
push @all_pats, quotemeta for keys %lookup;
my $any_token = qr/\G(${\join '|', @all_pats})/;

# FIXME: this probably shouldn't be a global variable
my $indent = 0;

sub nexttoken ($) {
  my $json = shift;
  if (!@$json) {
    return unless defined (my $line = <>);
    push @$json, $line =~ /$any_token/gsc;
    push @$json, substr $line, pos $line
      if pos $line != length $line;
  }

  my $text = shift @$json;
  if (my $s = $lookup{$text}) {
    return $s, $text;
  }
  for my $s (@tokens) {
    return $s->[1], $text if $text =~ $s->[0];
  }
  return 'ERROR', $text;
}

sub gettoken ($) {
  my $json = shift;
  while (my ($token, $text) = nexttoken $json) {
    next if !$keep_ws && $token eq 'WS';
    return $token, $text;
  }
  return;
}

sub printindent () {
  print "\n", '  ' x $indent;
}

sub do_value ($$$);
sub do_object ($) {
  my $json = shift;
  my $in_field_name = 1;
  my $first = 1;
  while (my ($token, $text) = gettoken $json) {
    if ($token eq 'COLON') {
      $in_field_name = 0;
    } elsif ($token eq 'COMMA') {
      $in_field_name = 1;
    } elsif ($token eq 'STRING') {
      $token = 'FIELD' if $in_field_name;
    }
    if ($token eq 'RBRACE') {
      $indent--;
      printindent unless $keep_ws;
    } elsif ($first) {
      printindent unless $keep_ws;
      $first = 0;
    }
    do_value $token, $text, $json;
    return if $token eq 'RBRACE';
  }
}

sub do_array ($) {
  my $json = shift;
  my $first = 1;
  while (my ($token, $text) = gettoken $json) {
    if ($token eq 'RBRACKET') {
      $indent--;
      printindent unless $keep_ws;
    } elsif ($first) {
      printindent unless $keep_ws;
      $first = 0;
    }
    do_value $token, $text, $json;
    return if $token eq 'RBRACKET';
  }
}

sub do_value ($$$) {
  my ($token, $text, $json) = @_;
  if (my $format = $format{$token}) {
    print $format, $text, $reset;
  } else {
    print $text;
  }
  if ($token eq 'LBRACE') {
    $indent++;
    do_object $json;
  } elsif ($token eq 'LBRACKET') {
    $indent++;
    do_array $json;
  } elsif ($token eq 'COMMA') {
    printindent unless $keep_ws;
  } elsif ($token eq 'COLON') {
    print ' ' unless $keep_ws;
  }
}

my @buffer;
while (my ($token, $text) = gettoken \@buffer) {
  do_value $token, $text, \@buffer;
  print "\n" unless $keep_ws;
}