#!/usr/bin/perl

# json2catalog.pl - create a "catalog" from a set of HathiTrust json files

# Eric Lease Morgan <emorgan@nd.edu>
# May 15, 2015 - first cut; see https://sharc.hathitrust.org/features


# configure; all compile-time constants declared in a single statement
use constant {

	# when true, each decoded JSON structure is dumped as it is read
	DEBUG    => 0,

	# prefix to which a record's OCLC value is appended
	WORLDCAT => 'http://worldcat.org/oclc/',

	# column headings; the first line of the tab-delimited output
	HEADER   => "id\ttitle\tpublication date\tpage count\tHathiTrust URL\tlanguage\tMARC (JSON) URL\tWorldCat URL\n",

};

# require
use Data::Dumper;
use JSON;
use strict;

# main: read every .json file in the directory named on the command line
# and print one tab-delimited catalog line per file to STDOUT, preceded by
# a header line

# get input; sanity check
my $directory = $ARGV[ 0 ];
if ( ! $directory ) {

	print "Usage: $0 <directory>\n";
	exit;
	
}

# initialize; unbuffer STDOUT and encode the output as UTF-8
$| = 1;
binmode( STDOUT, ':utf8' );
print HEADER;

# the directory may be given with or without a trailing slash; add a
# separator only when one is missing so file paths are always well-formed
my $separator = ( $directory =~ m{/$} ) ? '' : '/';

# process each file in the given directory
opendir( my $dh, $directory ) or die "Error in opening $directory: $!\n";
while ( defined( my $filename = readdir( $dh ) ) ) {

	# only .json files; the dot is required so names that merely end in
	# the letters "json" are not mistaken for data files
	next if ( $filename !~ /\.json$/ );

	# convert the json file to a hash; slurp returns the raw bytes of the
	# file, which is the form decode_json expects
	my $path = $directory . $separator . $filename;
	my $json = decode_json( slurp( $path ) );
	if ( DEBUG ) { print Dumper( $json ) }

	# parse
	my $metadata  = $json->{ 'metadata' };
	my $id        = $json->{ 'id' };
	my $title     = $metadata->{ 'title' };
	my $date      = $metadata->{ 'pubDate' };
	my $pagecount = $json->{ 'features' }{ 'pageCount' };
	my $handle    = $metadata->{ 'handleUrl' };
	my $language  = $metadata->{ 'language' };
	my $marc      = $metadata->{ 'htBibUrl' };
	my $worldcat  = WORLDCAT . $metadata->{ 'oclc' };

	# dump; the fields are in the same order as the columns of HEADER
	print "$id\t$title\t$date\t$pagecount\t$handle\t$language\t$marc\t$worldcat\n";
	     
}

# clean up and done
closedir( $dh );
exit;


# read and return the contents of a file
#
# takes the name of the file to read and returns its entire contents as a
# single string; dies with the system error when the file can't be opened
sub slurp {
 
	my $f = shift;

	# three-argument open with a lexical handle; the name is used exactly
	# as given, so surrounding whitespace or characters such as ">" and "|"
	# in it are never interpreted as part of the open mode
	open( my $fh, '<', $f ) or die "Can't open $f: $!\n";

	# with the input record separator undefined, one read returns everything
	my $r = do { local $/; <$fh> };
	close( $fh );
	return $r;
 
}