#!/usr/bin/perl

# json2catalog.pl - create a "catalog" from a set of HathiTrust json files
#
# Reads every *.json file in the directory named on the command line, pulls a
# handful of bibliographic fields out of each one, and writes a tab-delimited
# table (one header row, then one row per file) to STDOUT.

# Eric Lease Morgan
# May 15, 2015 - first cut; see https://sharc.hathitrust.org/features


# configure
use constant DEBUG    => 0;
use constant WORLDCAT => 'http://worldcat.org/oclc/';
use constant HEADER   => "id\ttitle\tpublication date\tpage count\tHathiTrust URL\tlanguage\tMARC (JSON) URL\tWorldCat URL\n";

# require
use Data::Dumper;
use File::Spec;
use JSON;
use strict;
use warnings;

# get input; sanity check
my $directory = $ARGV[ 0 ];
if ( ! defined $directory or $directory eq '' ) {

    print "Usage: $0 <directory>\n";
    exit;

}

# initialize; unbuffered output, and encode wide characters as UTF-8 on the way out
$| = 1;
binmode( STDOUT, ':utf8' );
print STDOUT HEADER;

# process each file in the given directory
opendir( my $dh, $directory ) or die "Error in opening $directory: $!\n";
while ( defined( my $filename = readdir( $dh ) ) ) {

    # only .json files
    next if ( $filename !~ /\.json\z/ );

    # build the path portably; the directory argument need not end in a slash
    my $path = File::Spec->catfile( $directory, $filename );

    # convert the json file to a hash; decode_json dies on malformed input
    my $json = decode_json( slurp( $path ) );
    if ( DEBUG ) { print Dumper( $json ) }

    # parse
    my $metadata = $json->{ 'metadata' };
    my $oclc     = $metadata->{ 'oclc' };
    my @fields   = (
        $json->{ 'id' },
        $metadata->{ 'title' },
        $metadata->{ 'pubDate' },
        $json->{ 'features' }{ 'pageCount' },
        $metadata->{ 'handleUrl' },
        $metadata->{ 'language' },
        $metadata->{ 'htBibUrl' },
        WORLDCAT . ( defined $oclc ? $oclc : '' ),
    );

    # dump; a missing field becomes an empty column rather than an
    # "uninitialized value" warning
    print join( "\t", map { defined $_ ? $_ : '' } @fields ), "\n";

}

# clean up and done
closedir( $dh );
exit;


# read and return the entire contents of a file as one string of raw bytes
# (decode_json expects UTF-8 encoded bytes, so no decoding layer is applied);
# dies if the file can not be opened
sub slurp {

    my $f = shift;

    open( my $handle, '<:raw', $f ) or die "Can't open $f: $!\n";

    # undefine the input record separator locally so one read returns everything
    my $r = do { local $/; <$handle> };

    close $handle;
    return $r;

}