Adding Class 11 examples.

2019-04-17 11:24:53 -04:00 · 2019-04-17 11:24:53 -04:00 · 3aadce4524
parent b2a6e5070a
commit 3aadce4524
7 changed files with 31768 additions and 413 deletions
--- a/Templates/urls_maps.txt
+++ b/Templates/urls_maps.txt
@ -1 +0,0 @@
-https://www.loc.gov/cds/downloads/MDSConnect/Maps.2014.part01.xml.gz
--- a/Class6/CleanMARC/.gitignore
+++ b/Class6/CleanMARC/.gitignore
@ -1,4 +0,0 @@
-node_modules/*
-data/*.gz
-data/*.xml
-out/*
--- a/Class6/CleanMARC/data/urls.txt
+++ b/Class6/CleanMARC/data/urls.txt
@ -1,41 +0,0 @@
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part01.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part02.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part03.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part04.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part05.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part06.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part07.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part08.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part09.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part10.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part11.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part12.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part13.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part14.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part15.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part16.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part17.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part18.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part19.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part20.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part21.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part22.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part23.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part24.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part25.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part26.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part27.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part28.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part29.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part30.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part31.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part32.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part33.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part34.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part35.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part36.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part37.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part38.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part39.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part40.xml.gz
-http://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2014.part41.xml.gz
--- a/Class6/CleanMARC/index.js
+++ b/Class6/CleanMARC/index.js
@ -1,197 +0,0 @@
-/*
-
-Clean Base MARC example
-Jer Thorp
-3/11/19
-
- npm install
- npm run download-data
- npm start
-
-NOTE: You will need to have the URLs you'd like the download script to get listed in data/urls.txt
-NOTE: If the download-data command doesn't work you probably need to install wget:
-	OSX: brew install wget
-	Windows: http://gnuwin32.sourceforge.net/packages/wget.htm
-
-This example runs through the Books -  (~25m records) - and finds titles with the word 'monkey'.
-It's a purposely simple example to build on. Following examples look at how to expand these concepts.
-
-*/
-
-//We're using the xml2object package, which takes XML loaded as text and parses it into a javascript object
-const xml2object = require('xml2object');
-//The filesystem package is used to load the .gz files from the local directory
-const fs = require('fs');  
-//The zlib package is used to unzip the .gz files
-const zlib = require('zlib');
-//I like to use this package which provides a clean way to reference to root directory of a node project
-const appRoot = require('app-root-path');
-
-//Where is the data?
-var dataPath = appRoot + "/data";
-
-//Which subset of the MARC files were we looking for?
-const filePrefix = "BooksAll";
-//How many of them are there?
-const fileMap = [];
-fileMap["BooksAll"] = 			41;
-fileMap["Computer.Files"] = 	1; 
-fileMap["Maps"] = 				1;
-fileMap["Music"] = 				1; 
-fileMap["Names"] = 				37; 
-fileMap["Serials"] = 			11;
-fileMap["Subjects"] = 			2;
-fileMap["Visual.Materials"] = 	1;
-//Total number of files to load
-const fileCount = fileMap[filePrefix];
-//Number of files we've already loaded
-//We start at 1 because the MARC files are 1-indexed
-var fileCounter = 1;
-
-//List to hold objects we want to write to file at the end
-var outList = [];
-
-//The xml2object package needs us to build a parser object - that will ingest the XML and then
-//trigger functions when the parse is complete. 
-const parser = new xml2object([ 'record' ]);
-
-
-
-
-//------------------XML PARSER ---------------------------------------------------------------------------!!
-//When we construct the parser with an array of which xml elements to look for. In our case, we're
-//interested in the record objects. We also can pass in a reference to the file name.
-function makeParser() {
-
-	//The parser's on method handles events. Here, we'll define what happens when it finishes parsing an object 
-	parser.on('object', function(name, obj) {
-
-	  	//Use the parseRecord method to get the Title of the object
-	  	var marcDict = {};
-	  	marcDict["245"] = {"*" :"Title"};
-
-	  	var record = parseRecord(obj, marcDict);
-
-	  	//****************************** HERE'S THE PIECE OF CODE THAT ACTUALLY DOES THE THING!!***********
-	  	if (record.Title) {
-		  	var fullTitle = record.Title.join(" ");
-		  	if (fullTitle.toLowerCase().indexOf('monkey') != -1) {
-		  		console.log("FOUND A MONKEY!");
-		  		console.log(record.Title);
-		  		outList.push({title:record.Title})
-		  	}
-	    }
-	});
-
-	//And what happens when it finishes parsing all of the records. 
-	parser.on('end', function() {
-	    onParseFinished();
-	}); 
-
-}
-
-
-
-
-//------------------MARC PARSE FUNCTION ---------------------------------------------------------------------------!!
-//This function expects an object from xml2obj, and a dictionary object which links
-//the mark tags and subfields to a property name.
-//
-//For example, you could do this:
-//  marcDict["260"] = {"c" :"Year"};
-//
-//Which asks the parser to link records with a tag of 260 and a subfield of c to the property Year.
-//
-//You can also use * to say you want ALL subfields of a tag to be stored in a property:
-//
-//	marcDict["245"] = {"*" :"Title"};
-
-function parseRecord(obj, marcDict) {
- 	record = {};
-
-	for (var i = 0; i < obj.datafield.length; i++) {
-		var df = obj.datafield[i];
-		//Get the numeric tag
-		var tag  = df.tag;
-
-		//If we have the tag in our dictionary, write to the JSON object
-		//Based on the code (doesn't work for all cases?)
-		if (marcDict[tag] && df.subfield) {
-			var isAll = marcDict[tag]['*'];
-
-			for (var j = 0; j < df.subfield.length; j++) {
-
-				var code = isAll ? "*":df.subfield[j].code;
-				var disp = df.subfield[j]['$t'];
-				
-				if (marcDict[tag][code] || isAll) {
-					if (!record[marcDict[tag][code]]) {
-						record[marcDict[tag][code]] = [];
-					}
-					record[marcDict[tag][code]].push(disp);
-				}
-			}
-		}
-	}
-	return(record);	
-}
-
-
-
-
-
-//------------------FILE LOADING CASCADE ---------------------------------------------------------------------------!!
-//These two functions sequence through the list of MARC records one by one and process them with our 
-//xml2object parser
-function loadNextFile() {
-	if (fileCounter <= fileCount) {
-		//Put a zero in file names under 10
-		var n = (fileCounter < 10 ? "0":"") + fileCounter;
-		//Construct the URL
-		var url = dataPath + "/" + filePrefix + ".2014.part" + n + ".xml.gz";
-		//Open up a read stream and unzip it
-		var rstream = fs.createReadStream(url);
-		var gunzip = zlib.createGunzip();
-		 	
-		rstream   // reads from the url we've constructed
-		  .pipe(gunzip)  // uncompresses
-		  .pipe(parser.saxStream); //Parses into record objects
-			
-		fileCounter ++;
-		console.log("LOADING FILE : " + url);
-	}
-}
-
-
-function onParseFinished() {
-	
-	//Write every time - useful in very long processes
-	writeFile(outList);
-	try {
-		loadNextFile();
-	} catch(err) {
-		console.log("ERROR LOADING NEXT FILE: " + fileCounter);
-	}
-}
-
-
-
-
-//------------------JSON WRITER ---------------------------------------------------------------------------!!
-//Writes any JSON object to a file
-function writeFile(json) {
-	var json = JSON.stringify(json, null, 2);
-	//Write
-	console.log("WRITING." + json.length);
-	//File prefix is defined on line 26
-	fs.writeFile(appRoot + "/out/output_" + filePrefix + ".json", json, 'utf8', function() {
-		console.log("Saved JSON.");
-	});
-}
-
-//PULL THE TRIGGER.
-makeParser();
-loadNextFile();
-
-
-
--- a/Class6/CleanMARC/package-lock.json
+++ b/Class6/CleanMARC/package-lock.json
@ -1,36 +0,0 @@
-{
-  "name": "cleanmarc_list",
-  "version": "1.0.0",
-  "lockfileVersion": 1,
-  "requires": true,
-  "dependencies": {
-    "app-root-path": {
-      "version": "2.1.0",
-      "resolved": "https://registry.npmjs.org/app-root-path/-/app-root-path-2.1.0.tgz",
-      "integrity": "sha1-mL9lmTJ+zqGZMJhm6BQDaP0uZGo="
-    },
-    "fs": {
-      "version": "0.0.1-security",
-      "resolved": "https://registry.npmjs.org/fs/-/fs-0.0.1-security.tgz",
-      "integrity": "sha1-invTcYa23d84E/I4WLV+yq9eQdQ="
-    },
-    "sax": {
-      "version": "1.2.4",
-      "resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz",
-      "integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw=="
-    },
-    "xml2object": {
-      "version": "0.1.2",
-      "resolved": "https://registry.npmjs.org/xml2object/-/xml2object-0.1.2.tgz",
-      "integrity": "sha1-hylkKI6BgaUP3UT3iRCX/lyYK0U=",
-      "requires": {
-        "sax": ">=0.3.5"
-      }
-    },
-    "zlib": {
-      "version": "1.0.5",
-      "resolved": "https://registry.npmjs.org/zlib/-/zlib-1.0.5.tgz",
-      "integrity": "sha1-bnyXL8NxxkWmr7A6sUdp3vEU/MA="
-    }
-  }
-}
--- a/Class6/CleanMARC/package.json
+++ b/Class6/CleanMARC/package.json
@ -1,19 +0,0 @@
-{
-  "name": "cleanmarc_list",
-  "version": "1.0.0",
-  "description": "ITP Artists in the Archive MARC Example",
-  "main": "index.js",
-  "scripts": {
-    "start": "node index.js",
-    "download-data": "wget --input-file data/urls.txt --directory-prefix data"
-  },
-  "dependencies": {
-    "app-root-path": "^2.1.0",
-    "fs": "^0.0.1-security",
-    "xml2object": "^0.1.2",
-    "zlib": "^1.0.5"
-  },
-  "devDependencies": {},
-  "author": "Jer Thorp",
-  "license": "ISC"
-}
--- a/Class6/MARC_Network/animals_network.json
+++ b/Class6/MARC_Network/animals_network.json
				`@ -1 +0,0 @@`
				`https://www.loc.gov/cds/downloads/MDSConnect/Maps.2014.part01.xml.gz`