[open-ils-commits] r15864 - branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/Application/Cat (miker)

svn at svn.open-ils.org svn at svn.open-ils.org
Tue Mar 16 11:00:22 EDT 2010


Author: miker
Date: 2010-03-16 11:00:20 -0400 (Tue, 16 Mar 2010)
New Revision: 15864

Modified:
   branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/Application/Cat/BibCommon.pm
Log:
Improved patch from Galen Charlton: removes empty XML elements when ingesting a bib record

Modified: branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/Application/Cat/BibCommon.pm
===================================================================
--- branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/Application/Cat/BibCommon.pm	2010-03-16 15:00:06 UTC (rev 15863)
+++ branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/Application/Cat/BibCommon.pm	2010-03-16 15:00:20 UTC (rev 15864)
@@ -123,15 +123,35 @@
 	my $marcxml = XML::LibXML->new->parse_string($xml);
 	$marcxml->documentElement->setNamespace($MARC_NAMESPACE, "marc", 1 );
 	$marcxml->documentElement->setNamespace($MARC_NAMESPACE);
-	# remove empty control fields - at least one source of records adds ersatz blank 008s
-	# that become empty controlfield elements
-	foreach my $controlfield ($marcxml->documentElement->getElementsByTagNameNS($MARC_NAMESPACE, 'controlfield')) {
-		$controlfield->parentNode->removeChild($controlfield) unless $controlfield->hasChildNodes();
-	}
+	__remove_empty_marc_nodes($marcxml);
 	return $marcxml;
 }
 
+# Prune empty controlfield, subfield and datafield elements (seen in sloppy
+# MARC imports and templates).  Tags are visited in this order so that a
+# datafield left with no subfields after pruning is itself removed.
+sub __remove_empty_marc_nodes {
+	my $marcxml = shift;
 
+	foreach my $tag (qw(controlfield subfield datafield)) {
+		__remove_if_childless($_) foreach $marcxml->documentElement->getElementsByTagNameNS($MARC_NAMESPACE, $tag);
+	}
+}
+
+# Detach $node from its parent unless it has real content, i.e. at least
+# one child that is not a whitespace-only text node.
+sub __remove_if_childless {
+	my $node = shift;
+	# can do this more concisely by requiring XML::LibXML >= 1.70 and using nonBlankChildNodes()
+	foreach my $child ($node->childNodes()) {
+		# any non-text child, or text with non-whitespace, counts as content
+		next if $child->nodeType == XML::LibXML::XML_TEXT_NODE && $child->nodeValue =~ /^\s*$/;
+		return;
+	}
+	my $parent = $node->parentNode;
+	$parent->removeChild($node) if $parent;
+}
+
 sub _find_tcn_info { 
 	my $editor		= shift;
 	my $xml			= shift;



More information about the open-ils-commits mailing list