MarkLogic Splitting XML File in custom transformation module

Question

According to the documentation on custom transformations during mlcp ingestion, the transform function in the module can generate zero, one, or many output documents. How can the following document be split into one document per "person" element? Is it also possible to carry over the CREATE_DATE attribute? (mlcp options such as -input_file_type "aggregates" with -aggregate_record_element person are not acceptable, because the CREATE_DATE attribute on the "people" element would not be available in the content.)

<people CREATE_DATE="07/01/2020">
  <person>
    <first>George</first>
    <last>Washington</last>
  </person>
  <person>
    <first>Betsy</first>
    <last>Ross</last>
  </person>
</people>

Answer 1

You could send the entire XML doc in and apply a custom transform that iterates over each of the person elements, creates a new person element with the /people/@CREATE_DATE and the person/node() children:

xquery version "1.0-ml";
module namespace example = "example";

(:~
 : Splits the incoming document into one output map per /people/person element.
 :
 : Each output map carries:
 :   "uri"   - "/people/person-" followed by an xdmp:random() value and ".xml"
 :   "value" - a new <person> element holding the /people/@CREATE_DATE
 :             attribute followed by the original person's child nodes
 :
 : @param $content map whose "value" entry is read as the input document
 : @param $context transform context map; not used by this function
 : @return zero or more maps, one per person element found
 :)
declare function example:transform(
  $content as map:map,
  $context as map:map
) as map:map*
{
  let $people := map:get($content, "value")/people
  let $created := $people/@CREATE_DATE
  for $entry in $people/person
  let $uri := "/people/person-" || xdmp:random() || ".xml"
  let $record := <person>{$created, $entry/node()}</person>
  return
    map:new((
      map:entry("uri", $uri),
      map:entry("value", $record)
    ))
};

This may not be the most efficient approach, but it was the shortest and easiest way I could think of to do this in a custom JavaScript transformation:

const mem = require('/MarkLogic/appservices/utils/in-mem-update.xqy');

/**
 * Splits the incoming document into one output descriptor per /people/person
 * element, attaching the /people/@CREATE_DATE attribute to each copy.
 *
 * @param {Object} content - Transform input; `content.value` must expose an
 *   `xpath(path)` method (presumably a MarkLogic document node - confirm
 *   against the caller).
 * @returns {*} The result of `Sequence.from` over the person elements, where
 *   each item is `{ uri, value }`: `uri` is '/people/person-' + a random
 *   value + '.xml', and `value` is whatever `mem.nodeInsertChild` returns for
 *   the copied person and the CREATE_DATE attribute.
 */
function splitPeople(content) {
  const source = content.value;
  // Looked up once, outside the per-person mapper, and reused for every copy.
  const createDate = source.xpath('/people/@CREATE_DATE');

  const toOutputDocument = (person) => {
    // Copy the person through a NodeBuilder before handing it to the
    // in-memory update helper.
    const standalone = new NodeBuilder().addNode(person).toNode();
    const uri = '/people/person-' + xdmp.random() + '.xml';
    const value = mem.nodeInsertChild(standalone, createDate);
    return { uri, value };
  };

  return Sequence.from(source.xpath('/people/person'), toOutputDocument);
}

exports.transform = splitPeople;

MarkLogic Splitting XML File in custom transformation module

Question

1 answers

solution1
0 2022-01-08 17:03:46

MarkLogic Splitting XML File in custom transformation module

Question

1 answers

solution1 0 2022-01-08 17:03:46

solution1
0 2022-01-08 17:03:46