If you're interested in functional programming, you might also want to check out my second blog, which I'm actively working on!

Thursday, December 6, 2012

Merging DITA maps and topics

This week I did a major data conversion. For about 2k products we generated DITA maps (each pointing to 3 topics). But many products had the same data so the generated topics had the same <body> tag. So we decided to first merge all topics. This also meant we had to rewrite the topicrefs in the maps. And next we could also merge the maps themselves if they had the same topicrefs.

One important lesson learned: I first used timestamps to name the merged files. It turned out that Saxon was able to merge 4 use cases within 1 millisecond, so they ended up overwriting each other. So I quickly had to look for an alternative and switched to using the hash code of the grouping keys.

Example map:
<?xml version="1.0" encoding="utf-8"?>
<value-proposition id="vp_BC51-10PA" rev="001.001" title="Value proposition" xml:lang="en-US">
  <topicmeta translate="no">
    <subtitle translate="yes">45 V, 1 A PNP medium power transistor</subtitle>
    <prodinfo><prodname>BC51-10PA</prodname></prodinfo>
  </topicmeta>
  <technical-summary-ref href="technical-summary/ts_BC51-10PA.dita"/>
  <features-benefits-ref href="features-benefits/fb_BC51-10PA.dita"/>
  <target-applications-ref href="target-applications/ta_BC51-10PA.dita"/>
</value-proposition>

Example topic:
<?xml version="1.0" encoding="utf-8"?>
<p-topic id="fb_BC51-10PA" rev="001.001" xml:lang="en-US">
  <title translate="no">Features and benefits</title>
  <prolog translate="no">...</prolog>
  <body>
    <ul>
      <li><p>High current</p></li>
      <li><p>Three current gain selections</p></li>
      <li><p>High power dissipation capability</p></li>
      <li><p>Exposed heatsink for excellent thermal and electrical conductivity</p></li>
      <li><p>Leadless very small SMD plastic package with medium power capability</p></li>
      <li><p>AEC-Q101 qualified</p></li>
    </ul>
  </body>
</p-topic>

I'm just going to share the XSLTs that did the hard work of merging the topics and maps. I'm sure I can reuse the same approach in the future.
topicmerge.xslt
<?xml version="1.0" encoding="UTF-8"?>
<!--
Author: Robby Pelssers
This stylesheet will merge topics whose body elements have the same text content
-->

<xsl:stylesheet version="2.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:xs="http://www.w3.org/2001/XMLSchema"
  xmlns:nxp="http://www.nxp.com">
  
  <!-- Serialize the result as indented UTF-8 XML 1.0. -->
  <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>

  <!--
    Stylesheet parameters. Both values are concatenated as-is into a file: URI;
    no path separator is inserted between them.
  -->
  <xsl:param name="exportFolder"/>
  <xsl:param name="subFolder"/>

  <!-- URI of the folder that holds the per-topic-type sub folders. -->
  <xsl:variable name="topicRootUri" select="concat('file:///', $exportFolder, $subFolder)"/>
  <!--
    Query part handed to collection() unchanged. Presumably it restricts the collection
    to the *.dita files of the folder (Saxon collection URI syntax); confirm against the
    documentation of the processor in use.
  -->
  <xsl:variable name="ditaFileFilter" select="'/?select=*.dita'"/>

  <!-- Same value as before; no longer used to derive the collection URIs below. -->
  <xsl:variable name="folderTemplate" select="concat($topicRootUri, '/topic-type', $ditaFileFilter)"/>

  <!--
    One document collection per topic type. The URIs are assembled directly instead of
    running replace() over the complete URI: replace() treats 'topic-type' as a regular
    expression and rewrites every occurrence, including one that happens to be part of
    $exportFolder or $subFolder.
  -->
  <xsl:variable name="featuresandbenefits" select="collection(concat($topicRootUri, '/features-benefits', $ditaFileFilter))"/>
  <xsl:variable name="technicalsummaries" select="collection(concat($topicRootUri, '/technical-summary', $ditaFileFilter))"/>
  <xsl:variable name="targetapplications" select="collection(concat($topicRootUri, '/target-applications', $ditaFileFilter))"/>

  <!-- Date/time picture string; not referenced by any template or function in this stylesheet. -->
  <xsl:variable name="date-format" select="'[Y0001]-[M01]-[D01]T[h01]:[m01]:[s01]'"/>

  <!--
    Returns the result of java.lang.String.hashCode() for $stringvalue as a string.
    The value is a signed number, so the returned string can start with '-'.
    The call is a Java extension function bound through the java:java.lang.String
    namespace and therefore needs a processor that supports such bindings.
    The return type is declared and an atomic string is returned (instead of the text
    node that xsl:value-of would create) so callers can pass the result straight to
    string functions.
  -->
  <xsl:function name="nxp:getHashCode" as="xs:string">
    <xsl:param name="stringvalue" as="xs:string"/>
    <xsl:sequence select="string(jstring:hashCode($stringvalue))" xmlns:jstring="java:java.lang.String"/>
  </xsl:function>

  <!--
    Processes one logical set of topic documents (featuresandbenefits | technicalsummaries |
    targetapplications) passed in $documents.
    The documents are grouped on p-topic/body. The grouping key is the atomized value of
    that element, i.e. its text content, so element structure inside body is not part of
    the comparison. Each group is handed to the named template "p-topic" together with
    its grouping key.
  -->
  <xsl:template name="mergeDocumentGroup">
    <xsl:param name="documents"/>
    <xsl:for-each-group select="$documents" group-by="p-topic/body">
      <xsl:variable name="sharedBodyKey" select="current-grouping-key()"/>
      <xsl:variable name="groupedTopics" select="current-group()/p-topic"/>
      <xsl:call-template name="p-topic">
        <xsl:with-param name="grouping_key" select="$sharedBodyKey"/>
        <xsl:with-param name="topics" select="$groupedTopics"/>
      </xsl:call-template>
    </xsl:for-each-group>
  </xsl:template>

  <!--
    Entry point. Nothing is selected from the source document here: the topics come
    from the three global collection variables. Each collection is merged separately and
    all resulting p-topic elements are wrapped in a single <result> element, in the order
    features and benefits, technical summaries, target applications.
  -->
  <xsl:template match="/">
    <result>
      <xsl:call-template name="mergeDocumentGroup">
        <xsl:with-param name="documents" select="$featuresandbenefits"/>
      </xsl:call-template>
      <xsl:call-template name="mergeDocumentGroup">
        <xsl:with-param name="documents" select="$technicalsummaries"/>
      </xsl:call-template>
      <xsl:call-template name="mergeDocumentGroup">
        <xsl:with-param name="documents" select="$targetapplications"/>
      </xsl:call-template>
    </result>
  </xsl:template>


  <!--
    Writes one p-topic element for a group of topics.
      topics       : the p-topic elements of the group
      grouping_key : the key the group was formed on
    The attributes and children are taken from the first topic of the group. A group with
    a single topic is copied in the default mode; a group with several topics is copied in
    mode "merge", with the grouping key tunnelled to the templates of that mode.
  -->
  <xsl:template name="p-topic">
    <xsl:param name="topics"/>
    <xsl:param name="grouping_key"/>
    <xsl:variable name="representative" select="$topics[1]"/>
    <p-topic>
      <xsl:choose>
        <xsl:when test="empty($topics[2])">
          <xsl:apply-templates select="$representative/(@* | node())"/>
        </xsl:when>
        <xsl:otherwise>
          <xsl:apply-templates select="$representative/(@* | node())" mode="merge">
            <xsl:with-param name="grouping_key" select="$grouping_key" tunnel="yes"/>
          </xsl:apply-templates>
        </xsl:otherwise>
      </xsl:choose>
      <!-- the original topic ids are added temporarily so a later transform can rewrite the topicrefs -->
      <topics>
        <xsl:for-each select="$topics">
          <id><xsl:value-of select="@id"/></id>
        </xsl:for-each>
      </topics>
    </p-topic>
  </xsl:template>

  <!--
    Gives a merged topic the id "<prefix>_<hash>": <prefix> is the part of the original id
    before the first '_', <hash> is the hash code of the tunnelled grouping key.
    A minus sign in the hash code is mapped to 'n' rather than removed. Removing it made
    the hash codes h and -h produce the same id, so two different groups could end up
    with one id.
  -->
  <xsl:template match="p-topic/@id" mode="merge">
    <xsl:param name="grouping_key" tunnel="yes"/>
    <xsl:variable name="hash" select="translate(nxp:getHashCode($grouping_key), '-', 'n')"/>
    <xsl:attribute name="id" select="concat(substring-before(., '_'), '_', $hash)"/>
  </xsl:template>

    <!-- copy all nodes and attributes which are not processed by one of available templates -->
  <xsl:template match="@* | node()" mode="#default merge">
    <!--
      One identity rule serves both the default mode and mode "merge": the node is copied
      without its namespace nodes and its attributes and children are processed in the
      mode this template was invoked in.
    -->
    <xsl:copy copy-namespaces="no">
      <xsl:apply-templates select="@* | node()" mode="#current"/>
    </xsl:copy>
  </xsl:template>


</xsl:stylesheet>

mapmerge.xslt
<?xml version="1.0" encoding="UTF-8"?>
<!--
Author: Robby Pelssers
This stylesheet will merge maps which have the same topic refs and the same subtitle.
-->
<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:nxp="http://www.nxp.com">
  
  <!-- The result is serialized as indented UTF-8 XML 1.0. -->
  <xsl:output indent="yes"
              encoding="UTF-8"
              version="1.0"
              method="xml"/>

  <!-- Date/time picture string; not referenced by any template or function in this stylesheet. -->
  <xsl:variable name="date-format"
                select="'[Y0001]-[M01]-[D01]T[h01]:[m01]:[s01]'"/>

  <!--
    Returns the result of java.lang.String.hashCode() for $stringvalue as a string.
    The value is a signed number, so the returned string can start with '-'.
    The call is a Java extension function bound through the java:java.lang.String
    namespace and therefore needs a processor that supports such bindings.
    The return type is declared and an atomic string is returned (instead of the text
    node that xsl:value-of would create) so callers can pass the result straight to
    string functions.
  -->
  <xsl:function name="nxp:getHashCode" as="xs:string">
    <xsl:param name="stringvalue" as="xs:string"/>
    <xsl:sequence select="string(jstring:hashCode($stringvalue))" xmlns:jstring="java:java.lang.String"/>
  </xsl:function>

  <!--
    Builds the key on which value-proposition maps are grouped: the subtitle followed by
    the href of the technical-summary, features-benefits and target-applications refs.
    The four fields are joined with '|' so that a field boundary is always visible in the
    key; plain concatenation let different field combinations collapse into one string.
    Each field is itself built with string-join, so a missing field contributes an empty
    string and a repeated field no longer raises a type error.
  -->
  <xsl:function name="nxp:getMapGroupingKey" as="xs:string">
    <xsl:param name="vp" as="element(value-proposition)"/>
    <xsl:sequence select="string-join((
        string-join($vp/topicmeta/subtitle, ' '),
        string-join($vp/technical-summary-ref/@href, ' '),
        string-join($vp/features-benefits-ref/@href, ' '),
        string-join($vp/target-applications-ref/@href, ' ')), '|')"/>
  </xsl:function>

  <!-- Entry point: hands every child node of the document node to the matching templates. -->
  <xsl:template match="/">
    <xsl:apply-templates select="node()"/>
  </xsl:template>

  <!--
    Rebuilds the <result> wrapper. The p-topic children are passed through the default
    mode first; the value-proposition children are then grouped on
    nxp:getMapGroupingKey(.) and each group is handed to the named template
    "value-proposition" together with its grouping key.
  -->
  <xsl:template match="result">
    <xsl:variable name="maps" select="value-proposition"/>
    <result>
      <xsl:apply-templates select="p-topic"/>
      <xsl:for-each-group select="$maps" group-by="nxp:getMapGroupingKey(.)">
        <xsl:variable name="sameKeyMaps" select="current-group()"/>
        <xsl:call-template name="value-proposition">
          <xsl:with-param name="grouping_key" select="current-grouping-key()"/>
          <xsl:with-param name="valuepropositions" select="$sameKeyMaps"/>
        </xsl:call-template>
      </xsl:for-each-group>
    </result>
  </xsl:template>

  <!--
    Writes one value-proposition element for a group of maps.
      valuepropositions : the value-proposition elements of the group
      grouping_key      : the key the group was formed on
    The attributes and children are taken from the first map of the group. A group with a
    single map is copied in the default mode; a group with several maps is copied in mode
    "merge", with the grouping key and the whole group passed on as tunnel parameters.
    No template in this stylesheet declares the tunnelled "valuepropositions" parameter.
  -->
  <xsl:template name="value-proposition">
    <xsl:param name="valuepropositions"/>
    <xsl:param name="grouping_key"/>
    <xsl:variable name="firstMap" select="$valuepropositions[1]"/>
    <value-proposition>
      <xsl:choose>
        <xsl:when test="empty($valuepropositions[2])">
          <xsl:apply-templates select="$firstMap/(@* | node())"/>
        </xsl:when>
        <xsl:otherwise>
          <xsl:apply-templates select="$firstMap/(@* | node())" mode="merge">
            <xsl:with-param name="grouping_key" select="$grouping_key" tunnel="yes"/>
            <xsl:with-param name="valuepropositions" select="$valuepropositions" tunnel="yes"/>
          </xsl:apply-templates>
        </xsl:otherwise>
      </xsl:choose>
    </value-proposition>
  </xsl:template>


  <!--
    Gives a merged map the id "<prefix>_<hash>": <prefix> is the part of the original id
    before the first '_', <hash> is the hash code of the tunnelled grouping key.
    A minus sign in the hash code is mapped to 'n' rather than removed. Removing it made
    the hash codes h and -h produce the same id, so two different groups could end up
    with one id.
  -->
  <xsl:template match="value-proposition/@id" mode="merge">
    <xsl:param name="grouping_key" tunnel="yes"/>
    <xsl:variable name="hash" select="translate(nxp:getHashCode($grouping_key), '-', 'n')"/>
    <xsl:attribute name="id" select="concat(substring-before(., '_'), '_', $hash)"/>
  </xsl:template>

  <!--
    Fallback for every node and attribute that no other template handles, in the default
    mode as well as in mode "merge": the node is copied without its namespace nodes and
    its attributes and children are processed in the mode this template was invoked in.
  -->
  <xsl:template match="@* | node()" mode="#default merge">
    <xsl:copy copy-namespaces="no">
      <xsl:apply-templates select="@* | node()" mode="#current"/>
    </xsl:copy>
  </xsl:template>


</xsl:stylesheet>

No comments:

Post a Comment