If you're interested in functional programming, you might also want to check out my second blog, which I'm actively working on!

Monday, August 27, 2012

Still using XSLT1.0? Time to start using Saxon.


Folder structure:
   - input
        - jsonxml-1.xml
        - jsonxml-2.xml
        - jsonxml-3.xml
   - xslt
        - jsonxmltransformer.xslt
   - output (empty)

Below are some basic usage instructions. For more details, check out the official documentation. You can download the Saxon JAR from the official Saxon home page or from this Maven repository.
java -jar Saxon-HE-9.4.jar [options] [params]

-s:filename    -- Identifies the source file or directory
-o:filename    -- Send output to named file. In the absence of this option, the results go to standard output.
                  If the source argument identifies a directory, this option is mandatory and must also identify a directory; 
                  on completion it will contain one output file for each file in the source directory
-threads:N     -- Used only when the -s option specifies a directory. Controls the number of threads used to process the files in the directory
-xsl:filename  -- Specifies the file containing the principal stylesheet module

Now let's see how easy it is to transform a single file jsonxml-1.xml and save the result to transformed-result1.xml
java -jar Saxon-HE-9.4.jar -s:C:/tmp/easytransform/input/jsonxml-1.xml -o:C:/tmp/easytransform/output/transformed-result1.xml -xsl:C:/tmp/easytransform/xslt/jsonxmltransformer.xslt
That was easy enough. But suppose we want to transform a complete directory of source files?
java -jar Saxon-HE-9.4.jar -s:C:/tmp/easytransform/input -o:C:/tmp/easytransform/output -xsl:C:/tmp/easytransform/xslt/jsonxmltransformer.xslt
This will by convention save the transformed results using the same filenames as the input files to the specified output directory.

Thursday, August 23, 2012

XML Database as source for RDF Database

We've come a long way reading and transforming XML resources from a plain filesystem to setting up an XML database and executing sophisticated cross collection xqueries. As we aim to always improve our role as information providers we are on the verge of switching to one-stop-shopping. Currently we only have a part of the masterdata stored in the XMLDB. And we already are able to
  • generate DITA maps / topics  (PDF creation,  automated translations, ...)
  • generate publications (xhtml)
  • answer data-related questions in real time
The main components in this architectural picture are
  • WebSphere Product Center (exports product information as XML) (soon to change)
  • Apache Cocoon (main framework that does all of the above)
  • Sedna (XMLDB)

But to get one-stop-shopping we need a more flexible way to link data from different sources (RDBMS, XMLDB, CSV, ...)

We will automate data extraction for all information resources and transform that data into RDF, so that it becomes easy to link the data and to offer a consistent way of querying it (a SPARQL endpoint).

Below is an example of an XQuery library module with which we can generate RDF from the XMLDB.
module namespace basictypes2rdf = "http://www.nxp.com/basictypes2rdf";

declare copy-namespaces preserve, inherit;

declare namespace rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#";
declare namespace skos="http://www.w3.org/2004/02/skos/core#";
declare namespace foaf="http://xmlns.com/foaf/0.1/";
declare namespace nxp="http://purl.org/nxp/schema/v1/";

import module namespace basictypes = "http://www.nxp.com/basictypes";
import module namespace string = "http://www.nxp.com/string";
import module namespace rdfutil = "http://www.nxp.com/rdfutil";
import module namespace packages2rdf = "http://www.nxp.com/packages2rdf";


(:
    Maps every supplied Product element to one rdf:Description element.

    For each product the description carries:
      - rdf:about                 the URI from basictypes2rdf:getURI
      - rdf:type                  the fixed nxp BasicType class
      - nxp:productStatusDate     ProductInformation/ProductStatusDate, typed xsd:date
      - skos:prefLabel            ProductInformation/Description, tagged en-us
      - nxp:productStatus         the nxp schema URI plus the lower-cased,
                                  camel-cased ProductInformation/ProductStatus
      - foaf:homepage, nxp:typeNumber   both built from basictypes:getName
      - nxp:mechanicalOutline     only when ProductInformation/PackageID exists

    Returns the bare descriptions without an enclosing rdf:RDF element.
:)
declare function basictypes2rdf:fromBasicTypesRaw($products as element(Product)*) as element(rdf:Description)* {
    for $product in $products
    let $info := $product/ProductInformation
    let $typeNumber := basictypes:getName($product)
    let $statusName := string:toCamelCase(lower-case(data($info/ProductStatus)))
    return
        <rdf:Description rdf:about="{basictypes2rdf:getURI($product)}">
            <rdf:type rdf:resource="http://purl.org/nxp/schema/v1/BasicType"/>
            <nxp:productStatusDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">{data($info/ProductStatusDate)}</nxp:productStatusDate>
            <skos:prefLabel xml:lang="en-us">{data($info/Description)}</skos:prefLabel>
            <nxp:productStatus rdf:resource="http://purl.org/nxp/schema/v1/{$statusName}"/>
            <foaf:homepage rdf:resource="http://www.nxp.com/pip/{$typeNumber}"/>
            <nxp:typeNumber>{$typeNumber}</nxp:typeNumber>
            {
                (: the package link is optional: products without a PackageID get none :)
                if (empty($info/PackageID))
                then ()
                else <nxp:mechanicalOutline rdf:resource="{packages2rdf:getURI(basictypes:getPackage($product))}"/>
            }
        </rdf:Description>
};

(:
    Converts the supplied Product elements to rdf:Description elements and
    hands them to rdfutil:wrapRDF, yielding a single rdf:RDF element.
:)
declare function basictypes2rdf:fromBasicTypes($products as element(Product)*) as element(rdf:RDF) {
    let $descriptions := basictypes2rdf:fromBasicTypesRaw($products)
    return rdfutil:wrapRDF($descriptions)
};

(:
    Convenience entry point: takes the products returned by
    basictypes:getBasicTypes(), narrows them with basictypes:filterBySet
    using $ids, and returns the RDF for the remaining products.
:)
declare function basictypes2rdf:fromBasicTypeIds($ids as xs:string*) as element(rdf:RDF) {
    let $allBasicTypes := basictypes:getBasicTypes()
    let $selected := basictypes:filterBySet($allBasicTypes, $ids)
    return basictypes2rdf:fromBasicTypes($selected)
};

(:
    Returns the resource URI of a product: rdfutil:getURI is called with the
    "basic_types" collection key and the product's ProductInformation/Name.
:)
declare function basictypes2rdf:getURI($product as element(Product)) as xs:anyURI {
    let $name := data($product/ProductInformation/Name)
    return rdfutil:getURI("basic_types", $name)
};

(:
    Usages:
    basictypes2rdf:fromBasicTypes(basictypes:getBasicType("PH3330L"))
    basictypes2rdf:fromBasicTypes(basictypes:getBasicTypes()[ProductInformation/PIPType = 0])
    basictypes2rdf:fromBasicTypeIds(("PH3330L","PH3330CL"))
:)
The following expression produces the output below:
import module namespace basictypes2rdf = "http://www.nxp.com/basictypes2rdf";
basictypes2rdf:fromBasicTypeIds("PH3330L")

<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:nxp="http://purl.org/nxp/schema/v1/">
  <rdf:Description rdf:about="http://data.nxp.com/id/basic_types/ph3330l">
    <rdf:type rdf:resource="http://purl.org/nxp/schema/v1/BasicType"/>
    <nxp:productStatusDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2011-10-28</nxp:productStatusDate>
    <skos:prefLabel xml:lang="en-us">N-channel TrenchMOS logic level FET</skos:prefLabel>
    <nxp:productStatus rdf:resource="http://purl.org/nxp/schema/v1/endOfLife"/>
    <foaf:homepage rdf:resource="http://www.nxp.com/pip/PH3330L"/>
    <nxp:typeNumber>PH3330L</nxp:typeNumber>
    <nxp:mechanicalOutline rdf:resource="http://data.nxp.com/id/package_outline_versions/sot669"/>
  </rdf:Description>
</rdf:RDF>
And you should validate the output just to make sure.

Using SPARQL describe queries

This post shows how to run DESCRIBE queries from the Stardog CLI, although the queries themselves are not database-specific. The resulting triples can be exported in several formats:
  • NTRIPLES
  • RDFXML
  • TURTLE
  • TRIG
  • TRIX
  • N3
  • NQUADS
Let's try out a simple describe query in 2 formats.

$ ./stardog query  -c http://localhost:5822/nxp -q "DESCRIBE <http://data.nxp.com/basicTypes/PH3330L>" -f RDFXML

<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
  xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
  xmlns:owl="http://www.w3.org/2002/07/owl#"
  xmlns:foaf="http://xmlns.com/foaf/0.1/"
  xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
  xmlns:skos="http://www.w3.org/2004/02/skos/core#"
  xmlns:nxp="http://purl.org/nxp/schema/v1/"
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">

<rdf:Description rdf:about="http://data.nxp.com/basicTypes/PH3330L">
  <rdf:type rdf:resource="http://purl.org/nxp/schema/v1/BasicType"/>
  <nxp:productStatusDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2011-10-28</nxp:productStatusDate>
  <skos:prefLabel xml:lang="en-us">N-channel TrenchMOS logic level FET</skos:prefLabel>
  <nxp:productStatus rdf:resource="http://purl.org/nxp/schema/v1/endOfLife"/>
  <foaf:homepage rdf:resource="http://www.nxp.com/pip/PH3330L"/>
  <nxp:typeNumber>PH3330L</nxp:typeNumber>
  <nxp:mechanicalOutline rdf:resource="http://data.nxp.com/packageOutlineVersion/SOT669"/>
</rdf:Description>

</rdf:RDF>

$ ./stardog query  -c http://localhost:5822/nxp -q "DESCRIBE <http://data.nxp.com/basicTypes/PH3330L>" -f TURTLE

@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix nxp: <http://purl.org/nxp/schema/v1/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

<http://data.nxp.com/basicTypes/PH3330L> a nxp:BasicType ;
        nxp:productStatusDate "2011-10-28"^^xsd:date ;
        skos:prefLabel "N-channel TrenchMOS logic level FET"@en-us ;
        nxp:productStatus nxp:endOfLife ;
        foaf:homepage <http://www.nxp.com/pip/PH3330L> ;
        nxp:typeNumber "PH3330L" ;
        nxp:mechanicalOutline <http://data.nxp.com/packageOutlineVersion/SOT669> .

You can also return ALL object graphs of a specific type. This wraps all descriptions in a single rdf:RDF element.
./stardog query  -c http://localhost:5822/nxp -f RDFXML -q "
PREFIX nxp:   <http://purl.org/nxp/schema/v1/>
DESCRIBE ?s
WHERE {
  ?s a nxp:BasicType.
}
" 

<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
 xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
 xmlns:owl="http://www.w3.org/2002/07/owl#"
 xmlns:foaf="http://xmlns.com/foaf/0.1/"
 xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
 xmlns:skos="http://www.w3.org/2004/02/skos/core#"
 xmlns:nxp="http://purl.org/nxp/schema/v1/"
 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">

<rdf:Description rdf:about="http://data.nxp.com/basicTypes/74AUP1G57GW">
 <rdf:type rdf:resource="http://purl.org/nxp/schema/v1/BasicType"/>
 <nxp:productStatusDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2011-10-20</nxp:productStatusDate>
    ...
</rdf:Description>

<rdf:Description rdf:about="http://data.nxp.com/basicTypes/74AUP1G58GW">
 <rdf:type rdf:resource="http://purl.org/nxp/schema/v1/BasicType"/>
 <nxp:productStatusDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2011-10-14</nxp:productStatusDate>
    ...
</rdf:Description>

</rdf:RDF>

Tuesday, August 21, 2012

Stardog command line interface

There are two CLIs available:
  • stardog-admin: administrative client (uses the SNARL protocol only)
  • stardog: a user's client (uses HTTP or SNARL)

$ ./stardog help
Stardog 1.0.4 command line client

Type 'help <cmd>' or '<cmd> -h/--help' to print the usage information for a specific command

Type stardog [subcommand] [args]'

Available commands:
        add
        consistency
        explain inference
        explain plan
        export
        icv convert
        icv validate
        namespace add
        namespace list
        namespace remove
        passwd
        query
        remove
        search
        status

For more information on this library, visit the home page at http://stardog.com/docs/
For information on Stardog, please visit http://stardog.com

$ ./stardog-admin help
Stardog 1.0.4 command line client

Type 'help <cmd>' or '<cmd> -h/--help' to print the usage information for a specific command

Type stardog-admin [global args] [subcommand] [args]'

The global commands are --home, --disable-security, --logfile. See docs for more info.

Available commands:
        copy
        create
        drop
        icv add
        icv drop
        icv remove
        list
        metadata get
        metadata set
        migrate
        offline
        online
        optimize
        passwd
        role add
        role drop
        role grant
        role list
        role permission
        role revoke
        server start
        server stop
        user add
        user drop
        user edit
        user grant
        user list
        user permission
        user revoke

For more information on this library, visit the home page at http://stardog.com/docs/
For information on Stardog, please visit http://stardog.com

Suppose we want to get detailed info about using the 'user list' command.
$ ./stardog-admin help user list
Usage: user list [options]

Lists all users.

Valid Options:
        [--all, -A]                    : Be verbose with user info.

        [--ask-password, -P]           : Prompt for password.

        --format, -f arg               : Format for the output [TEXT, CSV, HTML]

        [--help, -h]                   : Display usage information

        [--passwd, -p arg]             : Password

        [--server arg]                 : URL of Stardog Server. If this option isn't specified, it will be read from JVM argument 'stardog.default.cli.server'. If the JVM arg isn't set, the default value 'snarl://localhost:5820' is used. If server URL
has no explicit port value, the default port value '5820' is used.

        [--username, -u arg]           : User name

Stardog Quick-start notes (Windows)

  • Laptop: Windows 7, 64 bit, intel Core i5-2520M CPU @2.5Ghz, 8GB RAM, 120 GB SSD
  • set STARDOG_HOME variable to e.g. c:/development/stardog-1.0.4 
  • copy the license file over to STARDOG_HOME
  • start the server
$ ./stardog-admin server start
   ************************************************************
   Stardog server 1.0.4 started on Tue Aug 21 11:02:34 CEST 2012.

   SNARL server running on snarl://localhost:5820/
   HTTP server running on http://localhost:5822/.
   Stardog documentation accessible at http://localhost:5822/docs
   SNARL & HTTP servers listening on all interfaces

   STARDOG_HOME=C:\development\stardog-1.0.4
   ************************************************************
  • create a database from input file
  $ ./stardog-admin create -n nxp  -t D -u admin -p admin --server snarl://localhost:5820/ c:/testdata/products.rdf
Bulk loading data to new database.
Data load complete. Loaded 340,819 triples in 00:00:04 @ 69.4K triples/sec.
Successfully created database 'nxp'.  
  • query the database:
  
$ ./stardog query -c http://localhost:5822/nxp -q "
  PREFIX nxp:   <http://purl.org/nxp/schema/v1/>
  PREFIX skos:  <http://www.w3.org/2004/02/skos/core#>
  SELECT ?typeNumber ?prefLabel
  WHERE
  {
    ?x nxp:typeNumber ?typeNumber;
       skos:prefLabel ?prefLabel .
  } 
  LIMIT 10
  " 
Executing Query:

PREFIX nxp:   <http://purl.org/nxp/schema/v1/>
PREFIX skos:  <http://www.w3.org/2004/02/skos/core#>
SELECT ?typeNumber ?prefLabel
WHERE
{
  ?x nxp:typeNumber ?typeNumber;
     skos:prefLabel ?prefLabel .
}
LIMIT 10

+------------------+---------------------------------------------------------------+
|    typeNumber    |                           prefLabel                           |
+------------------+---------------------------------------------------------------+
| "74AUP1G57GW"    | "Low-power configurable multiple function gate"@en-us         |
| "74AUP1G58GW"    | "Low-power configurable multiple function gate"@en-us         |
| "74AUP1T45GW"    | "Low-power dual supply translating transeiver; 3-state"@en-us |
| "74AUP2G32GN"    | "Dual2-inputORgate (IMPULSE)"@en-us                           |
| "74AUP2G32GS"    | "Dual2-inputORgate (IMPULSE)"@en-us                           |
| "74AVC16T245DGG" | "74AVC16T245DGG (IMPULSE)"@en-us                              |
| "74AVC16T245DGV" | "16-Bit Dual-SupplyTx/Rx w/3-State (IMPULSE)"@en-us           |
| "74AVC2T45DC"    | "74AVC2T45DC (IMPULSE)"@en-us                                 |
| "74AVC2T45DP"    | "2-bit Dual-supply translator (IMPULSE)"@en-us                |
| "74AVC2T45GD"    | "2-bit Dual-supply translator (IMPULSE)"@en-us                |
+------------------+---------------------------------------------------------------+

Query returned 10 results in 00:00:00.124

Monday, August 20, 2012

Some extra useful String functions (XQuery)

module namespace string = "http://www.nxp.com/string";

(:
  Upper-cases the first character of $string; the remaining characters are
  left untouched. The empty string yields the empty string.

  string:capitalize("test")  -->  "Test"
:)
declare function string:capitalize($string as xs:string) as xs:string {
    let $chars := string:split($string)
    let $head := upper-case($chars[1])
    let $tail := string-join($chars[position() gt 1], '')
    return concat($head, $tail)
};

(:
    Capitalizes every word of $string (words as produced by string:splitWords)
    and joins the results with a single space.

    string:capitalizeAll("makes you wonder")  --> "Makes You Wonder"
:)
declare function string:capitalizeAll($string as xs:string) as xs:string {
    let $capitalized :=
        for $word in string:splitWords($string)
        return string:capitalize($word)
    return string-join($capitalized, ' ')
};

(:
   Explodes $string into a sequence of one-character strings, in order.
   The empty string yields the empty sequence.

   string:split("work") --> ("w", "o", "r", "k")
:)
declare function string:split($string as xs:string) as xs:string* {
    for $position in 1 to string-length($string)
    return substring($string, $position, 1)
};

(:
   Splits $string into its whitespace-separated words.

   Leading and trailing whitespace is discarded and runs of whitespace count
   as one separator, so no zero-length words are ever returned. An empty or
   whitespace-only string yields the empty sequence.

   string:splitWords("go live")      --> ("go", "live")
   string:splitWords("  go   live ") --> ("go", "live")
:)
declare function string:splitWords($string as xs:string) as xs:string* {
    (: normalize-space trims both ends and collapses inner whitespace runs to
       one space; tokenizing the raw string on "\s+" would instead return ""
       tokens for leading or trailing whitespace :)
    tokenize(normalize-space($string), ' ')
};

(:
  Joins the words of $string (as produced by string:splitWords) without
  separators. The first word is kept exactly as given; every following word
  is passed through string:capitalize.

  string:toCamelCase("business segment") --> "businessSegment"
:)
declare function string:toCamelCase($string as xs:string) as xs:string {
    let $words := string:splitWords($string)
    let $first := $words[1]
    let $rest :=
        for $word in $words[position() gt 1]
        return string:capitalize($word)
    return string-join(($first, $rest), '')
};