Prolog and R dataframe formats (#697)

* Added the Prolog format.

* fixed a wrong format relic.

* Added the R dataframe format.

---------

Co-authored-by: M. Falda <marco.falda@unipd.it>
Co-authored-by: Niklas Laxström <niklas.laxstrom@gmail.com>
这个提交包含在:
Marco Falda 2024-05-04 09:44:28 +02:00 提交者 GitHub
父节点 a87854fdd0
当前提交 e0d36b9e68
找不到此签名对应的密钥
GPG 密钥 ID: B5690EEEBB952194
共有 8 个文件被更改,包括 479 次插入8 次删除

查看文件

@ -64,7 +64,9 @@ $GLOBALS['srfgFormats'] = [
'interquartilerange',
'interquartilerange.exc',
'mode',
'interquartilemean'
'interquartilemean',
'prolog',
'dataframe'
// Boilerplate
// Enable access to the format identifier
// 'boilerplate',

查看文件

@ -161,8 +161,10 @@ class SemanticResultFormats {
'incoming' => 'SRFIncoming',
'media' => 'SRF\MediaPlayer',
'datatables' => 'SRF\DataTables',
'carousel' => 'SRF\Carousel',
'gantt' => 'SRF\Gantt\GanttPrinter'
'carousel' => 'SRF\Carousel',
'gantt' => 'SRF\Gantt\GanttPrinter',
'prolog' => 'SRF\Prolog\PrologPrinter',
'dataframe' => 'SRF\dataframe\DataframePrinter',
];
$formatAliases = [

查看文件

@ -0,0 +1,208 @@
<?php
namespace SRF\Prolog;
use SMW\FileExportPrinter;
use SMWQueryResult;
/**
* @author Marco Falda
* @since 3.2
*/
class PrologPrinter extends FileExportPrinter {

	const HEADER_ROW_OFFSET = 1;

	/**
	 * Supported output variants, keyed by the value of the `fileformat`
	 * parameter. Both variants differ only in the file extension.
	 *
	 * @var array[]
	 */
	protected $fileFormats = [
		'pl' => [
			'writer' => 'pl',
			'mimetype' => 'text/prolog',
			'extension' => '.pl',
		],
		'pro' => [
			'writer' => 'pl',
			'mimetype' => 'text/prolog',
			'extension' => '.pro',
		],
	];

	/**
	 * The entry of $fileFormats selected by outputAsFile(); null until then.
	 *
	 * @var array|null
	 */
	protected $fileFormat;

	/**
	 * Output a human readable label for this printer.
	 *
	 * @see ResultPrinter::getName
	 *
	 * {@inheritDoc}
	 */
	public function getName() {
		return $this->msg( 'srf-printername-prolog' );
	}

	/**
	 * @see ExportPrinter::getMimeType()
	 *
	 * @since 1.8
	 *
	 * @param SMWQueryResult $queryResult
	 *
	 * @return string
	 */
	public function getMimeType( SMWQueryResult $queryResult ) {
		return $this->currentFileFormat()[ 'mimetype' ];
	}

	/**
	 * Builds the download file name from the `filename` parameter, falling
	 * back to a generated identifier when that parameter is empty, and
	 * appends the extension of the selected file format.
	 *
	 * @see ExportPrinter::getFileName
	 *
	 * @param SMWQueryResult $queryResult
	 *
	 * @return string
	 */
	public function getFileName( SMWQueryResult $queryResult ) {
		$baseName = $this->params[ 'filename' ] ?: base_convert( uniqid(), 16, 36 );

		return $baseName . $this->currentFileFormat()[ 'extension' ];
	}

	/**
	 * Selects the file format from the `fileformat` parameter (defaulting to
	 * 'pl' when it is missing or unknown) and delegates to the parent.
	 *
	 * @see ExportPrinter::outputAsFile
	 *
	 * @param SMWQueryResult $queryResult
	 * @param array $params
	 */
	public function outputAsFile( SMWQueryResult $queryResult, array $params ) {
		$requested = array_key_exists( 'fileformat', $params )
			? $params[ 'fileformat' ]->getValue()
			: 'pl';

		$this->fileFormat = array_key_exists( $requested, $this->fileFormats )
			? $this->fileFormats[ $requested ]
			: $this->fileFormats[ 'pl' ];

		parent::outputAsFile( $queryResult, $params );
	}

	/**
	 * Defines the list of available parameters to an individual result
	 * printer.
	 *
	 * @see ResultPrinter::getParamDefinitions
	 *
	 * {@inheritDoc}
	 */
	public function getParamDefinitions( array $definitions ) {
		$params = parent::getParamDefinitions( $definitions );

		$definitions[ 'searchlabel' ]->setDefault( wfMessage( 'srf-prolog-link' )->inContentLanguage()->text() );

		$params[ 'filename' ] = [
			'type' => 'string',
			'name' => 'filename',
			'default' => '',
			'message' => 'srf-paramdesc-prolog-filename',
		];

		$params[ 'fileformat' ] = [
			'type' => 'string',
			'name' => 'fileformat',
			'default' => 'pl',
			'tolower' => true,
			'message' => 'srf-paramdesc-prolog-fileformat',
		];

		$params[ 'pname' ] = [
			'type' => 'string',
			'name' => 'pname',
			'default' => 'predicate',
			'tolower' => true,
			'message' => 'srf-paramdesc-prolog-pname',
		];

		$params[ 'navalue' ] = [
			'type' => 'string',
			'name' => 'navalue',
			'default' => "'NA'",
			'message' => 'srf-paramdesc-prolog-navalue',
		];

		return $params;
	}

	/**
	 * Returns the serialised predicates when a file is requested, otherwise
	 * the text of a link to the export.
	 *
	 * @see ResultPrinter::getResultText
	 *
	 * {@inheritDoc}
	 */
	protected function getResultText( SMWQueryResult $queryResult, $outputMode ) {
		if ( $outputMode === SMW_OUTPUT_FILE ) {
			return $this->getResultFileContents( $queryResult );
		}

		$this->isHTML = ( $outputMode === SMW_OUTPUT_HTML );

		return $this->getLink( $queryResult, $outputMode )->getText( $outputMode, $this->mLinker );
	}

	/**
	 * Serialises the query result as one Prolog fact per (row, printout)
	 * pair, in the form `pname('subject', 'property label', value).`.
	 *
	 * The first field of every row supplies the subject; each further field
	 * yields one fact. Fields holding several values become a Prolog list of
	 * quoted atoms, and a field without a value gets the `navalue` parameter.
	 *
	 * @param SMWQueryResult $queryResult
	 *
	 * @return string Facts separated by newlines; empty string for no rows
	 */
	protected function getResultFileContents( SMWQueryResult $queryResult ) {
		$predicateName = $this->params[ 'pname' ];
		$facts = [];

		while ( $resultRow = $queryResult->getNext() ) {
			$subjectAtom = $this->quoteAtom( '' );
			$isSubjectField = true;

			foreach ( $resultRow as $resultField ) {
				if ( $isSubjectField ) {
					$isSubjectField = false;
					$content = $resultField->getContent();
					// Guard against a subject field without content instead of
					// reading an undefined offset.
					// NOTE(review): relies on the data item's string conversion,
					// exactly as the previous string interpolation did.
					$subjectAtom = $this->quoteAtom( isset( $content[0] ) ? (string)$content[0] : '' );
					continue;
				}

				$labelAtom = $this->quoteAtom( $resultField->getPrintRequest()->getLabel() );
				$term = $this->serialiseField( $resultField );

				$facts[] = $predicateName . '(' . $subjectAtom . ', ' . $labelAtom . ', ' . $term . ').';
			}
		}

		return implode( "\n", $facts );
	}

	/**
	 * Converts a single result field into the Prolog term used as the third
	 * argument of a fact.
	 *
	 * @param object $resultField A field of a result row
	 *
	 * @return string
	 */
	private function serialiseField( $resultField ) {
		if ( count( $resultField->getContent() ) > 1 ) {
			$atoms = [];
			// Compare against false explicitly: a legitimate value such as "0"
			// is falsy and used to end the loop early, truncating the list.
			while ( ( $text = $resultField->getNextText( SMW_OUTPUT_FILE ) ) !== false ) {
				$atoms[] = $this->quoteAtom( $text );
			}

			return '[' . implode( ', ', $atoms ) . ']';
		}

		$dataValue = $resultField->getNextDataValue();

		if ( $dataValue === false ) {
			return $this->params[ 'navalue' ];
		}

		if ( $dataValue instanceof \SMWNumberValue ) {
			// Numbers are emitted unquoted.
			// NOTE(review): depends on the data value's string conversion
			// yielding a plain number - confirm against SMW.
			return (string)$dataValue;
		}

		if ( $dataValue instanceof \SMWTimeValue ) {
			return $this->quoteAtom( $dataValue->getISO8601Date() );
		}

		return $this->quoteAtom( (string)$dataValue );
	}

	/**
	 * Wraps text in single quotes as a Prolog quoted atom, escaping
	 * backslashes and single quotes so the text cannot end the atom early.
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	private function quoteAtom( $text ) {
		// strtr() replaces in a single pass, so the backslashes introduced for
		// quotes are not escaped a second time.
		return "'" . strtr( (string)$text, [ '\\' => '\\\\', "'" => "\\'" ] ) . "'";
	}

	/**
	 * Returns the selected file format, or the default 'pl' format when
	 * outputAsFile() has not selected one yet.
	 *
	 * @return array
	 */
	private function currentFileFormat() {
		return $this->fileFormat ?? $this->fileFormats[ 'pl' ];
	}
}

查看文件

@ -0,0 +1,197 @@
<?php
namespace SRF\dataframe;
use SMW\FileExportPrinter;
use SMWQueryResult;
/**
* @author Marco Falda
* @since 3.2
*/
class DataframePrinter extends FileExportPrinter {

	const HEADER_ROW_OFFSET = 1;

	/**
	 * Supported output variants; only an R script is available.
	 *
	 * @var array[]
	 */
	protected $fileFormats = [
		'R' => [
			'writer' => 'R',
			'mimetype' => 'text/R',
			'extension' => '.R',
		],
	];

	protected $styled = false;

	/**
	 * The entry of $fileFormats selected by outputAsFile(); null until then.
	 *
	 * @var array|null
	 */
	protected $fileFormat;

	/**
	 * Output a human readable label for this printer.
	 *
	 * @see ResultPrinter::getName
	 *
	 * {@inheritDoc}
	 */
	public function getName() {
		return $this->msg( 'srf-printername-dataframe' );
	}

	/**
	 * @see ExportPrinter::getMimeType()
	 *
	 * @since 1.8
	 *
	 * @param SMWQueryResult $queryResult
	 *
	 * @return string
	 */
	public function getMimeType( SMWQueryResult $queryResult ) {
		return $this->currentFileFormat()[ 'mimetype' ];
	}

	/**
	 * Builds the download file name from the `filename` parameter, falling
	 * back to a generated identifier when that parameter is empty, and
	 * appends the extension of the file format.
	 *
	 * @see ExportPrinter::getFileName
	 *
	 * @param SMWQueryResult $queryResult
	 *
	 * @return string
	 */
	public function getFileName( SMWQueryResult $queryResult ) {
		$baseName = $this->params[ 'filename' ] ?: base_convert( uniqid(), 16, 36 );

		return $baseName . $this->currentFileFormat()[ 'extension' ];
	}

	/**
	 * Selects the (only) R file format and delegates to the parent.
	 *
	 * @see ExportPrinter::outputAsFile
	 *
	 * @param SMWQueryResult $queryResult
	 * @param array $params
	 */
	public function outputAsFile( SMWQueryResult $queryResult, array $params ) {
		$this->fileFormat = $this->fileFormats[ 'R' ];

		parent::outputAsFile( $queryResult, $params );
	}

	/**
	 * Defines the list of available parameters to an individual result
	 * printer.
	 *
	 * @param array $definitions
	 *
	 * @return array
	 */
	public function getParamDefinitions( array $definitions ) {
		$params = parent::getParamDefinitions( $definitions );

		$definitions[ 'searchlabel' ]->setDefault( wfMessage( 'srf-dataframe-link' )->inContentLanguage()->text() );

		$params[ 'filename' ] = [
			'type' => 'string',
			'name' => 'filename',
			'default' => '',
			'message' => 'srf-paramdesc-dataframe-filename',
		];

		// NOTE(review): this parameter is declared but outputAsFile() always
		// picks the 'R' format regardless of its value.
		$params[ 'fileformat' ] = [
			'type' => 'string',
			'name' => 'fileformat',
			'default' => 'R',
			'tolower' => true,
			'message' => 'srf-paramdesc-dataframe-fileformat',
		];

		return $params;
	}

	/**
	 * Returns the serialised data frame when a file is requested, otherwise
	 * the text of a link to the export.
	 *
	 * @param SMWQueryResult $queryResult
	 * @param int $outputMode
	 *
	 * @return string
	 */
	protected function getResultText( SMWQueryResult $queryResult, $outputMode ) {
		if ( $outputMode === SMW_OUTPUT_FILE ) {
			return $this->getResultFileContents( $queryResult );
		}

		$this->isHTML = ( $outputMode === SMW_OUTPUT_HTML );

		return $this->getLink( $queryResult, $outputMode )->getText( $outputMode, $this->mLinker );
	}

	/**
	 * Serialises the query result as an R `data.frame(...)` expression with
	 * one `'header' = c(v1, v2, ...)` argument per printout column.
	 *
	 * Columns are collected by position so that they always line up with the
	 * headers, even when two printouts share a label. A printout with an
	 * empty label is named 'ID'. A query without rows yields `data.frame()`.
	 *
	 * @param SMWQueryResult $queryResult
	 *
	 * @return string
	 */
	protected function getResultFileContents( SMWQueryResult $queryResult ) {
		$res = 'data.frame(';

		// NOTE(review): no 'rownames' parameter is declared by
		// getParamDefinitions(); this branch only triggers if something else
		// puts that key into the parameters.
		if ( array_key_exists( 'rownames', $this->params ) ) {
			$res .= 'row.names=T, ';
		}

		$headers = [];
		foreach ( $queryResult->getPrintRequests() as $printRequest ) {
			$label = $printRequest->getLabel();
			$headers[] = $label === '' ? 'ID' : $label;
		}

		// Column position => list of serialised cell values, one per row.
		$columns = [];
		while ( $resultRow = $queryResult->getNext() ) {
			$position = 0;
			foreach ( $resultRow as $resultField ) {
				$columns[ $position ][] = $this->serialiseField( $resultField );
				$position++;
			}
		}

		// Start from an empty list so an empty result set does not hand an
		// undefined variable to implode().
		$arguments = [];
		foreach ( $columns as $position => $cells ) {
			$header = isset( $headers[ $position ] ) ? $headers[ $position ] : '';
			$arguments[] = $this->quoteString( $header ) . ' = c(' . implode( ', ', $cells ) . ')';
		}

		return $res . implode( ",\n", $arguments ) . ')';
	}

	/**
	 * Converts a single result field into the R literal used as a cell.
	 *
	 * Several values are joined with ', ' into one quoted string; a missing
	 * or empty value becomes NA.
	 *
	 * @param object $resultField A field of a result row
	 *
	 * @return string
	 */
	private function serialiseField( $resultField ) {
		if ( count( $resultField->getContent() ) > 1 ) {
			$texts = [];
			// Compare against false explicitly: a legitimate value such as "0"
			// is falsy and used to end the loop early, dropping the remaining
			// values.
			while ( ( $text = $resultField->getNextText( SMW_OUTPUT_FILE ) ) !== false ) {
				$texts[] = $text;
			}

			return $this->quoteString( implode( ', ', $texts ) );
		}

		$dataValue = $resultField->getNextDataValue();

		if ( $dataValue === false ) {
			return 'NA';
		}

		// NOTE(review): relies on the data value's string conversion, as the
		// previous loose comparison and interpolation did - confirm against SMW.
		$text = (string)$dataValue;

		if ( $text === '' ) {
			return 'NA';
		}

		if ( $dataValue instanceof \SMWNumberValue ) {
			// Numbers are emitted unquoted.
			return $text;
		}

		if ( $dataValue instanceof \SMWTimeValue ) {
			return $this->quoteString( $dataValue->getISO8601Date() );
		}

		return $this->quoteString( $text );
	}

	/**
	 * Wraps text in single quotes as an R string literal, escaping
	 * backslashes and single quotes so the text cannot end the literal early
	 * or form an unintended escape sequence.
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	private function quoteString( $text ) {
		// strtr() replaces in a single pass, so the backslashes introduced for
		// quotes are not escaped a second time.
		return "'" . strtr( (string)$text, [ '\\' => '\\\\', "'" => "\\'" ] ) . "'";
	}

	/**
	 * Returns the selected file format, or the 'R' format when outputAsFile()
	 * has not selected one yet.
	 *
	 * @return array
	 */
	private function currentFileFormat() {
		return $this->fileFormat ?? $this->fileFormats[ 'R' ];
	}
}

查看文件

@ -390,5 +390,15 @@
"srf-printername-samplestandarddeviation": "Samplestandarddeviation",
"srf-printername-samplevariance": "Samplevariance",
"srf-printername-standarddeviation": "Standarddeviation",
"srf-printername-variance": "Variance"
"srf-printername-variance": "Variance",
"srf-printername-prolog": "Prolog predicates",
"srf-paramdesc-prolog-filename": "The filename for the download of the generated predicates",
"srf-paramdesc-prolog-fileformat": "The format to be produced for the predicates file. Allowed values: pl, pro. Default: pl",
"srf-paramdesc-prolog-pname": "The name of the generated predicates. Default: predicate",
"srf-paramdesc-prolog-navalue": "The value for missing data. Default: 'NA'",
"srf-prolog-link": "Predicates",
"srf-printername-dataframe": "R dataframe",
"srf-paramdesc-dataframe-filename": "The filename for the download of the generated dataframe",
"srf-paramdesc-dataframe-fileformat": "The format to be produced for the dataframe. Allowed values: R.",
"srf-dataframe-link": "Dataframe"
}

查看文件

@ -11,7 +11,8 @@
"Macofe",
"McDutchie",
"S4b1nuz E.656",
"පසිඳු කාවින්ද"
"පසිඳු කාවින්ද",
"Marco Falda"
]
},
"srf-desc": "Formati addizionali per i risultati delle query di Semantic MediaWiki",
@ -148,5 +149,15 @@
"srf-paramdesc-gantt-diagramtitle": "Nome del diagramma",
"srf-paramdesc-gantt-diagramtheme": "Tema del diagramma",
"srf-paramdesc-gantt-axisformat": "Asse-X: Formato data",
"srf-printername-gantt": "Gantt"
"srf-printername-gantt": "Gantt",
"srf-printername-prolog": "Predicati Prolog",
"srf-paramdesc-prolog-filename": "Il nome del file per il download dei predicati Prolog generati",
"srf-paramdesc-prolog-fileformat": "Il formato file dei predicati da produrre: pl, pro. Predefinito: pl",
"srf-paramdesc-prolog-pname": "Il nome dei predicati. Predefinito: predicate",
"srf-paramdesc-prolog-navalue": "Il valore da inserire se il dato è assente. Predefinito: 'NA'",
"srf-prolog-link": "Predicati",
"srf-printername-dataframe": "Dataframe di R",
"srf-paramdesc-dataframe-filename": "Il nome del file per il download del dataframe generato",
"srf-paramdesc-dataframe-fileformat": "Il formato file del dataframe da produrre: R.",
"srf-dataframe-link": "Dataframe"
}

查看文件

@ -18,7 +18,8 @@
"Toliño",
"Umherirrender",
"Verdy p",
"아라"
"아라",
"Marco Falda"
]
},
"srf-desc": "{{desc|name=Semantic Result Formats|url=https://www.mediawiki.org/wiki/Extension:Semantic_Result_Formats}}",
@ -408,5 +409,15 @@
"srf-printername-samplestandarddeviation": "{{doc-smwformat|samplestandarddeviation}}",
"srf-printername-samplevariance": "{{doc-smwformat|samplevariance}}",
"srf-printername-standarddeviation": "{{doc-smwformat|standarddeviation}}",
"srf-printername-variance":"{{doc-smwformat|variance}}"
"srf-printername-variance":"{{doc-smwformat|variance}}",
"srf-printername-prolog": "{{doc-smwformat|prolog}}",
"srf-paramdesc-prolog-filename": "{{doc-paramdesc|filename}}",
"srf-paramdesc-prolog-fileformat": "{{doc-paramdesc|fileformat}}",
"srf-paramdesc-prolog-pname": "The name of the generated predicates. Default: predicate",
"srf-paramdesc-prolog-navalue": "The value for missing data. Default: 'NA'",
"srf-prolog-link": "{{doc-smw-link}}",
"srf-printername-dataframe": "{{doc-smwformat|dataframe}}",
"srf-paramdesc-dataframe-filename": "{{doc-paramdesc|filename}}",
"srf-paramdesc-dataframe-fileformat": "{{doc-paramdesc|fileformat}}",
"srf-dataframe-link": "{{doc-smw-link}}"
}

查看文件

@ -0,0 +1,30 @@
<?php
namespace SRF\Tests\Prolog;
use SMW\Test\QueryPrinterRegistryTestCase;
class PrologTest extends QueryPrinterRegistryTestCase {

	/**
	 * Names the result format identifiers this test case covers.
	 *
	 * @see QueryPrinterRegistryTestCase::getFormats
	 *
	 * @since 3.2
	 *
	 * @return array
	 */
	public function getFormats() {
		$formats = [];
		$formats[] = 'prolog';

		return $formats;
	}

	/**
	 * Names the printer class this test case covers.
	 *
	 * @see QueryPrinterRegistryTestCase::getClass
	 *
	 * @since 3.2
	 *
	 * @return string
	 */
	public function getClass() {
		$printerClass = '\SRF\Prolog\PrologPrinter';

		return $printerClass;
	}
}