-- Module:Wikidata/Chemin/parser
--
-- Cette page fait l’objet d’une mesure de semi-protection étendue.
-- Une page de Wikipédia, l'encyclopédie libre.
-- Ceci est une version archivée de cette page, en date du 19 novembre 2016 à 17:03 et modifiée en dernier par TomT0m (discuter | contributions). Elle peut contenir des erreurs, des inexactitudes ou des contenus vandalisés non présents dans la version actuelle.
--
-- Documentation[voir] [modifier] [historique] [purger]


local tool = require("Module:Utilitaire")
local path = require "Module:Wikidata/Chemin/Path"
local parser = require "Module:FParser"

-- Table exported by this module; each grammar rule below is stored on it as a parser function.
local pparser = {}

--[[

grammar : 

letter = "A" | "B" | "C" | "D" | "E" | "F" | "G"
       | "H" | "I" | "J" | "K" | "L" | "M" | "N"
       | "O" | "P" | "Q" | "R" | "S" | "T" | "U"
       | "V" | "W" | "X" | "Y" | "Z" | "a" | "b"
       | "c" | "d" | "e" | "f" | "g" | "h" | "i"
       | "j" | "k" | "l" | "m" | "n" | "o" | "p"
       | "q" | "r" | "s" | "t" | "u" | "v" | "w"
       | "x" | "y" | "z" ;
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
space = " " ;

Pid = "P" , digit, { digit } ;
Pname = letter, { letter | digit | space | "'" } ;

Path 	 ::= 	PathAlternative
PathAlternative 	 ::= 	PathSequence ( '|' PathSequence )*
PathSequence 	 ::= 	PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
PathElt 	 ::= 	PathPrimary PathMod?
PathEltOrInverse 	 ::= 	PathElt | '^' PathElt
PathMod 	 ::= 	( '*' | '?' | '+' | '{' ( Integer ( ',' ( '}' | Integer '}' ) | '}' ) ) )
PathPrimary 	 ::= ( Prop | 'a' | '(' Path ')' 
                     | ( Prop | '!' PathNegatedPropertySet ) '>' ( Prop | '!' PathNegatedPropertySet | PathPropertySet ) 
                     | '!' PathNegatedPropertySet )
                    
Prop ::= IRIref | Pid | Pname

rules 95 and 96 in https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#rPathNegatedPropertySet

PathNegatedPropertySet	  ::=  	PathOneInPropertySet | '(' ( PathOneInPropertySet ( '|' PathOneInPropertySet )* )? ')'
PathOneInPropertySet	  ::=  	iri | 'a' | '^' ( iri | 'a' ) 

PathPropertySet           ::=   '(' Path ( '|' Path )+ ')'

For information, SPARQL property path grammar :

https://www.w3.org/TR/sparql11-property-paths/#path-syntax

TriplesSameSubjectPath 	 ::=   	VarOrTerm PropertyListNotEmptyPath | TriplesNode PropertyListPath
PropertyListPath 	 ::= 	PropertyListNotEmpty?
PropertyListNotEmptyPath 	 ::= 	( VerbPath | VerbSimple ) ObjectList ( ';' ( ( VerbPath | VerbSimple ) ObjectList )? )*
VerbPath 	 ::= 	Path
VerbSimple 	 ::= 	Var
Path 	 ::= 	PathAlternative
PathAlternative 	 ::= 	PathSequence ( '|' PathSequence )*
PathSequence 	 ::= 	PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
PathElt 	 ::= 	PathPrimary PathMod?
PathEltOrInverse 	 ::= 	PathElt | '^' PathElt
PathMod 	 ::= 	( '*' | '?' | '+' | '{' ( Integer ( ',' ( '}' | Integer '}' ) | '}' ) ) )
PathPrimary 	 ::= 	( IRIref | 'a' | '(' Path ')' ) 

--]] 

-- Local shortcuts to the FParser combinators and lexer primitives used by the
-- grammar rules of this module.
local lexer = parser.lexer

local chain = parser.chain
local alternative = parser.alternative
local plus = parser.plus
-- `star` and `questionmark` are called by several rules in this file but were
-- never bound to a local, so they only resolved if something had leaked them
-- as globals. Bind them explicitly. The global of the same name is kept as a
-- fallback in case FParser does not export them under these field names.
-- NOTE(review): confirm the field names against Module:FParser.
local star = parser.star or star
local questionmark = parser.questionmark or questionmark
local idop = parser.idop
local lex_char = lexer.lex_char
local parse_epsilon = lexer.lex_epsilon
local lex_integer = lexer.lex_integer

----------------------------------------------------------------------
-- grammar base lexer functions
----------------------------------------------------------------------

-- Lexes a property identifier (pattern "P[0-9]+") through lexer.lex_regex.
-- On success the value returned by lex_regex is tagged with type "Pid" and
-- returned; otherwise nothing is returned.
local lex_pid = function(state)
	local lexed = lexer.lex_regex(state, "P[0-9]+")
	if not lexed then
		return
	end
	lexed.type = "Pid"
	return lexed
end

-- Lexes a prefix name (pattern "[a-z_]*") through lexer.lex_regex.
-- On success the value returned by lex_regex is tagged with type "prefix" and
-- returned; otherwise nothing is returned.
local lex_sparql_prefix = function(state)
	local lexed = lexer.lex_regex(state, "[a-z_]*")
	if not lexed then
		return
	end
	lexed.type = "prefix"
	return lexed
end

-- Lexes a property label (pattern "[a-zA-Z][a-z A-Z'-]*") through
-- lexer.lex_regex. On success the value returned by lex_regex is tagged with
-- type "Plabel" and returned; otherwise nothing is returned.
local lex_property_name = function(state)
	local lexed = lexer.lex_regex(state, "[a-zA-Z][a-z A-Z'-]*")
	if not lexed then
		return
	end
	lexed.type = "Plabel"
	return lexed
end

-------------------------------------------------------------------


-- PathAlternative 	 ::= 	PathSequence ( '|' PathSequence )*
--
-- Parses a first path sequence, then any number of "|"-separated sequences.
-- When the repetition left a non-empty `acc` list on the result, the first
-- sequence's node is put in front of it and the whole list becomes an
-- AlternativeNode; otherwise the node already carried by the result is kept.
-- `acc` is cleared before returning. Returns nothing when parsing fails.
pparser.pathAlternative = function(state)
	local head

	-- Remembers the node of the first sequence before the repetition runs.
	local remember_head = idop(function(current)
		head = current.node
	end)

	local more_branches = star(chain{
		lex_char("[|]"),
		pparser.pathSequence
	})

	local res = chain{
		pparser.pathSequence,
		remember_head,
		more_branches
	}(state)

	if not res then
		return
	end

	local branches = res.acc
	if branches and #branches > 0 then
		table.insert(branches, 1, head)
		res.node = path.AlternativeNode:create(branches)
	end
	res.acc = nil
	return res
end

--[[
plop=p.parse("P31|P17/P279+",p.pathAlternative) ; t = require "Module:Tools" ; t.dump_to_console(plop) 
yes
nodes=>
   1=>
      property=>
         P31
   2=>
      nodes=>
         1=>
            property=>
               P17
         2=>
            node=>
               property=>
                  P279
                  
plop=p.parse("P31|P17>P31/P279+",p.pathAlternative) ; t = require "Module:Tools" ; t.dump_to_console(plop) 
yes
nodes=>
   1=>
      property=>
         P31
   2=>
      nodes=>
         1=>
            property=>
               P31
            node=>
               P17
         2=>
            node=>
               property=>
                  P279

--]]


-- PathSequence 	 ::= 	PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
--
-- Parses a first element, then any number of "/"-separated elements or
-- "^"-prefixed (inverted) elements. When the repetition left a non-empty
-- `acc` list on the result, the first element's node is put in front of it
-- and the list becomes a SequenceNode; otherwise the result is returned as
-- produced by the chain. Returns nil when parsing fails.
pparser.pathSequence = function(state)
	local firstNode

	local res = chain{
		pparser.pathEltOrInverse,
		idop(function(state) firstNode = state.node end),
		star(
			alternative{
				chain{
					lexer.lex_char("/"),
					pparser.pathEltOrInverse
				},
				chain{
					lexer.lex_char("^"),
					pparser.pathElt,
					-- A chain step has to hand the state back (same shape as
					-- the step in pathEltOrInverse); the previous version
					-- returned a bare node, and built it from path.InvNode,
					-- a name no other rule in this file uses.
					function(state)
						state.node = path.InverseNode:create(state.node)
						return state
					end
				}
			}
		)
	}(state)

	-- Guard against a missing `acc` the same way pathAlternative does.
	if res and res.acc and #res.acc > 0 then
		local acc = res.acc
		table.insert(acc, 1, firstNode)
		res.node = path.SequenceNode:create(acc)
	end
	return res
end

--[[
Tests:

plop=p.parse("P31/P31+",p.pathSequence) ; t = require "Module:Tools" ; t.dump_to_console(plop) 
yes
nodes=>
   1=>
      property=>
         P31
   2=>
      node=>
         property=>
            P31
--]]



-- Debug helper: builds a step (through idop) that logs `message` with mw.log
-- when it runs.
local show = function(message)
	return idop(function()
		mw.log(message)
	end)
end


-- PathElt 	 ::= 	PathPrimary PathMod?
-- PathMod 	 ::= 	( '*' | '?' | '+' | '{' ( Integer ( ',' ( '}' | Integer '}' ) | '}' ) ) )
--
-- Parses a primary path followed by an optional modifier and stores the
-- resulting node on the returned state:
--   "*"      -> path.StarNode
--   "+"      -> path.PlusNode
--   "?"      -> path.MaybeNode
--   "^"      -> path.InverseNode (postfix form; not part of the PathMod rule above)
--   "{n}"    -> path.BetweenNode with min = n and max = nil
--   "{n,m}"  -> path.BetweenNode with min = n and max = m
--   nothing  -> the primary node itself
-- Note: after "," an integer is required here, so the open-ended "{n,}" form
-- of the grammar is not accepted as a modifier.
-- Returns nothing when parsing fails.
function pparser.pathElt(state)
	local node
	local prime_node

	local min_bound = nil
	local max_bound = nil

	-- Builds a step that wraps the primary node in `node_class`. The bounds
	-- are read when the step runs, not when it is built, so the values set by
	-- the "{...}" branch are the ones passed to the constructor.
	local function create_node(node_class)
		return idop(
			function()
				node = node_class:create(prime_node, min_bound, max_bound)
			end
		)
	end

	local res = chain{
		pparser.pathPrimary,
		idop(function(state) prime_node = state.node end),
		alternative{
			chain{
				lex_char("*"),
				create_node(path.StarNode)
			},
			chain{
				lex_char("+"),
				create_node(path.PlusNode)
			},
			chain{
				lex_char("?"),
				create_node(path.MaybeNode)
			},
			chain{
				lex_char("^"),
				create_node(path.InverseNode)
			},
			chain{
				lex_char("{"),
				lex_integer,
				idop(function(state) min_bound = tonumber(state.lexed) end),
				alternative{
					chain{
						lex_char(","),
						lex_integer,
						idop(function(state) max_bound = tonumber(state.lexed) end)
					},
					chain{
						parse_epsilon,
						idop(function() max_bound = nil end)
					}
				},
				create_node(path.BetweenNode),
				lex_char("}"),
			},
			-- No modifier: the element is the primary node itself. This
			-- branch also overwrites `node` if a branch above failed midway.
			chain{
				parse_epsilon,
				idop(function() node = prime_node end)
			}
		}
	}(state)

	if res then
		res.node = node
		return res
	end
end


--[[ 

Tests :

plop=p.parse("P31",p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop)
yes
property=>
   P31

plop=p.parse("P31>P279", p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop) 
yes
property=>
   P279
node=>
   P31

plop=p.parse("P31{1,6}",p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop)


plop=p.parse("(P31|P17>P31)",p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop) 
yes
nodes=>
   1=>
      property=>
         P31
   2=>
      property=>
         P31
      node=>
         P17

--]]

-- PathEltOrInverse 	 ::= 	PathElt | '^' PathElt
--
-- Parses either a plain path element, or "^" followed by a path element whose
-- node is then wrapped in an InverseNode. Returns whatever the alternative
-- combinator returns for `state`.
pparser.pathEltOrInverse = function(state)
	return alternative{
		pparser.pathElt,
		chain{
			lex_char("^"),
			pparser.pathElt,
			function(state)
				-- Node classes are instantiated with :create everywhere else
				-- in this file; the class table was previously called
				-- directly here.
				state.node = path.InverseNode:create(state.node)
				return state
			end
		}
	}(state)
end

-- Builds the path node substituted for the 'a' keyword: P31/P279*
-- (a sequence of property P31 followed by zero or more P279).
local instance = function()
	local instance_of = path.PropertyNode:create("P31")
	local subclass_closure = path.StarNode:create(path.PropertyNode:create("P279"))
	return path.SequenceNode:create({ instance_of, subclass_closure })
end

-- PathPrimary 	 ::= ( Prop | '!'  NegatedPropertySet ) ( '>' ( Prop | '!'  NegatedPropertySet ) ) ? | 'a' | '(' Path ')' 
--
-- Tries, in this order:
--   1. the keyword 'a' followed by a space, which yields the node built by instance();
--   2. a property or a '!' negated property set, optionally followed by '>'
--      and a second property / negated set, in which case both nodes are
--      combined into a QualifiedStatementNode;
--   3. a parenthesized path;
--   4. a '!' negated property set.
-- The selected node is stored on the returned state. Returns nothing when no
-- alternative matches.
pparser.pathPrimary = function(state)
	local node

	-- Step that records the current state's node as the result so far.
	local function keep_node()
		return idop(function(current)
			node = current.node
		end)
	end

	-- Prop | '!' NegatedPropertySet
	local function prop_or_negated_set()
		return alternative{
			pparser.prop,
			chain {lex_char('!'), pparser.negatedPropertySet}
		}
	end

	local keyword_a = chain{
		lex_char('a'),
		lex_char(' '),
		idop(function() node = instance() end)
	}

	local statement = chain{
		chain{
			prop_or_negated_set(),
			keep_node()
		},
		alternative{
			chain{
				lex_char('>'),
				prop_or_negated_set(),
				idop(function(current)
					node = path.QualifiedStatementNode:create(node, current.node)
				end)
			},
			parse_epsilon
		}
	}

	local group = chain{
		lexer.open_parenthesis,
		pparser.path,
		keep_node(),
		lexer.close_parenthesis
	}

	local negated_set = chain{
		lexer.lex_char('!'),
		pparser.negatedPropertySet,
		keep_node()
	}

	local res = alternative{
		keyword_a,
		statement,
		group,
		negated_set
	}(state)

	if not res then
		return
	end
	res.node = node
	return res
end

--[[
Tests :

p.parse("a ", p.pathPrimary) => yes
p.parse("!P31", p.pathPrimary) => yes
p.parse("!(P31|instance of)", p.pathPrimary) => yes

--]]

-- PathNegatedPropertySet	  ::=  	PathOneInPropertySet | '(' ( PathOneInPropertySet ( '|' PathOneInPropertySet )* )? ')'
--
-- Parses either a single set member, or a parenthesized list of one or more
-- members separated by '|'. The node of each parsed member is collected and,
-- on success, the list is wrapped in a NegatedPropertySetNode stored on the
-- result. Returns the result of the alternative (falsy on failure).
pparser.negatedPropertySet = function(state)
	local members = {}

	-- Chain step: appends the current node to the member list and passes the
	-- state through unchanged.
	local function collect(current)
		members[#members + 1] = current.node
		return current
	end

	local single_member = chain{
		pparser.pathOneInPropertySet,
		collect
	}

	local further_members = questionmark(
		plus(
			chain{
				lexer.lex_char("|"),
				pparser.pathOneInPropertySet,
				collect
			}
		)
	)

	local parenthesized_list = chain{
		lexer.open_parenthesis,
		pparser.pathOneInPropertySet,
		collect,
		further_members,
		lexer.close_parenthesis
	}

	local res = alternative{
		single_member,
		parenthesized_list
	}(state)

	if res then
		res.node = path.NegatedPropertySetNode:create(members)
	end
	return res
end

--[[
Tests :

p.parse("!P31",p.negatedPropertySet)
p.parse("(P31|P32)",p.negatedPropertySet) => yes
p.parse("P31",p.negatedPropertySet) => yes
p.parse("^P31",p.negatedPropertySet) => yes
p.parse("^(P31)",p.negatedPropertySet) => nope
p.parse("(P31)",p.negatedPropertySet) => yes
p.parse("(^P31)",p.negatedPropertySet) => yes
p.parse("(^P31|a|plop)",p.negatedPropertySet) => yes

All good(?)

--]]

-- PathOneInPropertySet	  ::=  	iri | 'a' | '^' ( iri | 'a' ) 
--
-- Parses one member of a negated property set: the keyword 'a' (replaced by
-- the node built by instance()) or a property, optionally prefixed by '^'.
-- A '^' prefix wraps the member in an InverseNode; without it the member's
-- node is used as is. The node is stored on the returned result. Returns the
-- result of the alternative (falsy on failure).
pparser.pathOneInPropertySet = function(state)
	-- Node of the last member parsed by pElement. This used to be an
	-- accidental global that was written but never read, so the instance()
	-- node built for 'a' was lost.
	local elem
	local node

	local pElement = alternative{
		chain{
			lexer.lex_char('a'),
			idop(function() elem = instance() end)
		},
		chain{
			pparser.prop,
			idop(function(state) elem = state.node end)
		}
	}

	local res = alternative{
		-- '^' member: inverse of the member. The two branches were swapped
		-- before, so the plain form was the one wrapped in an InverseNode.
		chain{
			lexer.lex_char("^"),
			pElement,
			idop(function() node = path.InverseNode:create(elem) end)
		},
		chain{
			pElement,
			idop(function() node = elem end)
		}
	}(state)

	if res then res.node = node end
	return res
end

			

-- Prop ::= IRIref | Pid | Pname
--
-- Parses a property reference: either a property id with an optional
-- "prefix:" in front, or a property label. On success a PropertyNode built
-- from the result's `lexed` field is stored on the result. Returns nothing
-- when neither form matches.
pparser.prop = function(state)
	local optional_prefix = questionmark(
		chain{
			lex_sparql_prefix,
			lex_char(":")
		}
	)

	local prefixed_pid = chain{
		optional_prefix,
		lex_pid
	}

	local res = alternative{
		prefixed_pid,
		lex_property_name
	}(state)

	if not res then
		return
	end
	res.node = path.PropertyNode:create(res.lexed)
	return res
end
--[[

Tests :

p.parse("a ", p.pathPrimary) => yes
p.parse("P31@", p.prop) => nope
p.parse("P31", p.prop) => nope
p.parse("P31>P279", p.prop) => nope

--]]

-- Path ::= PathAlternative
-- Entry rule of the grammar; delegates directly to pathAlternative.
function pparser.path(state)
	return pparser.pathAlternative(state)
end

-- Runs FParser's parse function on `property_path` with the `path` rule as the
-- start rule and returns whatever it returns.
function pparser.parse_path(property_path)
	return parser.parse(property_path, pparser.path)
end

-- Re-export FParser's generic parse function so that individual rules can be
-- tried from the debug console, e.g. p.parse("P31/P279", p.pathSequence).
pparser.parse = parser.parse

return pparser