Revisão | ba07279b6dbc2362089e2250ab2829b4ce232ad6 (tree) |
---|---|
Hora | 2025-01-23 03:29:21 |
Autor | Lorenzo Isella <lorenzo.isella@gmai...> |
Committer | Lorenzo Isella |
A simple script that retrieves data through a SPARQL query, returning it either as JSON or as RDF.
@@ -0,0 +1,97 @@ | ||
1 | +rm(list=ls()) | |
2 | + | |
3 | +library(tidyverse) | |
4 | +library(janitor) | |
5 | +library(httr) | |
6 | +library(jsonlite) | |
7 | +library(rdflib) | |
8 | + | |
9 | +source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R") | |
10 | + | |
11 | + | |
#' Run a SPARQL query against a remote endpoint
#'
#' Sends `query` to `endpoint` as a form-encoded HTTP POST and returns the
#' response either as a parsed JSON list or as an rdflib graph.
#'
#' @param endpoint URL of the SPARQL endpoint (a single string).
#' @param query SPARQL query text (a single string).
#' @param return_format Either `"json"` (default; the request asks for
#'   `application/sparql-results+json`) or `"rdf"` (the request asks for
#'   `text/turtle`).
#' @return For `"json"`, the list obtained by parsing the response body as
#'   JSON. For `"rdf"`, the object returned by `rdf_parse()` after reading the
#'   response body as Turtle.
#' @details Stops with an error that includes the HTTP status code when the
#'   server answers with an error status.
fetch_sparql <- function(endpoint, query, return_format = c("json", "rdf")) {
  return_format <- match.arg(return_format)
  stopifnot(
    is.character(endpoint), length(endpoint) == 1L,
    is.character(query), length(query) == 1L
  )

  # Media type requested from the server for each supported format.
  accept_header <- switch(
    return_format,
    json = "application/sparql-results+json",
    rdf = "text/turtle"
  )

  response <- POST(
    url = endpoint,
    body = list(query = query),
    encode = "form",
    add_headers(Accept = accept_header)
  )

  if (http_error(response)) {
    stop(
      "Failed to fetch SPARQL results. HTTP status code: ",
      status_code(response),
      call. = FALSE
    )
  }

  if (return_format == "json") {
    return(content(response, as = "parsed", type = "application/json"))
  }

  # Turtle documents are UTF-8 by specification, so state the encoding
  # explicitly instead of relying on httr's guess.
  rdf_text <- content(response, as = "text", encoding = "UTF-8")

  # rdf_parse() is given a file path, so the payload goes through a temporary
  # file; remove it on exit so repeated calls do not pile up files in tempdir().
  # NOTE(review): this relies on rdf_parse() having read the whole file by the
  # time it returns (true for the default in-memory storage) - confirm if a
  # different storage backend is ever used.
  temp_file <- tempfile(fileext = ".ttl")
  on.exit(unlink(temp_file), add = TRUE)

  # useBytes = TRUE writes the UTF-8 bytes as they are, avoiding a lossy
  # re-encoding to the native locale on non-UTF-8 platforms.
  writeLines(rdf_text, temp_file, useBytes = TRUE)

  rdf_parse(temp_file, format = "turtle")
}
50 | + | |
51 | + | |
52 | + | |
# Remote query ----

# SPARQL endpoint and the query sent to it: up to ten resources typed as
# dcat:Dataset.
endpoint <- "https://data.europa.eu/sparql"
query <- "
PREFIX dcat: <http://www.w3.org/ns/dcat#>
SELECT * WHERE { ?d a dcat:Dataset } LIMIT 10
"

# JSON route ----

json_result <- fetch_sparql(endpoint, query, return_format = "json")

# The parsed JSON is a nested list; show its layout.
str(json_result)

# simple_json_to_tibble() is not defined in this script; presumably it comes
# from the stat_lib.R file sourced at the top - verify.
json_tibble <- simple_json_to_tibble(json_result)

# RDF route ----

rdf_result <- fetch_sparql(endpoint, query, return_format = "rdf")

# Display the graph that was loaded.
print(rdf_result)

# Local query ----

# Alternative local query, kept for reference but disabled:
## local_query <- "
## PREFIX dcat: <http://www.w3.org/ns/dcat#>
## SELECT * WHERE { ?d a dcat:Dataset } LIMIT 5
## "

# Pull the first five triples out of the local graph, whatever they are.
local_query <- "
SELECT ?subject ?predicate ?object WHERE {
 ?subject ?predicate ?object .
}
LIMIT 5
"

query_result <- rdf_query(rdf_result, local_query)

print(query_result)

print("So far so good")