Thank you, stephen.
I have corrected the counter problem and updated my data and program a little. The majority of the code remains the same as before. However, I still cannot reproduce the results you got. Let me show you my current code, results, and database configuration metadata.
SnarlClient.java
package com.sibench.mini;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.Iterator;
import org.openrdf.model.Model;
import org.openrdf.model.Statement;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.rio.RDFFormat;
import com.complexible.common.rdf.model.Values;
import com.complexible.stardog.StardogException;
import com.complexible.stardog.api.Connection;
import com.complexible.stardog.api.ConnectionConfiguration;
import com.complexible.stardog.api.reasoning.ReasoningConnection;
import com.complexible.stardog.reasoning.Proof;
public class SnarlClient {
    private static String serverURL = "http://localhost:5820";
    private static String dbName = "db";
    private static String password = "admin";
    private static String username = "admin";

    private ReasoningConnection aReasoningConn;
    private Connection aConn;

    // open one reasoning connection and one plain connection, then start from an empty database
    public SnarlClient() {
        aReasoningConn = ConnectionConfiguration.to(dbName).server(serverURL)
                .credentials(username, password).reasoning(true)
                .connect().as(ReasoningConnection.class);
        aConn = ConnectionConfiguration.to(dbName).server(serverURL)
                .credentials(username, password).connect();
        info("connected to " + serverURL + "/" + dbName);
        emptyDB();
        info("database initialized");
    }

    // bulk-load an RDF file into the default graph
    public void loadData(String path, RDFFormat f) {
        if (!path.isEmpty()) { // was `path != ""`, which compares references, not contents
            try {
                this.aConn.begin();
                this.aConn.add().io().format(f).stream(new FileInputStream(path));
                this.aConn.commit(); // commit only after the add succeeded
                info("data loaded from " + path);
            } catch (StardogException e) {
                err("data load failed: " + path);
                e.printStackTrace();
            } catch (FileNotFoundException e) {
                err("data file not found: " + path);
                e.printStackTrace();
            }
        } else {
            info("data path is empty");
        }
    }

    // add a single model to the triple-store under the given named graph
    public void addModel(Model m, String graphId) {
        aConn.begin();
        aConn.add().graph(m, Values.iri(graphId));
        aConn.commit();
    }

    // query, optionally with reasoning (the reasoning query runs inside its own transaction)
    public TupleQueryResult query(String queryString, boolean enableReasoning) {
        if (enableReasoning) {
            this.aReasoningConn.begin();
            TupleQueryResult result = this.aReasoningConn.select(queryString).execute();
            this.aReasoningConn.commit();
            return result;
        } else {
            return this.aConn.select(queryString).execute();
        }
    }

    // explain an inferred statement
    public Iterator<Proof> explain(Statement s) {
        return this.aReasoningConn.explain(s).computeNamedGraphs().proofs().iterator();
    }

    // delete the given graphs from the triple-store
    public void deleteGraph(String graphs) {
        this.aConn.begin();
        // https://groups.google.com/a/clarkparsia.com/forum/#!searchin/stardog/sparql$20drop/stardog/5t8Q63w25w8/iLbQaPByFAAJ
        this.aConn.update("delete { graph ?g { ?s ?p ?o } } where { graph ?g { ?s ?p ?o . } values ?g { " + graphs + " } }").execute();
        this.aConn.commit();
    }

    // empty the triple-store
    public void emptyDB() {
        aConn.begin();
        aConn.remove().all();
        aConn.commit();
        info("database cleaned");
    }

    // clear all named graphs in the current database
    public void clearAllGraphs() {
        this.aConn.begin();
        this.aConn.update("delete { graph ?g { ?s ?p ?o } } where { graph ?g { ?s ?p ?o } }").execute();
        this.aConn.commit();
        info("all graphs cleared");
    }

    // clean up everything
    public void cleanUp() {
        aReasoningConn.close();
        aConn.close();
        info("all connections closed.");
    }

    // helper functions
    private void info(Object x) { System.out.println("[Stardog INFO]" + x); }
    private void err(Object x) { System.out.println("[Stardog ERR]" + x); }
}
SIBenchMini.java
package com.sibench.mini;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.openrdf.model.Model;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.rio.RDFFormat;
import com.complexible.common.openrdf.model.Models2;
import com.complexible.common.rdf.model.Values;
public class SIBenchMini {
    public static void main(String[] args) throws IOException {
        // rdf: is not declared in the query; it resolves through the namespaces stored in the database
        String query = "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> SELECT ?s WHERE { ?s rdf:type ub:Chair . }";
        SnarlClient client = new SnarlClient();
        client.loadData("./file/ontology/univ-bench.owl", RDFFormat.RDFXML);
        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(new File("./file/data/data.rdfstream"))));
        // compile the line pattern once, instead of once per line as before;
        // groups 1-4 are subject, predicate, object, and graph id; groups 5-7 (timestamps and an optional number) are matched but unused
        Pattern pattern = Pattern.compile("<([:.\\/\\-#A-Za-z\\d]+)>\\s<([:.\\/\\-#A-Za-z\\d]+)>\\s[<\"]?([@:.\\/\\-#A-Za-z\\d]+)[>\"]?\\s<([:.\\/\\-#A-Za-z\\d]+)>\\s(\\d+:[\\d.:]+)\\s?(\\d+:[\\d.:]+)?\\s?([\\d.]+)?");
        String aDataLine;
        int counter = 1;
        while ((aDataLine = br.readLine()) != null) {
            Matcher match = pattern.matcher(aDataLine);
            if (match.find()) {
                String s = match.group(1);
                String p = match.group(2);
                String o = match.group(3);
                String g = match.group(4);
                // create a one-statement model; each triple goes into its own named graph
                Model dataModel;
                if (o.contains("http")) { dataModel = Models2.newModel(Values.statement(Values.iri(s), Values.iri(p), Values.iri(o))); }
                else { dataModel = Models2.newModel(Values.statement(Values.iri(s), Values.iri(p), Values.literal(o))); }
                client.addModel(dataModel, g);
            }
            if (counter % 303 == 0) { // query and clear after every 303 lines
                System.out.println("iteration " + counter / 303);
                TupleQueryResult result = client.query(query, true);
                while (result.hasNext()) {
                    System.out.println(result.next().toString());
                }
                result.close();
                client.clearAllGraphs();
            }
            counter++;
        }
        br.close();
    }
}
My data and ontology reside here: Dropbox - File Deleted
My results are:
[Stardog INFO]connected to http://localhost:5820/db
[Stardog INFO]database cleaned
[Stardog INFO]database initialized
[Stardog INFO]data loaded from ./file/ontology/univ-bench.owl
iteration 1: [s=http://www.Department0.University0.edu/FullProfessor7]
[Stardog INFO]all graphs cleared
iteration 2: [s=http://www.Department0.University0.edu/FullProfessor7]
[Stardog INFO]all graphs cleared
iteration 3: [s=http://www.Department0.University0.edu/FullProfessor7]
[Stardog INFO]all graphs cleared
iteration 4: [s=http://www.Department0.University0.edu/FullProfessor7]
[Stardog INFO]all graphs cleared
iteration 5: [s=http://www.Department0.University0.edu/FullProfessor7]
[Stardog INFO]all graphs cleared
iteration 6: [s=http://www.Department0.University0.edu/FullProfessor7]
[Stardog INFO]all graphs cleared
iteration 7: [s=http://www.Department0.University0.edu/FullProfessor7]
[Stardog INFO]all graphs cleared
iteration 8: [s=http://www.Department0.University0.edu/FullProfessor7]
[Stardog INFO]all graphs cleared
iteration 9: [s=http://www.Department0.University0.edu/FullProfessor7]
[Stardog INFO]all graphs cleared
In SIBenchMini.java, I load 303 triples each time. The result at iteration 1, http://www.Department0.University0.edu/FullProfessor7, is the correct answer. The program then drops all the named graphs in the database. (In this program, each triple read from data.rdfstream is wrapped in a unique graph id, so the number of triples equals the number of graphs. The ontology is not dropped because it has no explicit named graph.)

In the data.rdfstream file, the triples related to http://www.Department0.University0.edu/FullProfessor7 appear only among the first 303 triples. This means that when the next 303 triples are loaded and queried, http://www.Department0.University0.edu/FullProfessor7 should not be returned, since all of its related triples have already been dropped. What actually happens, however, is that it keeps being returned, and this is the confusing part.
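For clarity, this is the kind of sanity check I mean (a minimal sketch reusing the query() helper above; the COUNT queries and the expectations in the comments are my own, added just for illustration):

// right after client.clearAllGraphs(): count what is left, with reasoning OFF
TupleQueryResult inGraphs = client.query(
        "SELECT (COUNT(*) AS ?n) WHERE { GRAPH ?g { ?s ?p ?o } }", false);
while (inGraphs.hasNext()) { System.out.println(inGraphs.next()); } // I expect n = 0 here
inGraphs.close();

TupleQueryResult total = client.query(
        "SELECT (COUNT(*) AS ?n) WHERE { ?s ?p ?o }", false);
while (total.hasNext()) { System.out.println(total.next()); } // with query.all.graphs=true this spans all graphs, so only the ontology triples should remain
total.close();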
I have set a breakpoint right after client.clearAllGraphs(), at which point the database should contain only the ontology data, so the query is expected to return nothing. But the web console still returns http://www.Department0.University0.edu/FullProfessor7. I also tried in my terminal using stardog query --reasoning db "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> SELECT ?s WHERE { ?s rdf:type ub:Chair . }", and it returns, and only returns, http://www.Department0.University0.edu/FullProfessor7 as well, even though the database is empty!
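To narrow down which layer the stale answer comes from, I imagine a direct comparison of the two connections would help (again a sketch built on the query() helper above; the expected/observed comments are my reading of the situation, not verified output):

// run the same Chair query with and without reasoning while the store is supposedly empty
String chairQuery = "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> "
        + "SELECT ?s WHERE { ?s rdf:type ub:Chair . }";

TupleQueryResult plain = client.query(chairQuery, false);    // reasoning off
System.out.println("plain has results: " + plain.hasNext()); // expected: false on an empty store
plain.close();

TupleQueryResult reasoned = client.query(chairQuery, true);       // reasoning on
System.out.println("reasoned has results: " + reasoned.hasNext()); // this is the one that unexpectedly stays true
reasoned.close();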
I am so confused that this problem keeps following me even though I have tested on different machines and operating systems, and yet it works on your machine. Did I do something wrong that makes the connection "cache" the first result and always return it?
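If it really were a per-connection cache, I would expect opening a brand-new reasoning connection for every query to behave differently. This is a minimal sketch of what I mean, reusing the constants from SnarlClient above (I have not actually restructured my program this way):

// open a throwaway reasoning connection, run the query, and close it again
ReasoningConnection fresh = ConnectionConfiguration.to("db")
        .server("http://localhost:5820")
        .credentials("admin", "admin")
        .reasoning(true)
        .connect().as(ReasoningConnection.class);
TupleQueryResult r = fresh.select(chairQuery).execute(); // chairQuery as in the sketch above
while (r.hasNext()) { System.out.println(r.next()); }
r.close();
fresh.close(); // a fresh connection per query should defeat any per-connection cache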
Sorry for such a long post, but I really wanted to provide as much information as possible so that you can better understand this situation.
Really appreciate your time and help!
Robert
I have also attached my database configuration metadata in case it helps to diagnose the problem.
+-------------------------------------------+----------------------------------------------------------------------------------+
| Option | Value |
+-------------------------------------------+----------------------------------------------------------------------------------+
| database.archetypes | |
| database.connection.timeout | 1h |
| database.creator | admin |
| database.name | db |
| database.namespaces | rdf=http://www.w3.org/1999/02/22-rdf-syntax-ns#, |
| | rdfs=http://www.w3.org/2000/01/rdf-schema#, |
| | xsd=http://www.w3.org/2001/XMLSchema#, owl=http://www.w3.org/2002/07/owl#, |
| | stardog=tag:stardog:api:, =http://api.stardog.com/ |
| database.online | true |
| database.time.creation | 2017-04-14T13:17:46.182-04:00 |
| database.time.modification | 2017-04-18T14:47:37.259-04:00 |
| docs.default.rdf.extractors | tika |
| docs.default.text.extractors | tika |
| docs.filesystem.uri | file:/// |
| docs.path | docs |
| icv.active.graphs | default |
| icv.consistency.automatic | false |
| icv.enabled | false |
| icv.reasoning.enabled | false |
| index.differential.enable.limit | 1000000 |
| index.differential.merge.limit | 10000 |
| index.differential.size | 0 |
| index.disk.page.count.total | 123768 |
| index.disk.page.count.used | 15 |
| index.disk.page.fill.ratio | 0.4501708984375 |
| index.last.tx | 8aafb66d-b9c1-439e-8e8d-eea4c6c12433 |
| index.literals.canonical | true |
| index.named.graphs | true |
| index.persist | true |
| index.persist.sync | true |
| index.size | 585 |
| index.statistics.update.automatic | true |
| index.type | Disk |
| preserve.bnode.ids | true |
| progress.monitor.enabled | true |
| query.all.graphs | true |
| query.plan.reuse | ALWAYS |
| query.timeout | 5m |
| reasoning.approximate | false |
| reasoning.classify.eager | true |
| reasoning.consistency.automatic | false |
| reasoning.punning.enabled | false |
| reasoning.sameas | OFF |
| reasoning.schema.graphs | * |
| reasoning.schema.timeout | 1m |
| reasoning.type | DL |
| reasoning.virtual.graph.enabled | true |
| search.default.limit | 100 |
| search.enabled | false |
| search.index.datatypes | http://www.w3.org/2001/XMLSchema#string, |
| | http://www.w3.org/1999/02/22-rdf-syntax-ns#langString |
| search.reindex.mode | sync |
| search.wildcard.search.enabled | false |
| security.named.graphs | false |
| spatial.enabled | false |
| spatial.index.version | 1 |
| spatial.precision | 11 |
| strict.parsing | true |
| transaction.isolation | SNAPSHOT |
| transaction.logging | false |
| transaction.logging.ignore.startup.errors | true |
| transaction.logging.rotation.remove | true |
| transaction.logging.rotation.size | 524288000 |
| versioning.directory | versioning |
| versioning.enabled | false |
+-------------------------------------------+----------------------------------------------------------------------------------+