From a2e0aa95d0b6f42c4d5b2e87c273f7fe564e8765 Mon Sep 17 00:00:00 2001
From: Horacio Velvetine <78946408+horaciovelvetine@users.noreply.github.com>
Date: Wed, 24 Apr 2024 21:39:03 -0600
Subject: [PATCH 01/10] Creates docs for class consistent with other examples
adds a list to separate out each use-case
adds throws and author
---
.../FetchOnlineDataExampleUpdate.java | 60 +++++++++++++++++++
1 file changed, 60 insertions(+)
create mode 100644 src/examples/FetchOnlineDataExampleUpdate.java
diff --git a/src/examples/FetchOnlineDataExampleUpdate.java b/src/examples/FetchOnlineDataExampleUpdate.java
new file mode 100644
index 0000000..8b35e5a
--- /dev/null
+++ b/src/examples/FetchOnlineDataExampleUpdate.java
@@ -0,0 +1,60 @@
+package examples;
+
+/*
+ * #%L
+ * Wikidata Toolkit Examples
+ * %%
+ * Copyright (C) 2014 - 2015 Wikidata Toolkit Developers
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
+import org.wikidata.wdtk.datamodel.interfaces.ItemDocument;
+import org.wikidata.wdtk.wikibaseapi.BasicApiConnection;
+import org.wikidata.wdtk.wikibaseapi.WbSearchEntitiesResult;
+import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
+import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;
+
+/**
+ * This example demonstrates fetching data from Wikidata.org and covers finding: single entities, multiple entites, and applying various filters to reduce the volume of data fetched.
+ *
+ *
+ * - Fetching data for one entity: Fetches data for a single entity with the Qid "Q42" and prints the data. If this entity is an item it also prints the English label.
+ * - Fetching data for several entities: Fetches data for multiple entities with the Qids "Q80" and "P31" and prints their IDs.
+ * - Fetching data for entities applying filters: This example shows how to apply filters to reduce the volume of fetched data. It sets filters for: site links, language, and propertities, then fetches data for a single entity with Qid "Q8", then prints its French label and English Wikipedia page title.
+ * - Fetching by Page Title: Fetches data for a single entity with the page titles (e.g. "Terry Pratchett" on English Wikipedia), then prints the correspinding Qid.
+ *- Fetching by Multiple Page Titles: Fetches data for multiple entities with the page titles (e.g. "Wikidata" and "Wikipedia" on English Wikipedia), then prints the correspinding Qids.
+ *- Searching for Entities: Searches for entities with a specific label (e.g. "Douglas Adams") and prints the Qids and labels of the search results.
+ *
+ *
+ * @throws MediaWikiApiErrorException
+ * @throws IOException
+
+ * @author Markus Kroetzsch
+ */
+
+public class FetchOnlineDataExampleUpdate {
+
+ public static void main(String[] args) throws MediaWikiApiErrorException, IOException {
+
+ }
+
+}
From 14fb2848dfbb05502a52daf5d0793731e5a0c48b Mon Sep 17 00:00:00 2001
From: Horacio Velvetine <78946408+horaciovelvetine@users.noreply.github.com>
Date: Wed, 24 Apr 2024 21:54:14 -0600
Subject: [PATCH 02/10] adds printDoc method follows new description from above
---
src/examples/FetchOnlineDataExampleUpdate.java | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/src/examples/FetchOnlineDataExampleUpdate.java b/src/examples/FetchOnlineDataExampleUpdate.java
index 8b35e5a..416d095 100644
--- a/src/examples/FetchOnlineDataExampleUpdate.java
+++ b/src/examples/FetchOnlineDataExampleUpdate.java
@@ -54,7 +54,23 @@
public class FetchOnlineDataExampleUpdate {
public static void main(String[] args) throws MediaWikiApiErrorException, IOException {
-
+ ExampleHelpers.configureLogging();
+ FetchOnlineDataExampleUpdate.printDocumentation();
}
+ /**
+ * Prints some basic documentation about this program.
+ */
+ public static void printDocumentation() {
+ System.out
+ .println("********************************************************************");
+ System.out.println("*** Wikidata Toolkit: FetchOnlineDataExample");
+ System.out.println("*** ");
+ System.out
+ .println("*** This application demonstrates fetching data from Wikidata.org and covers finding: single entities, multiple entites, and applying various filters to reduce the volume of data fetched.");
+ System.out.println("*** It does not download any dump files.");
+ System.out
+ .println("********************************************************************");
+
+ }
}
From 67fc5fbf186e905e5915ed99859a4f770d25dc0b Mon Sep 17 00:00:00 2001
From: Horacio Velvetine <78946408+horaciovelvetine@users.noreply.github.com>
Date: Sun, 28 Apr 2024 22:33:37 -0600
Subject: [PATCH 03/10] simplify docs explainer, and adds helpers with javadocs
for fetching single and multiple entities
---
.../FetchOnlineDataExampleUpdate.java | 90 ++++++++++++++++++-
1 file changed, 86 insertions(+), 4 deletions(-)
diff --git a/src/examples/FetchOnlineDataExampleUpdate.java b/src/examples/FetchOnlineDataExampleUpdate.java
index 416d095..1bea248 100644
--- a/src/examples/FetchOnlineDataExampleUpdate.java
+++ b/src/examples/FetchOnlineDataExampleUpdate.java
@@ -21,6 +21,7 @@
*/
import java.io.IOException;
+import java.io.PrintStream;
import java.util.Collections;
import java.util.Map;
import java.util.Map.Entry;
@@ -34,11 +35,10 @@
import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;
/**
- * This example demonstrates fetching data from Wikidata.org and covers finding: single entities, multiple entites, and applying various filters to reduce the volume of data fetched.
+ * This example demonstrates fetching data from the Wikidata.org API and covers fetching entity data, and applying various filters to reduce the volume of data returned.
*
*
- * - Fetching data for one entity: Fetches data for a single entity with the Qid "Q42" and prints the data. If this entity is an item it also prints the English label.
- * - Fetching data for several entities: Fetches data for multiple entities with the Qids "Q80" and "P31" and prints their IDs.
+ * - Fetching data for a single entity, or multiple entities, using entity Qids (e.g. "Q42" and "P31").
* - Fetching data for entities applying filters: This example shows how to apply filters to reduce the volume of fetched data. It sets filters for: site links, language, and propertities, then fetches data for a single entity with Qid "Q8", then prints its French label and English Wikipedia page title.
* - Fetching by Page Title: Fetches data for a single entity with the page titles (e.g. "Terry Pratchett" on English Wikipedia), then prints the correspinding Qid.
*- Fetching by Multiple Page Titles: Fetches data for multiple entities with the page titles (e.g. "Wikidata" and "Wikipedia" on English Wikipedia), then prints the correspinding Qids.
@@ -56,6 +56,87 @@ public class FetchOnlineDataExampleUpdate {
public static void main(String[] args) throws MediaWikiApiErrorException, IOException {
ExampleHelpers.configureLogging();
FetchOnlineDataExampleUpdate.printDocumentation();
+
+ WikibaseDataFetcher wbdf = new WikibaseDataFetcher(
+ BasicApiConnection.getWikidataApiConnection(),
+ Datamodel.SITE_WIKIDATA);
+
+ fetchEntityDataByQid(wbdf);
+ fetchEntitiesDataByQid(wbdf);
+ }
+
+ /**
+ * This method fetches data for a single entity from the Wikidata.org API using its QID (unique identifier).
+ * The fetched data is then written to a file and the English name for the entity is printed to the console.
+ *
+ * @param wbdf An instance of WikibaseDataFetcher that is used to fetch the data.
+ * @throws MediaWikiApiErrorException If there is an error while fetching the data from the API.
+ * @throws IOException If there is an error while writing the fetched data to a file.
+ */
+ public static void fetchEntityDataByQid(WikibaseDataFetcher wbdf) throws MediaWikiApiErrorException, IOException {
+ System.out.println("*** Fetching data for one entity:");
+ EntityDocument q42 = wbdf.getEntityDocument("Q42");
+ writeEntityDataToFile(q42, "entity-Q42.txt");
+ if (q42 instanceof ItemDocument) {
+ System.out.println("The English name for entity Q42 is "
+ + ((ItemDocument) q42).getLabels().get("en").getText());
+ }
+ System.out.println("Raw data for entity Q42 written to file entity-Q42.txt");
+ }
+
+ /**
+ * This method fetches data for multiple entities from the Wikidata.org API using their QID's (unique identifier).
+ * The fetched data is then written to a file and the Qid's are printed to the console.
+ *
+ * @param wbdf An instance of WikibaseDataFetcher that is used to fetch the data.
+ * @throws MediaWikiApiErrorException If there is an error while fetching the data from the API.
+ * @throws IOException If there is an error while writing the fetched data to a file.
+ */
+ public static void fetchEntitiesDataByQid(WikibaseDataFetcher wbdf) throws MediaWikiApiErrorException, IOException {
+ System.out.println("*** Fetching data for several entities:");
+ Map results = wbdf.getEntityDocuments("Q80",
+ "P31");
+ // Keys of this map are Qids, but we only use the values here:
+ for (EntityDocument ed : results.values()) {
+ System.out.println("Successfully retrieved data for "
+ + ed.getEntityId().getId());
+ }
+ writeEntityDataToFile(results.values().toArray(new EntityDocument[0]), "entities-Q80-P31.txt");
+ System.out.println("Raw data for entities Q80 and P31 written to file entities-Q80-P31.txt");
+ }
+
+ /**
+ * This method writes an array of EntityDocument objects to a file. It uses a PrintStream to write each
+ * EntityDocument to the file specified by fileName. If an IOException occurs during this process, it prints
+ * the stack trace.
+ *
+ * @param entityDocuments An array of EntityDocument objects to be written to the file.
+ * @param fileName The name of the file to which the entity data will be written.
+ */
+ private static void writeEntityDataToFile(EntityDocument[] entityDocuments, String fileName) {
+ try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream(fileName))) {
+ for (EntityDocument entityDocument : entityDocuments) {
+ out.println(entityDocument);
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * This method writes a single EntityDocument object to a file. It uses a PrintStream to write the
+ * EntityDocument to the file specified by fileName. If an IOException occurs during this process, it prints
+ * the stack trace.
+ *
+ * @param entityDocument An EntityDocument object to be written to the file.
+ * @param fileName The name of the file to which the entity data will be written.
+ */
+ private static void writeEntityDataToFile(EntityDocument entityDocument, String fileName) {
+ try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream(fileName))) {
+ out.println(entityDocument);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
}
/**
@@ -67,7 +148,8 @@ public static void printDocumentation() {
System.out.println("*** Wikidata Toolkit: FetchOnlineDataExample");
System.out.println("*** ");
System.out
- .println("*** This application demonstrates fetching data from Wikidata.org and covers finding: single entities, multiple entites, and applying various filters to reduce the volume of data fetched.");
+ .println(
+ "*** This example demonstrates fetching data from the Wikidata.org API and covers fetching entity data and applying various filters to reduce the volume of data returned.");
System.out.println("*** It does not download any dump files.");
System.out
.println("********************************************************************");
From 34c2abb0d97791140af513b2c0494dd00060a9f1 Mon Sep 17 00:00:00 2001
From: Horacio Velvetine <78946408+horaciovelvetine@users.noreply.github.com>
Date: Sun, 28 Apr 2024 22:34:27 -0600
Subject: [PATCH 04/10] formatting out extra space
---
.../FetchOnlineDataExampleUpdate.java | 28 +++++++++----------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/src/examples/FetchOnlineDataExampleUpdate.java b/src/examples/FetchOnlineDataExampleUpdate.java
index 1bea248..6f17476 100644
--- a/src/examples/FetchOnlineDataExampleUpdate.java
+++ b/src/examples/FetchOnlineDataExampleUpdate.java
@@ -106,13 +106,13 @@ public static void fetchEntitiesDataByQid(WikibaseDataFetcher wbdf) throws Media
}
/**
- * This method writes an array of EntityDocument objects to a file. It uses a PrintStream to write each
- * EntityDocument to the file specified by fileName. If an IOException occurs during this process, it prints
- * the stack trace.
- *
- * @param entityDocuments An array of EntityDocument objects to be written to the file.
- * @param fileName The name of the file to which the entity data will be written.
- */
+ * This method writes an array of EntityDocument objects to a file. It uses a PrintStream to write each
+ * EntityDocument to the file specified by fileName. If an IOException occurs during this process, it prints
+ * the stack trace.
+ *
+ * @param entityDocuments An array of EntityDocument objects to be written to the file.
+ * @param fileName The name of the file to which the entity data will be written.
+ */
private static void writeEntityDataToFile(EntityDocument[] entityDocuments, String fileName) {
try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream(fileName))) {
for (EntityDocument entityDocument : entityDocuments) {
@@ -124,13 +124,13 @@ private static void writeEntityDataToFile(EntityDocument[] entityDocuments, Stri
}
/**
- * This method writes a single EntityDocument object to a file. It uses a PrintStream to write the
- * EntityDocument to the file specified by fileName. If an IOException occurs during this process, it prints
- * the stack trace.
- *
- * @param entityDocument An EntityDocument object to be written to the file.
- * @param fileName The name of the file to which the entity data will be written.
- */
+ * This method writes a single EntityDocument object to a file. It uses a PrintStream to write the
+ * EntityDocument to the file specified by fileName. If an IOException occurs during this process, it prints
+ * the stack trace.
+ *
+ * @param entityDocument An EntityDocument object to be written to the file.
+ * @param fileName The name of the file to which the entity data will be written.
+ */
private static void writeEntityDataToFile(EntityDocument entityDocument, String fileName) {
try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream(fileName))) {
out.println(entityDocument);
From fd56d934025ff4aae46a66f998d8c40121673a72 Mon Sep 17 00:00:00 2001
From: Horacio Velvetine <78946408+horaciovelvetine@users.noreply.github.com>
Date: Sun, 28 Apr 2024 23:09:28 -0600
Subject: [PATCH 05/10] finalizes method reconstruction inits with javadoc
inits
opted to make the Qid example fetch a single entity, with a hint about the plural method for getting multiple results
opted to make the byTitle example fetch by multiple page titles, with a hint about the singular method for getting a single result
---
.../FetchOnlineDataExampleUpdate.java | 106 ++++++++++++------
1 file changed, 73 insertions(+), 33 deletions(-)
diff --git a/src/examples/FetchOnlineDataExampleUpdate.java b/src/examples/FetchOnlineDataExampleUpdate.java
index 6f17476..3f7f9f2 100644
--- a/src/examples/FetchOnlineDataExampleUpdate.java
+++ b/src/examples/FetchOnlineDataExampleUpdate.java
@@ -38,11 +38,10 @@
* This example demonstrates fetching data from the Wikidata.org API and covers fetching entity data, and applying various filters to reduce the volume of data returned.
*
*
- * - Fetching data for a single entity, or multiple entities, using entity Qids (e.g. "Q42" and "P31").
- * - Fetching data for entities applying filters: This example shows how to apply filters to reduce the volume of fetched data. It sets filters for: site links, language, and propertities, then fetches data for a single entity with Qid "Q8", then prints its French label and English Wikipedia page title.
+ * - Fetching data for a single entity and multiple entities, using entity Qids (e.g. "Q42" and "P31").
* - Fetching by Page Title: Fetches data for a single entity with the page titles (e.g. "Terry Pratchett" on English Wikipedia), then prints the correspinding Qid.
- *- Fetching by Multiple Page Titles: Fetches data for multiple entities with the page titles (e.g. "Wikidata" and "Wikipedia" on English Wikipedia), then prints the correspinding Qids.
- *- Searching for Entities: Searches for entities with a specific label (e.g. "Douglas Adams") and prints the Qids and labels of the search results.
+ *- Searching for Entities: Searches for entities using a search term (e.g. "Douglas Adams") and which language Wiki to search (e.g. "fr"), then prints the Qids and labels of the search results.
+ * - Fetching data for entities applying filters: This example shows how to apply filters to reduce the volume of fetched data. It sets filters for: site links, language, and propertities, then fetches data for a single entity with Qid "Q8", then prints its French label and English Wikipedia page title.
*
*
* @throws MediaWikiApiErrorException
@@ -61,8 +60,10 @@ public static void main(String[] args) throws MediaWikiApiErrorException, IOExce
BasicApiConnection.getWikidataApiConnection(),
Datamodel.SITE_WIKIDATA);
- fetchEntityDataByQid(wbdf);
- fetchEntitiesDataByQid(wbdf);
+ fetchEntityDataByQid(wbdf); // get entities using their Qids...
+ fetchEntitiesByTitles(wbdf); // get entities using their page titles...
+ fetchEntityBySearchTerm(wbdf); // search for entities using query text...
+ fetchEntityAndApplyFilters(wbdf); // get entities applying filters...
}
/**
@@ -72,54 +73,93 @@ public static void main(String[] args) throws MediaWikiApiErrorException, IOExce
* @param wbdf An instance of WikibaseDataFetcher that is used to fetch the data.
* @throws MediaWikiApiErrorException If there is an error while fetching the data from the API.
* @throws IOException If there is an error while writing the fetched data to a file.
+
+ @see #WikibaseDataFetcher.getEntityDocuments(String[] qids)
*/
public static void fetchEntityDataByQid(WikibaseDataFetcher wbdf) throws MediaWikiApiErrorException, IOException {
System.out.println("*** Fetching data for one entity:");
+
+ //? Multiple entities can be fetched using the plural getEntityDocuments() method.
EntityDocument q42 = wbdf.getEntityDocument("Q42");
- writeEntityDataToFile(q42, "entity-Q42.txt");
+
if (q42 instanceof ItemDocument) {
+ writeEntityDataToFile(q42, "entity-Q42.txt");
System.out.println("The English name for entity Q42 is "
+ ((ItemDocument) q42).getLabels().get("en").getText());
+ System.out.println("Raw data for entity Q42 written to file entity-Q42.txt");
+ } else {
+ System.out.println("Entity Q42 was not found!");
}
- System.out.println("Raw data for entity Q42 written to file entity-Q42.txt");
}
/**
- * This method fetches data for multiple entities from the Wikidata.org API using their QID's (unique identifier).
- * The fetched data is then written to a file and the Qid's are printed to the console.
- *
+ * This method fetches data for a single entity from the Wikidata.org API using an entities title.
+ * The fetched data is then written to a file and the Qid's and labels of the search results are printed to the console.
+ *
+ * @param wbdf An instance of WikibaseDataFetcher that is used to fetch the data.
+ * @throws MediaWikiApiErrorException If there is an error while fetching the data from the API.
+ * @throws IOException If there is an error while writing the fetched data to a file.
+ *
+ * @see #WikibaseDataFetcher.getEntityDocumentByTitle(String siteIri, String pageTitle)
+ */
+ public static void fetchEntitiesByTitles(WikibaseDataFetcher wbdf) throws MediaWikiApiErrorException, IOException {
+ System.out.println("*** Fetching data based on page title:");
+
+ //? Singular entities, similar to above, can be fetched using the singular getEntityDocumentByTitle() method.
+ Map results = wbdf.getEntityDocumentsByTitle("enwiki",
+ "Terry Pratchett", "Neil Gaiman");
+ for (Entry entry : results.entrySet()) {
+ writeEntityDataToFile(entry.getValue(), "entity-" + entry.getKey() + ".txt");
+ System.out.println("The Qid of " + entry.getKey() + " is " + entry.getValue().getEntityId().getId());
+ System.out
+ .println("Raw data for entity " + entry.getKey() + " written to file entity-" + entry.getKey() + ".txt");
+ }
+ }
+
+ /**
+ * This method fetches data for a single entity from the Wikidata.org API using a search term and language target or iri.
+ * The fetched data's Qid and labels are then printed to the console.
+ *
* @param wbdf An instance of WikibaseDataFetcher that is used to fetch the data.
* @throws MediaWikiApiErrorException If there is an error while fetching the data from the API.
* @throws IOException If there is an error while writing the fetched data to a file.
*/
- public static void fetchEntitiesDataByQid(WikibaseDataFetcher wbdf) throws MediaWikiApiErrorException, IOException {
- System.out.println("*** Fetching data for several entities:");
- Map results = wbdf.getEntityDocuments("Q80",
- "P31");
- // Keys of this map are Qids, but we only use the values here:
- for (EntityDocument ed : results.values()) {
- System.out.println("Successfully retrieved data for "
- + ed.getEntityId().getId());
+ public static void fetchEntityBySearchTerm(WikibaseDataFetcher wbdf) throws MediaWikiApiErrorException, IOException {
+ System.out.println("*** Searching for entities:");
+ for (WbSearchEntitiesResult result : wbdf.searchEntities("Douglas Adams", "fr")) {
+ System.out.println("Found entity " + result.getTitle() + " with Qid "
+ + result.getEntityId());
}
- writeEntityDataToFile(results.values().toArray(new EntityDocument[0]), "entities-Q80-P31.txt");
- System.out.println("Raw data for entities Q80 and P31 written to file entities-Q80-P31.txt");
}
/**
- * This method writes an array of EntityDocument objects to a file. It uses a PrintStream to write each
- * EntityDocument to the file specified by fileName. If an IOException occurs during this process, it prints
- * the stack trace.
+ * This method fetches data for a single entity, then applies filters by selecting only site links from English Wikipedia, labels in French, which have no statements at all.
+ * The fetched data is then written to a file and the French label and English Wikipedia page title are printed to the console.
*
- * @param entityDocuments An array of EntityDocument objects to be written to the file.
- * @param fileName The name of the file to which the entity data will be written.
+ * @param wbdf An instance of WikibaseDataFetcher that is used to fetch the data.
+ * @throws MediaWikiApiErrorException If there is an error while fetching the data from the API.
+ * @throws IOException If there is an error while writing the fetched data to a file.
*/
- private static void writeEntityDataToFile(EntityDocument[] entityDocuments, String fileName) {
- try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream(fileName))) {
- for (EntityDocument entityDocument : entityDocuments) {
- out.println(entityDocument);
- }
- } catch (IOException e) {
- e.printStackTrace();
+ public static void fetchEntityAndApplyFilters(WikibaseDataFetcher wbdf)
+ throws MediaWikiApiErrorException, IOException {
+ System.out.println("*** Fetching data for entities applying filters:");
+
+ //? Filters can be applied to reduce the volume of fetched data.
+ //? Filters can be set for site links, language, and properties.
+ wbdf.getFilter().setSiteLinkFilter(Collections.singleton("enwiki")); // Only site links from English Wikipedia
+ wbdf.getFilter().setLanguageFilter(Collections.singleton("fr")); // Only labels in French
+ wbdf.getFilter().setPropertyFilter(Collections.emptySet()); // No statements at all
+ EntityDocument q8 = wbdf.getEntityDocument("Q8");
+
+ if (q8 instanceof ItemDocument) {
+ writeEntityDataToFile(q8, "entity-Q8.txt");
+ System.out.println("The French label for entity Q8 is "
+ + ((ItemDocument) q8).getLabels().get("fr").getText()
+ + "\nand its English Wikipedia page has the title "
+ + ((ItemDocument) q8).getSiteLinks().get("enwiki").getPageTitle() + ".");
+ System.out.println("Raw data for entity Q8 written to file entity-Q8.txt");
+ } else {
+ System.out.println("Entity Q8 was not found!");
}
}
From 48aac0e3c46fb3ed2aa127b3344b03a10ef35f4e Mon Sep 17 00:00:00 2001
From: Horacio Velvetine <78946408+horaciovelvetine@users.noreply.github.com>
Date: Tue, 30 Apr 2024 18:31:23 -0600
Subject: [PATCH 06/10] Fresh eyes, re-read/write for grammar and clarity
---
.../FetchOnlineDataExampleUpdate.java | 38 ++++++++++---------
1 file changed, 21 insertions(+), 17 deletions(-)
diff --git a/src/examples/FetchOnlineDataExampleUpdate.java b/src/examples/FetchOnlineDataExampleUpdate.java
index 3f7f9f2..c635d2e 100644
--- a/src/examples/FetchOnlineDataExampleUpdate.java
+++ b/src/examples/FetchOnlineDataExampleUpdate.java
@@ -35,14 +35,17 @@
import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;
/**
- * This example demonstrates fetching data from the Wikidata.org API and covers fetching entity data, and applying various filters to reduce the volume of data returned.
- *
+ * This example demonstrates fetching data from the Wikidata.org API including fetching entity data and applying various filters to reduce the volume of data returned.
+ *
+ * This example does not download any dump files.
*
* - Fetching data for a single entity and multiple entities, using entity Qids (e.g. "Q42" and "P31").
- * - Fetching by Page Title: Fetches data for a single entity with the page titles (e.g. "Terry Pratchett" on English Wikipedia), then prints the correspinding Qid.
- *- Searching for Entities: Searches for entities using a search term (e.g. "Douglas Adams") and which language Wiki to search (e.g. "fr"), then prints the Qids and labels of the search results.
- * - Fetching data for entities applying filters: This example shows how to apply filters to reduce the volume of fetched data. It sets filters for: site links, language, and propertities, then fetches data for a single entity with Qid "Q8", then prints its French label and English Wikipedia page title.
+ * - Fetching by Page Title: Fetches data for a single entity with the page titles (e.g. "Terry Pratchett" on English Wikipedia), then prints the correspinding Qid.
+ * - Searching for Entities: Searches for entities using a search term (e.g. "Douglas Adams") and which language Wiki (or "iri") to search (e.g. "fr"), then prints the Qids and labels of the search results.
+ * - Fetching data for entities and applying filters to limit the volume of data returned. It sets filters for: site links, language, and propertities, then fetches data for a single entity with Qid "Q8", then prints its French label and English Wikipedia page title.
*
+ *
+ * @implNote The results are written to the "/results/" directory in the project root.
*
* @throws MediaWikiApiErrorException
* @throws IOException
@@ -93,15 +96,15 @@ public static void fetchEntityDataByQid(WikibaseDataFetcher wbdf) throws MediaWi
}
/**
- * This method fetches data for a single entity from the Wikidata.org API using an entities title.
- * The fetched data is then written to a file and the Qid's and labels of the search results are printed to the console.
- *
- * @param wbdf An instance of WikibaseDataFetcher that is used to fetch the data.
- * @throws MediaWikiApiErrorException If there is an error while fetching the data from the API.
- * @throws IOException If there is an error while writing the fetched data to a file.
- *
- * @see #WikibaseDataFetcher.getEntityDocumentByTitle(String siteIri, String pageTitle)
- */
+ * This method fetches data for a single entity from the Wikidata.org API using an entities title.
+ * The fetched data is then written to a file and the Qid's and labels of the search results are printed to the console.
+ *
+ * @param wbdf An instance of WikibaseDataFetcher that is used to fetch the data.
+ * @throws MediaWikiApiErrorException If there is an error while fetching the data from the API.
+ * @throws IOException If there is an error while writing the fetched data to a file.
+ *
+ * @see #WikibaseDataFetcher.getEntityDocumentByTitle(String siteIri, String pageTitle)
+ */
public static void fetchEntitiesByTitles(WikibaseDataFetcher wbdf) throws MediaWikiApiErrorException, IOException {
System.out.println("*** Fetching data based on page title:");
@@ -133,7 +136,7 @@ public static void fetchEntityBySearchTerm(WikibaseDataFetcher wbdf) throws Medi
}
/**
- * This method fetches data for a single entity, then applies filters by selecting only site links from English Wikipedia, labels in French, which have no statements at all.
+ * This method fetches data for a single entity, then applies filters by selecting only site links from English Wikipedia, and labels in French which have no statements at all.
* The fetched data is then written to a file and the French label and English Wikipedia page title are printed to the console.
*
* @param wbdf An instance of WikibaseDataFetcher that is used to fetch the data.
@@ -144,8 +147,7 @@ public static void fetchEntityAndApplyFilters(WikibaseDataFetcher wbdf)
throws MediaWikiApiErrorException, IOException {
System.out.println("*** Fetching data for entities applying filters:");
- //? Filters can be applied to reduce the volume of fetched data.
- //? Filters can be set for site links, language, and properties.
+ // apply filters to the data fetched
wbdf.getFilter().setSiteLinkFilter(Collections.singleton("enwiki")); // Only site links from English Wikipedia
wbdf.getFilter().setLanguageFilter(Collections.singleton("fr")); // Only labels in French
wbdf.getFilter().setPropertyFilter(Collections.emptySet()); // No statements at all
@@ -170,6 +172,8 @@ public static void fetchEntityAndApplyFilters(WikibaseDataFetcher wbdf)
*
* @param entityDocument An EntityDocument object to be written to the file.
* @param fileName The name of the file to which the entity data will be written.
+ *
+ * @implNote The file is written to the "/results/" directory in the project root.
*/
private static void writeEntityDataToFile(EntityDocument entityDocument, String fileName) {
try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream(fileName))) {
From 1b5517a0816b8a2626d52c0cda91f57e50223573 Mon Sep 17 00:00:00 2001
From: Horacio Velvetine <78946408+horaciovelvetine@users.noreply.github.com>
Date: Mon, 6 May 2024 17:12:11 -0600
Subject: [PATCH 07/10] implements intended final changes
- adds an optional output flag to keep changes to the original functionality minimal
- tightens up the language describing the kind of entity result returned by the search functionality
- prints the results of the search function to its own file
---
.../FetchOnlineDataExampleUpdate.java | 59 ++++++++++++++-----
1 file changed, 45 insertions(+), 14 deletions(-)
diff --git a/src/examples/FetchOnlineDataExampleUpdate.java b/src/examples/FetchOnlineDataExampleUpdate.java
index c635d2e..5ae6ecc 100644
--- a/src/examples/FetchOnlineDataExampleUpdate.java
+++ b/src/examples/FetchOnlineDataExampleUpdate.java
@@ -54,6 +54,7 @@
*/
public class FetchOnlineDataExampleUpdate {
+ private static boolean printOutputToResultsDirectory = false; // set to true to print the output in the results directory
public static void main(String[] args) throws MediaWikiApiErrorException, IOException {
ExampleHelpers.configureLogging();
@@ -65,7 +66,7 @@ public static void main(String[] args) throws MediaWikiApiErrorException, IOExce
fetchEntityDataByQid(wbdf); // get entities using their Qids...
fetchEntitiesByTitles(wbdf); // get entities using their page titles...
- fetchEntityBySearchTerm(wbdf); // search for entities using query text...
+ fetchEntityResultBySearchTerm(wbdf); // search entities using query text...
fetchEntityAndApplyFilters(wbdf); // get entities applying filters...
}
@@ -120,23 +121,27 @@ public static void fetchEntitiesByTitles(WikibaseDataFetcher wbdf) throws MediaW
}
/**
- * This method fetches data for a single entity from the Wikidata.org API using a search term and language target or iri.
+ * This method fetches data for a single entity from the Wikidata.org API using a search term and language target (a.k.a iri).
* The fetched data's Qid and labels are then printed to the console.
*
* @param wbdf An instance of WikibaseDataFetcher that is used to fetch the data.
* @throws MediaWikiApiErrorException If there is an error while fetching the data from the API.
* @throws IOException If there is an error while writing the fetched data to a file.
*/
- public static void fetchEntityBySearchTerm(WikibaseDataFetcher wbdf) throws MediaWikiApiErrorException, IOException {
- System.out.println("*** Searching for entities:");
- for (WbSearchEntitiesResult result : wbdf.searchEntities("Douglas Adams", "fr")) {
- System.out.println("Found entity " + result.getTitle() + " with Qid "
- + result.getEntityId());
+ public static void fetchEntityResultBySearchTerm(WikibaseDataFetcher wbdf)
+ throws MediaWikiApiErrorException, IOException {
+ System.out.println("*** Searching for entities matching: 'Douglas Adams' ");
+ try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream("search-results.txt"))) {
+ for (WbSearchEntitiesResult result : wbdf.searchEntities("Douglas Adams", "fr")) {
+ writeSearchResultsToFile(result, out);
+ System.out.println("Found result " + result.getLabel() + " with Qid " + result.getTitle() + ".");
+ }
}
+ System.out.println("Search results written to file search-results.txt");
}
/**
- * This method fetches data for a single entity, then applies filters by selecting only site links from English Wikipedia, and labels in French which have no statements at all.
+ * This method fetches data for a single entity, then applies filters to limit the data by selecting only site links from English Wikipedia, and labels in French which have no statements at all.
* The fetched data is then written to a file and the French label and English Wikipedia page title are printed to the console.
*
* @param wbdf An instance of WikibaseDataFetcher that is used to fetch the data.
@@ -147,7 +152,7 @@ public static void fetchEntityAndApplyFilters(WikibaseDataFetcher wbdf)
throws MediaWikiApiErrorException, IOException {
System.out.println("*** Fetching data for entities applying filters:");
- // apply filters to the data fetched
+ // apply filters to fetched data to limit results
wbdf.getFilter().setSiteLinkFilter(Collections.singleton("enwiki")); // Only site links from English Wikipedia
wbdf.getFilter().setLanguageFilter(Collections.singleton("fr")); // Only labels in French
wbdf.getFilter().setPropertyFilter(Collections.emptySet()); // No statements at all
@@ -176,10 +181,37 @@ public static void fetchEntityAndApplyFilters(WikibaseDataFetcher wbdf)
* @implNote The file is written to the "/results/" directory in the project root.
*/
private static void writeEntityDataToFile(EntityDocument entityDocument, String fileName) {
- try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream(fileName))) {
- out.println(entityDocument);
- } catch (IOException e) {
- e.printStackTrace();
+ if (!printOutputToResultsDirectory) {
+ System.out.println(entityDocument);
+ } else {
+ try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream(fileName))) {
+ out.println(entityDocument);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+
+ /**
+ * Writes the search results to the specified PrintStream.
+ *
+ * @param result The WbSearchEntitiesResult object containing the search results.
+ * @param out The PrintStream to write the results to.
+ */
+ private static void writeSearchResultsToFile(WbSearchEntitiesResult result, PrintStream out) {
+ String output = "RESULT " + result.getTitle() + " DETAILS:" +
+ "\nconcept_uri:" + result.getConceptUri() +
+ "\ndescription:" + result.getDescription() +
+ "\nentity_ID:" + result.getEntityId() +
+ "\nlabel:" + result.getLabel() +
+ "\npage_ID:" + result.getPageId() +
+ "\nQID:" + result.getTitle() +
+ "\nURL:" + result.getUrl() +
+ "\n";
+ if (!printOutputToResultsDirectory) {
+ System.out.println(output);
+ } else {
+ out.println(output);
}
}
@@ -197,6 +229,5 @@ public static void printDocumentation() {
System.out.println("*** It does not download any dump files.");
System.out
.println("********************************************************************");
-
}
}
From 1ec9d64807d3859ba008da4326c721dd1ada0442 Mon Sep 17 00:00:00 2001
From: Horacio Velvetine <78946408+horaciovelvetine@users.noreply.github.com>
Date: Mon, 6 May 2024 18:24:46 -0600
Subject: [PATCH 08/10] adds missing javadoc comments, reformats for beginner
friendly readability
---
src/examples/FetchOnlineDataExample.java | 142 ++++++++++++++---------
1 file changed, 89 insertions(+), 53 deletions(-)
diff --git a/src/examples/FetchOnlineDataExample.java b/src/examples/FetchOnlineDataExample.java
index fe9b711..2d5b3e4 100644
--- a/src/examples/FetchOnlineDataExample.java
+++ b/src/examples/FetchOnlineDataExample.java
@@ -22,6 +22,7 @@
import java.io.IOException;
import java.util.Collections;
+import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -33,89 +34,124 @@
import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;
-public class FetchOnlineDataExample {
+/**
+ * This example demonstrates fetching entities from the Wikidata.org API by QID, page title, or search query, and applying various filters to reduce the volume of data returned.
+ *
+ * This example does not download any dump files.
+ *
+ * - Fetches data for a single entity (& multiple entities) using a QID (unique Wikimedia identifier) and prints its title.
+ * - Fetches data for multiple entities using site-key ("enwiki") and multiple titles and prints their QID's
+ * - Searches for entities using a query (e.g. "Douglas Adams") and a language code (e.g. "fr"), then prints the QIDs and labels of the results.
+ * - Fetches data for a single entity using its QID, but limits the volume of data returned by first applying site-link, language, and property filters
+ *
+ *
+ *
+ * @throws MediaWikiApiErrorException
+ * @throws IOException
+
+ * @author Markus Kroetzsch
+ */
+public class FetchOnlineDataExample {
public static void main(String[] args) throws MediaWikiApiErrorException, IOException {
ExampleHelpers.configureLogging();
- printDocumentation();
+ FetchOnlineDataExample.printDocumentation();
WikibaseDataFetcher wbdf = new WikibaseDataFetcher(
BasicApiConnection.getWikidataApiConnection(),
Datamodel.SITE_WIKIDATA);
+ FetchOnlineDataExample.fetchSingleEntityByQID(wbdf);
+ FetchOnlineDataExample.fetchEntitiesByTitles(wbdf);
+ FetchOnlineDataExample.fetchEntityResultsBySearchTerm(wbdf);
+ FetchOnlineDataExample.fetchEntityWithAppliedFilters(wbdf);
+ }
+
+ /**
+ * Fetches data for a single entity using its QID (unique identifier).
+ * The fetched data is then written to selected outputs and prints the 'en' text title to the console.
+ */
+ public static void fetchSingleEntityByQID(WikibaseDataFetcher wbdf) throws MediaWikiApiErrorException, IOException {
System.out.println("*** Fetching data for one entity:");
- EntityDocument q42 = wbdf.getEntityDocument("Q42");
- System.out.println(q42);
- if (q42 instanceof ItemDocument) {
- System.out.println("The English name for entity Q42 is "
- + ((ItemDocument) q42).getLabels().get("en").getText());
+ // Multiple entities can be fetched in one go by providing a list of QID's & using the getEntityDocuments() method.
+ EntityDocument entityDocument = wbdf.getEntityDocument("Q42");
+
+ if (entityDocument instanceof ItemDocument) {
+ System.out.println("The English name for entity Q42 is: "
+ + ((ItemDocument) entityDocument).getLabels().get("en").getText());
}
+ }
+
+ /**
+ * Fetches data for multiple entities using their titles.
+ * The fetched data is then written to selected outputs and prints the unique QID to the console.
+ */
+ public static void fetchEntitiesByTitles(WikibaseDataFetcher wbdf) throws MediaWikiApiErrorException, IOException {
+ System.out.println("*** Fetching data for entities by page titles:");
- System.out.println("*** Fetching data for several entities:");
- Map results = wbdf.getEntityDocuments("Q80",
- "P31");
- // Keys of this map are Qids, but we only use the values here:
- for (EntityDocument ed : results.values()) {
- System.out.println("Successfully retrieved data for "
- + ed.getEntityId().getId());
+ // Similar to above, a single entity can be fetched by providing a single title & using the getEntityDocumentByTitle() method.
+ Map entityDocuments = wbdf.getEntityDocumentsByTitle("enwiki", "Terry Pratchett",
+ "Neil Gaiman");
+
+ for (Entry entry : entityDocuments.entrySet()) {
+ System.out.println("The QID for the entity with page title \""
+ + entry.getKey() + "\" is: " + entry.getValue().getEntityId().getId());
}
+ }
- System.out
- .println("*** Fetching data using filters to reduce data volume:");
- // Only site links from English Wikipedia:
- wbdf.getFilter().setSiteLinkFilter(Collections.singleton("enwiki"));
- // Only labels in French:
- wbdf.getFilter().setLanguageFilter(Collections.singleton("fr"));
- // No statements at all:
- wbdf.getFilter().setPropertyFilter(Collections.emptySet());
+ /**
+ * Fetches results data for whichever entities match the search term for language code provided.
+ * The resultant data is then written to selected outputs and prints the QID and label of the search results to the console.
+ */
+ public static void fetchEntityResultsBySearchTerm(WikibaseDataFetcher wbdf)
+ throws MediaWikiApiErrorException, IOException {
+ System.out.println("*** Doing search on Wikidata for: Douglas Adams");
+
+ // Search for entities using a search term and language code.
+ List searchResults = wbdf.searchEntities("Douglas Adams", "fr");
+ for (WbSearchEntitiesResult result : searchResults) {
+ System.out.println("Found entity with QID " + result.getEntityId() + " and label \""
+ + result.getLabel() + "\".");
+ }
+ }
+
+ /**
+ * Fetches data for a single entity using its QID and applies filters to reduce the volume of data returned.
+ * The fetched data is then written to selected outputs and prints the French label and English Wikipedia page title to the console.
+ */
+ public static void fetchEntityWithAppliedFilters(WikibaseDataFetcher wbdf)
+ throws MediaWikiApiErrorException, IOException {
+ System.out.println("*** Fetching data using filters to reduce data volume:");
+
+ // apply filters to data to get only what we want...
+ wbdf.getFilter().setSiteLinkFilter(Collections.singleton("enwiki")); // Only site links from English Wikipedia
+ wbdf.getFilter().setLanguageFilter(Collections.singleton("fr")); // Only labels in French
+ wbdf.getFilter().setPropertyFilter(Collections.emptySet()); // No statements at all
+ // Fetch the entity data
EntityDocument q8 = wbdf.getEntityDocument("Q8");
+
if (q8 instanceof ItemDocument) {
System.out.println("The French label for entity Q8 is "
+ ((ItemDocument) q8).getLabels().get("fr").getText()
+ "\nand its English Wikipedia page has the title "
- + ((ItemDocument) q8).getSiteLinks().get("enwiki")
- .getPageTitle() + ".");
+ + ((ItemDocument) q8).getSiteLinks().get("enwiki").getPageTitle() + ".");
}
-
- System.out.println("*** Fetching data based on page title:");
- EntityDocument edPratchett = wbdf.getEntityDocumentByTitle("enwiki",
- "Terry Pratchett");
- System.out.println("The Qid of Terry Pratchett is "
- + edPratchett.getEntityId().getId());
-
- System.out.println("*** Fetching data based on several page titles:");
- results = wbdf.getEntityDocumentsByTitle("enwiki", "Wikidata",
- "Wikipedia");
- // In this case, keys are titles rather than Qids
- for (Entry entry : results.entrySet()) {
- System.out
- .println("Successfully retrieved data for page entitled \""
- + entry.getKey() + "\": "
- + entry.getValue().getEntityId().getId());
- }
-
- System.out.println("** Doing search on Wikidata:");
- for(WbSearchEntitiesResult result : wbdf.searchEntities("Douglas Adams", "fr")) {
- System.out.println("Found " + result.getEntityId() + " with label " + result.getLabel());
- }
-
- System.out.println("*** Done.");
}
/**
- * Prints some basic documentation about this program.
- */
+ * Prints some basic documentation about this program.
+ */
public static void printDocumentation() {
System.out
.println("********************************************************************");
System.out.println("*** Wikidata Toolkit: FetchOnlineDataExample");
System.out.println("*** ");
System.out
- .println("*** This program fetches individual data using the wikidata.org API.");
+ .println(
+ "*** This example demonstrates fetching data from the Wikidata.org API by their QID, title, or query and applying various filters to reduce the volume of data returned.");
System.out.println("*** It does not download any dump files.");
System.out
.println("********************************************************************");
}
-
-}
+}
\ No newline at end of file
From 7c2b727cd7f9af9055a8673d87b79bd790ea8474 Mon Sep 17 00:00:00 2001
From: Horacio Velvetine <78946408+horaciovelvetine@users.noreply.github.com>
Date: Mon, 6 May 2024 18:25:16 -0600
Subject: [PATCH 09/10] remove ref
---
.../FetchOnlineDataExampleUpdate.java | 233 ------------------
1 file changed, 233 deletions(-)
delete mode 100644 src/examples/FetchOnlineDataExampleUpdate.java
diff --git a/src/examples/FetchOnlineDataExampleUpdate.java b/src/examples/FetchOnlineDataExampleUpdate.java
deleted file mode 100644
index 5ae6ecc..0000000
--- a/src/examples/FetchOnlineDataExampleUpdate.java
+++ /dev/null
@@ -1,233 +0,0 @@
-package examples;
-
-/*
- * #%L
- * Wikidata Toolkit Examples
- * %%
- * Copyright (C) 2014 - 2015 Wikidata Toolkit Developers
- * %%
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * #L%
- */
-
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.Collections;
-import java.util.Map;
-import java.util.Map.Entry;
-
-import org.wikidata.wdtk.datamodel.helpers.Datamodel;
-import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
-import org.wikidata.wdtk.datamodel.interfaces.ItemDocument;
-import org.wikidata.wdtk.wikibaseapi.BasicApiConnection;
-import org.wikidata.wdtk.wikibaseapi.WbSearchEntitiesResult;
-import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
-import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;
-
-/**
- * This example demonstrates fetching data from the Wikidata.org API including fetching entity data and applying various filters to reduce the volume of data returned.
- *
- * This example does not download any dump files.
- *
- * - Fetching data for a single entity and multiple entities, using entity Qids (e.g. "Q42" and "P31").
- * - Fetching by Page Title: Fetches data for a single entity with the page titles (e.g. "Terry Pratchett" on English Wikipedia), then prints the correspinding Qid.
- * - Searching for Entities: Searches for entities using a search term (e.g. "Douglas Adams") and which language Wiki (or "iri") to search (e.g. "fr"), then prints the Qids and labels of the search results.
- * - Fetching data for entities and applying filters to limit the volume of data returned. It sets filters for: site links, language, and propertities, then fetches data for a single entity with Qid "Q8", then prints its French label and English Wikipedia page title.
- *
- *
- * @implNote The results are written to the "/results/" directory in the project root.
- *
- * @throws MediaWikiApiErrorException
- * @throws IOException
-
- * @author Markus Kroetzsch
- */
-
-public class FetchOnlineDataExampleUpdate {
- private static boolean printOutputToResultsDirectory = false; // set to true to print the output in the results directory
-
- public static void main(String[] args) throws MediaWikiApiErrorException, IOException {
- ExampleHelpers.configureLogging();
- FetchOnlineDataExampleUpdate.printDocumentation();
-
- WikibaseDataFetcher wbdf = new WikibaseDataFetcher(
- BasicApiConnection.getWikidataApiConnection(),
- Datamodel.SITE_WIKIDATA);
-
- fetchEntityDataByQid(wbdf); // get entities using their Qids...
- fetchEntitiesByTitles(wbdf); // get entities using their page titles...
- fetchEntityResultBySearchTerm(wbdf); // search entities using query text...
- fetchEntityAndApplyFilters(wbdf); // get entities applying filters...
- }
-
- /**
- * This method fetches data for a single entity from the Wikidata.org API using its QID (unique identifier).
- * The fetched data is then written to a file and the English name for the entity is printed to the console.
- *
- * @param wbdf An instance of WikibaseDataFetcher that is used to fetch the data.
- * @throws MediaWikiApiErrorException If there is an error while fetching the data from the API.
- * @throws IOException If there is an error while writing the fetched data to a file.
-
- @see #WikibaseDataFetcher.getEntityDocuments(String[] qids)
- */
- public static void fetchEntityDataByQid(WikibaseDataFetcher wbdf) throws MediaWikiApiErrorException, IOException {
- System.out.println("*** Fetching data for one entity:");
-
- //? Multiple entities can be fetched using the plural getEntityDocuments() method.
- EntityDocument q42 = wbdf.getEntityDocument("Q42");
-
- if (q42 instanceof ItemDocument) {
- writeEntityDataToFile(q42, "entity-Q42.txt");
- System.out.println("The English name for entity Q42 is "
- + ((ItemDocument) q42).getLabels().get("en").getText());
- System.out.println("Raw data for entity Q42 written to file entity-Q42.txt");
- } else {
- System.out.println("Entity Q42 was not found!");
- }
- }
-
- /**
- * This method fetches data for a single entity from the Wikidata.org API using an entities title.
- * The fetched data is then written to a file and the Qid's and labels of the search results are printed to the console.
- *
- * @param wbdf An instance of WikibaseDataFetcher that is used to fetch the data.
- * @throws MediaWikiApiErrorException If there is an error while fetching the data from the API.
- * @throws IOException If there is an error while writing the fetched data to a file.
- *
- * @see #WikibaseDataFetcher.getEntityDocumentByTitle(String siteIri, String pageTitle)
- */
- public static void fetchEntitiesByTitles(WikibaseDataFetcher wbdf) throws MediaWikiApiErrorException, IOException {
- System.out.println("*** Fetching data based on page title:");
-
- //? Singular entities, similar to above, can be fetched using the singular getEntityDocumentByTitle() method.
- Map results = wbdf.getEntityDocumentsByTitle("enwiki",
- "Terry Pratchett", "Neil Gaiman");
- for (Entry entry : results.entrySet()) {
- writeEntityDataToFile(entry.getValue(), "entity-" + entry.getKey() + ".txt");
- System.out.println("The Qid of " + entry.getKey() + " is " + entry.getValue().getEntityId().getId());
- System.out
- .println("Raw data for entity " + entry.getKey() + " written to file entity-" + entry.getKey() + ".txt");
- }
- }
-
- /**
- * This method fetches data for a single entity from the Wikidata.org API using a search term and language target (a.k.a iri).
- * The fetched data's Qid and labels are then printed to the console.
- *
- * @param wbdf An instance of WikibaseDataFetcher that is used to fetch the data.
- * @throws MediaWikiApiErrorException If there is an error while fetching the data from the API.
- * @throws IOException If there is an error while writing the fetched data to a file.
- */
- public static void fetchEntityResultBySearchTerm(WikibaseDataFetcher wbdf)
- throws MediaWikiApiErrorException, IOException {
- System.out.println("*** Searching for entities matching: 'Douglas Adams' ");
- try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream("search-results.txt"))) {
- for (WbSearchEntitiesResult result : wbdf.searchEntities("Douglas Adams", "fr")) {
- writeSearchResultsToFile(result, out);
- System.out.println("Found result " + result.getLabel() + " with Qid " + result.getTitle() + ".");
- }
- }
- System.out.println("Search results written to file search-results.txt");
- }
-
- /**
- * This method fetches data for a single entity, then applies filters to limit the data by selecting only site links from English Wikipedia, and labels in French which have no statements at all.
- * The fetched data is then written to a file and the French label and English Wikipedia page title are printed to the console.
- *
- * @param wbdf An instance of WikibaseDataFetcher that is used to fetch the data.
- * @throws MediaWikiApiErrorException If there is an error while fetching the data from the API.
- * @throws IOException If there is an error while writing the fetched data to a file.
- */
- public static void fetchEntityAndApplyFilters(WikibaseDataFetcher wbdf)
- throws MediaWikiApiErrorException, IOException {
- System.out.println("*** Fetching data for entities applying filters:");
-
- // apply filters to fetched data to limit results
- wbdf.getFilter().setSiteLinkFilter(Collections.singleton("enwiki")); // Only site links from English Wikipedia
- wbdf.getFilter().setLanguageFilter(Collections.singleton("fr")); // Only labels in French
- wbdf.getFilter().setPropertyFilter(Collections.emptySet()); // No statements at all
- EntityDocument q8 = wbdf.getEntityDocument("Q8");
-
- if (q8 instanceof ItemDocument) {
- writeEntityDataToFile(q8, "entity-Q8.txt");
- System.out.println("The French label for entity Q8 is "
- + ((ItemDocument) q8).getLabels().get("fr").getText()
- + "\nand its English Wikipedia page has the title "
- + ((ItemDocument) q8).getSiteLinks().get("enwiki").getPageTitle() + ".");
- System.out.println("Raw data for entity Q8 written to file entity-Q8.txt");
- } else {
- System.out.println("Entity Q8 was not found!");
- }
- }
-
- /**
- * This method writes a single EntityDocument object to a file. It uses a PrintStream to write the
- * EntityDocument to the file specified by fileName. If an IOException occurs during this process, it prints
- * the stack trace.
- *
- * @param entityDocument An EntityDocument object to be written to the file.
- * @param fileName The name of the file to which the entity data will be written.
- *
- * @implNote The file is written to the "/results/" directory in the project root.
- */
- private static void writeEntityDataToFile(EntityDocument entityDocument, String fileName) {
- if (!printOutputToResultsDirectory) {
- System.out.println(entityDocument);
- } else {
- try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream(fileName))) {
- out.println(entityDocument);
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
-
- /**
- * Writes the search results to the specified PrintStream.
- *
- * @param result The WbSearchEntitiesResult object containing the search results.
- * @param out The PrintStream to write the results to.
- */
- private static void writeSearchResultsToFile(WbSearchEntitiesResult result, PrintStream out) {
- String output = "RESULT " + result.getTitle() + " DETAILS:" +
- "\nconcept_uri:" + result.getConceptUri() +
- "\ndescription:" + result.getDescription() +
- "\nentity_ID:" + result.getEntityId() +
- "\nlabel:" + result.getLabel() +
- "\npage_ID:" + result.getPageId() +
- "\nQID:" + result.getTitle() +
- "\nURL:" + result.getUrl() +
- "\n";
- if (!printOutputToResultsDirectory) {
- System.out.println(output);
- } else {
- out.println(output);
- }
- }
-
- /**
- * Prints some basic documentation about this program.
- */
- public static void printDocumentation() {
- System.out
- .println("********************************************************************");
- System.out.println("*** Wikidata Toolkit: FetchOnlineDataExample");
- System.out.println("*** ");
- System.out
- .println(
- "*** This example demonstrates fetching data from the Wikidata.org API and covers fetching entity data and applying various filters to reduce the volume of data returned.");
- System.out.println("*** It does not download any dump files.");
- System.out
- .println("********************************************************************");
- }
-}
From 9e9b56d8a6a7cedcccb201a0fa016362b2ad34bf Mon Sep 17 00:00:00 2001
From: Horacio Velvetine <78946408+horaciovelvetine@users.noreply.github.com>
Date: Mon, 6 May 2024 21:17:19 -0600
Subject: [PATCH 10/10] Adds helpful functionality exploiting helpers to write
results to a local file for local examination
---
src/examples/FetchOnlineDataExample.java | 60 ++++++++++++++++++++++--
1 file changed, 57 insertions(+), 3 deletions(-)
diff --git a/src/examples/FetchOnlineDataExample.java b/src/examples/FetchOnlineDataExample.java
index 2d5b3e4..cb56a8c 100644
--- a/src/examples/FetchOnlineDataExample.java
+++ b/src/examples/FetchOnlineDataExample.java
@@ -21,6 +21,7 @@
*/
import java.io.IOException;
+import java.io.PrintStream;
import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -53,6 +54,8 @@
*/
public class FetchOnlineDataExample {
+ private static boolean printDataToResultsDirectory = false; // Set to true to write the full data output to the 'results' directory.
+
public static void main(String[] args) throws MediaWikiApiErrorException, IOException {
ExampleHelpers.configureLogging();
FetchOnlineDataExample.printDocumentation();
@@ -80,7 +83,9 @@ public static void fetchSingleEntityByQID(WikibaseDataFetcher wbdf) throws Media
if (entityDocument instanceof ItemDocument) {
System.out.println("The English name for entity Q42 is: "
+ ((ItemDocument) entityDocument).getLabels().get("en").getText());
+ writeEntityDataToFile(entityDocument, "single-entity.txt");
}
+
}
/**
@@ -97,6 +102,7 @@ public static void fetchEntitiesByTitles(WikibaseDataFetcher wbdf) throws MediaW
for (Entry entry : entityDocuments.entrySet()) {
System.out.println("The QID for the entity with page title \""
+ entry.getKey() + "\" is: " + entry.getValue().getEntityId().getId());
+ writeEntityDataToFile(entry.getValue(), "multiple-entities-" + entry.getKey() + ".txt");
}
}
@@ -110,9 +116,14 @@ public static void fetchEntityResultsBySearchTerm(WikibaseDataFetcher wbdf)
// Search for entities using a search term and language code.
List searchResults = wbdf.searchEntities("Douglas Adams", "fr");
- for (WbSearchEntitiesResult result : searchResults) {
- System.out.println("Found entity with QID " + result.getEntityId() + " and label \""
- + result.getLabel() + "\".");
+ if (printDataToResultsDirectory) {
+ try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream("search-results.txt"))) {
+ for (WbSearchEntitiesResult result : searchResults) {
+ writeSearchResutlsToFile(result, out);
+ System.out.println("Found entity with QID " + result.getEntityId() + " and label \""
+ + result.getLabel() + "\".");
+ }
+ }
}
}
@@ -136,6 +147,49 @@ public static void fetchEntityWithAppliedFilters(WikibaseDataFetcher wbdf)
+ ((ItemDocument) q8).getLabels().get("fr").getText()
+ "\nand its English Wikipedia page has the title "
+ ((ItemDocument) q8).getSiteLinks().get("enwiki").getPageTitle() + ".");
+ writeEntityDataToFile(q8, "filtered-entity.txt");
+ }
+ }
+
+ /**
+ * Uses a PrintStream and the included ExampleHelpers class to write the entity data to the included file name.
+ *
+ * @param entityDocument The entity document to write to the file.
+ * @param fileName The name of the file to write the entity data to.
+ */
+ private static void writeEntityDataToFile(EntityDocument entityDocument, String fileName) {
+ if (!printDataToResultsDirectory) // skip writing to file if not enabled
+ return;
+
+ try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream(fileName))) {
+ out.println(entityDocument);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * Writes the search results to the specified PrintStream.
+ *
+ * @param result The WbSearchEntitiesResult object containing the search results.
+ * @param out The PrintStream to write the results to.
+ */
+ private static void writeSearchResutlsToFile(WbSearchEntitiesResult result, PrintStream out) {
+ if (!printDataToResultsDirectory) // skip writing to file if not enabled
+ return;
+
+ String output = "RESULT " + result.getTitle() + " DETAILS:" +
+ "\nconcept_uri:" + result.getConceptUri() +
+ "\ndescription:" + result.getDescription() +
+ "\nentity_ID:" + result.getEntityId() +
+ "\nlabel:" + result.getLabel() +
+ "\npage_ID:" + result.getPageId() +
+ "\nQID:" + result.getTitle() +
+ "\nURL:" + result.getUrl() +
+ "\n";
+
+ if (printDataToResultsDirectory) {
+ out.println(output);
}
}