Skip to content

Commit

Permalink
add some wikiQuote methods and html helpers
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Perzl committed Oct 22, 2017
1 parent aa9e33f commit d9f354d
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 2 deletions.
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
"@akanass/rx-http-request": "^2.6.0",
"alexa-sdk": "^1.0.15",
"bestzip": "^1.1.4",
"cheerio": "^1.0.0-rc.2",
"rxjs": "^5.5.0"
},
"devDependencies": {
"@types/alexa-sdk": "^1.0.10",
"@types/cheerio": "^0.22.3",
"@types/jasmine": "^2.6.0",
"jasmine": "^2.8.0",
"s3-cli": "^0.13.0",
Expand Down
37 changes: 36 additions & 1 deletion src/wikiquote.spec.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {Wikiquote} from "./wikiquote"
import {Wikiquote, Section, SectionIndexes, Quotes} from "./wikiquote"

describe("Wikiquote", function() {

Expand All @@ -12,4 +12,39 @@ describe("Wikiquote", function() {
);
});

// Calls Wikiquote.getRandomId() and checks that the emitted id is a positive number.
it("getRandomId returns id of article", function(done) {
  Wikiquote.getRandomId().subscribe(
    (id: number) => {
      expect(id).toBeGreaterThan(0);
      done();
    },
    // Report a failed request as a test failure right away instead of
    // letting the spec run into the Jasmine timeout.
    (err: Error) => done.fail(err)
  );
});

// Calls Wikiquote.getSectionIndexes() for a fixed page id and checks that at
// least one section index is returned together with the expected page title.
it("getSections returns sections of article", function(done) {
  const id = 11810;
  Wikiquote.getSectionIndexes(id).subscribe(
    (sections: SectionIndexes) => {
      expect(sections.sections.length).toBeGreaterThanOrEqual(1);
      const sectionTitle: string = sections.titles;
      expect(sectionTitle).toBe("Gespenst");
      done();
    },
    // Report a failed request as a test failure right away instead of
    // letting the spec run into the Jasmine timeout.
    (err: Error) => done.fail(err)
  );
});

// Calls Wikiquote.getQuotesForSection() for section "1" of a fixed page id and
// checks that the first returned quote is a non-empty string.
it("getQuotes returns quotes of article", function(done) {
  const id = 11810;
  Wikiquote.getQuotesForSection(id, "1").subscribe(
    (quotes: string[]) => {
      expect(quotes.length).toBeGreaterThan(0);
      const quote: string = quotes[0];
      expect(quote).toBeDefined();
      expect(quote.length).toBeGreaterThan(0);
      done();
    },
    // Report a failed request as a test failure right away instead of
    // letting the spec run into the Jasmine timeout.
    (err: Error) => done.fail(err)
  );
});


});
83 changes: 82 additions & 1 deletion src/wikiquote.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,97 @@
import {Observable} from "rxjs";
import {RxHR, RxHttpRequestResponse} from "@akanass/rx-http-request";
import * as cheerio from "cheerio";

/**
 * Shape assigned to each entry of `reply.parse.sections` in
 * `Wikiquote.getSectionIndexes`.
 *
 * Only `number` and `index` are read by the code in this file; the meaning of
 * the remaining fields is defined by the remote API, not here.
 */
export interface Section {
  anchor: string;
  byteoffset: number;
  fromtitle: string;
  /** Section identifier; collected into `SectionIndexes.sections`. */
  index: string;
  level: string;
  line: string;
  /** Dot-separated section number (it is split on "." by the consumer). */
  number: string;
  // NOTE(review): the MediaWiki parse API is believed to emit this field in
  // lowercase ("toclevel") — confirm before relying on this property.
  tocLevel: number;
}

/**
 * Result emitted by `Wikiquote.getSectionIndexes`.
 */
export interface SectionIndexes {
  /** Page title, taken from `reply.parse.title` (a single string despite the plural name). */
  titles: string;
  /** `index` values of the selected sections; never empty (falls back to `["1"]`). */
  sections: string[];
}

/**
 * Static helpers that query the de.wikiquote.org MediaWiki API and turn the
 * JSON/HTML replies into plain values. Every public method returns a cold
 * Observable created by `RxHR.get`; a reply that lacks the expected fields
 * makes the `map` callback throw.
 */
export class Wikiquote {

  /** Base URL of the API that every request in this class is built on. */
  private static endpoint: string = "https://de.wikiquote.org/w/api.php";

  // https://de.wikiquote.org/wiki/Spezial:ApiSandbox#action=parse&format=json&pageid=11810&prop=sections

  /**
   * Requests one random entry (`list=random`, `rnnamespace=0`).
   *
   * @returns Observable emitting `query.random[0].id` of the reply.
   */
  public static getRandomId(): Observable<number> {
    return RxHR.get(Wikiquote.endpoint + "?action=query&list=random&format=json&rnnamespace=0")
      .map((data: RxHttpRequestResponse) => {
        const reply = JSON.parse(data.body);
        return reply.query.random[0].id;
      });
  }

  /**
   * Fetches the section list of a page and selects the sub-sections of
   * section 1.
   *
   * A section is selected when its dot-separated `number` has more than one
   * part and the first part is "1" (e.g. "1.1", "1.2.3"). If no such section
   * exists, the result falls back to `["1"]`.
   *
   * @param pageId Page id sent as the `pageid` query parameter.
   * @returns Observable emitting the page title and the selected section indexes.
   */
  public static getSectionIndexes(pageId: number): Observable<SectionIndexes> {
    const uri: string = Wikiquote.endpoint + "?action=parse&format=json&pageid=" + pageId + "&prop=sections";
    console.log(uri);
    return RxHR.get(uri)
      .map((data: RxHttpRequestResponse) => {
        const reply = JSON.parse(data.body);
        const sectionsIn: Section[] = reply.parse.sections;
        const sectionsOut: string[] = [];
        for (const section of sectionsIn) {
          const splitNum = section.number.split('.');
          if (splitNum.length > 1 && splitNum[0] === "1") {
            sectionsOut.push(section.index);
          }
        }
        // Use section 1 if there are no "1.x" sections
        if (sectionsOut.length === 0) {
          sectionsOut.push("1");
        }
        return { titles: reply.parse.title, sections: sectionsOut };
      });
  }

  /**
   * Requests one random entry (`list=random`, `rnnamespace=0`).
   *
   * @returns Observable emitting `query.random[0].title` of the reply.
   */
  public static getRandomTitle(): Observable<string> {
    console.log("getting random title");
    // A single request against the shared endpoint. A stray earlier `return`
    // used to hand back the raw en.wikiquote response and left this mapped
    // request unreachable.
    return RxHR.get(Wikiquote.endpoint + "?action=query&list=random&format=json&rnnamespace=0")
      .map((data: RxHttpRequestResponse) => {
        console.log("body: " + data.body);
        const reply = JSON.parse(data.body);
        return reply.query.random[0].title;
      });
  }

  /**
   * Strips markup from an HTML fragment.
   *
   * @param inputHtml HTML (or plain text) to convert.
   * @returns The text content of the parsed fragment.
   */
  private static htmlToText(inputHtml: string): string {
    const loaded: CheerioStatic = cheerio.load(inputHtml);
    // Read the text once from the document root. Selecting '*' and calling
    // text() concatenates the text of every element including nested ones,
    // which repeats the same content several times.
    return loaded.root().text();
  }

  /**
   * Extracts the leading content of every `<li>` that is not nested inside
   * another `<li>`.
   *
   * Only the FIRST child node of each item is serialized; any further
   * children are ignored (presumably to drop trailing nested lists such as
   * source notes — confirm against real pages).
   *
   * @param input HTML to scan.
   * @returns One HTML string per top-level list item that has at least one child.
   */
  private static getAllTopLevelLis(input: string): string[] {
    const output: string[] = [];
    const cheerioContent: CheerioStatic = cheerio.load(input);
    const lis: Cheerio = cheerioContent('li:not(li li)');
    lis.each((i: number, element: CheerioElement) => {
      const firstChild = element.children[0];
      // An empty <li></li> has no child to serialize; skip it instead of
      // handing `undefined` to cheerio.
      if (firstChild === undefined) {
        return;
      }
      const content: CheerioStatic = cheerio.load(firstChild);
      output.push(content.html());
    });
    return output;
  }

  /**
   * Fetches the rendered HTML of one section and returns the text of its
   * top-level list items.
   *
   * @param pageId Page id sent as the `pageid` query parameter.
   * @param sectionIndex Section identifier sent as the `section` query
   *   parameter (URI-encoded before use).
   * @returns Observable emitting one plain-text string per top-level list item.
   */
  public static getQuotesForSection(pageId: number, sectionIndex: string): Observable<string[]> {
    const uri: string = Wikiquote.endpoint + "?action=parse&format=json&pageid=" + pageId
      + "&noimages=&section=" + encodeURIComponent(sectionIndex);
    console.log(uri);
    return RxHR.get(uri)
      .map((data: RxHttpRequestResponse) => {
        const reply = JSON.parse(data.body);
        const content: string = reply.parse.text["*"];
        const lis: string[] = Wikiquote.getAllTopLevelLis(content);
        return lis.map((li: string) => Wikiquote.htmlToText(li));
      });
  }
}

0 comments on commit d9f354d

Please sign in to comment.