main.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448
  1. import init, {
  2. initialize,
  3. generate_public_parameters,
  4. generate_query,
  5. decode_response
  6. } from '../pkg/client.js';
  7. import './bz2.js';
  8. import './wtf_wikipedia.js';
  9. import './wtf-plugin-html.js';
  10. wtf.extend(wtfHtml);
  11. const API_URL = "/api";
  12. const CHECK_URL = "/check";
  13. const SETUP_URL = "/setup";
  14. const QUERY_URL = "/query";
  15. async function postData(url = '', data = {}, json = false) {
  16. // const response = await fetch(url, {
  17. // method: 'POST',
  18. // mode: 'cors',
  19. // cache: 'no-store',
  20. // credentials: 'omit',
  21. // headers: {
  22. // 'Content-Type': 'application/octet-stream',
  23. // 'Content-Length': data.length
  24. // },
  25. // redirect: 'follow',
  26. // referrerPolicy: 'no-referrer',
  27. // body: data
  28. // });
  29. // if (json) {
  30. // return response.json();
  31. // } else {
  32. // let data = await response.arrayBuffer();
  33. // return new Uint8Array(data);
  34. // }
  35. // Can't use Fetch API here since it lacks progress indication
  36. const xhr = new XMLHttpRequest();
  37. xhr.responseType = json ? 'json' : 'arraybuffer';
  38. return await new Promise((resolve, reject) => {
  39. xhr.upload.addEventListener("progress", (event) => {
  40. if (event.lengthComputable) {
  41. setProgress(Math.round(event.loaded / event.total * 100))
  42. }
  43. });
  44. xhr.addEventListener("loadend", () => {
  45. resolve(xhr.readyState === 4 && xhr.status === 200);
  46. });
  47. xhr.onload = function () {
  48. if (xhr.status >= 200 && xhr.status < 300) {
  49. resolve(xhr.response);
  50. } else {
  51. reject({
  52. status: xhr.status,
  53. statusText: xhr.statusText
  54. });
  55. }
  56. };
  57. xhr.onerror = function () {
  58. reject({
  59. status: xhr.status,
  60. statusText: xhr.statusText
  61. });
  62. };
  63. xhr.open("POST", url, true);
  64. xhr.setRequestHeader("Content-Type", "application/octet-stream");
  65. xhr.send(new Blob([data.buffer]));
  66. });
  67. }
  68. async function getData(url = '', json = false) {
  69. const response = await fetch(url, {
  70. method: 'GET',
  71. cache: 'default',
  72. credentials: 'omit',
  73. redirect: 'follow',
  74. referrerPolicy: 'no-referrer'
  75. });
  76. if (json) {
  77. return response.json();
  78. } else {
  79. let data = await response.arrayBuffer();
  80. return new Uint8Array(data);
  81. }
  82. }
  83. const api = {
  84. check: async (uuid) => getData(API_URL + CHECK_URL + "?uuid="+uuid, true),
  85. setup: async (data) => postData(API_URL + SETUP_URL, data, true),
  86. query: async (data) => postData(API_URL + QUERY_URL, data, false)
  87. }
  88. function extractTitle(article) {
  89. var title = "";
  90. var endTitleTagIdx = article.indexOf("</title>");
  91. if (endTitleTagIdx != -1) {
  92. title = article.slice(0, endTitleTagIdx);
  93. }
  94. return title;
  95. }
  96. function preprocessWikiText(wikiText, targetTitle) {
  97. targetTitle = targetTitle.toLowerCase();
  98. let articles = wikiText.split("<title>")
  99. .filter(d => d.length > 10)
  100. .filter(d => {
  101. return extractTitle(d).toLowerCase() == targetTitle;
  102. });
  103. if (articles.length === 0) {
  104. console.log("error decoding...");
  105. return "";
  106. }
  107. let d = articles[0];
  108. let title = extractTitle(d);
  109. let articlePageMatch = d.match(/<text>/);
  110. if (!articlePageMatch) {
  111. console.log("error decoding...");
  112. return "";
  113. }
  114. let startPageContentIdx = articlePageMatch.index + articlePageMatch[0].length;
  115. let endPageContentIdx = d.slice(startPageContentIdx).indexOf("</text>")
  116. d = d.slice(startPageContentIdx, endPageContentIdx);
  117. d = d
  118. .replace(/&lt;ref[\s\S]{0,500}?&lt;\/ref&gt;/gi, "")
  119. .replace(/&lt;ref[\s\S]{0,500}?\/&gt;/gi, "")
  120. .replace(/&lt;ref&gt;[\s\S]{0,500}?&lt;\/ref&gt;/gi, "")
  121. .replace(/&lt;![\s\S]{0,500}?--&gt;/gi, "");
  122. return {
  123. "wikiText": d,
  124. "title": title
  125. };
  126. }
  127. function postProcessWikiHTML(wikiHTML, title) {
  128. wikiHTML = wikiHTML.replace(/<img.*?\/>/g, "");
  129. wikiHTML = "<h2 class=\"title\">"+title+"</h2>" + wikiHTML
  130. return wikiHTML;
  131. }
  132. function resultToHtml(result, title) {
  133. let decompressedData = bz2.decompress(result);
  134. let wikiText = new TextDecoder("utf-8").decode(decompressedData);
  135. let processedData = preprocessWikiText(wikiText, title);
  136. let wikiHTML = wtf(processedData.wikiText).html();
  137. wikiHTML = postProcessWikiHTML(wikiHTML, processedData.title);
  138. return "<article>" + wikiHTML + "</article>";
  139. }
// Publish resultToHtml as a property of the global window object, making it
// reachable from outside this module.
window.resultToHtml = resultToHtml;
  141. function addBold(suggestion, query) {
  142. return '<span class="highlight">'
  143. + suggestion.slice(0,query.length)
  144. + "</span>"
  145. + suggestion.slice(query.length);
  146. }
  147. function showSuggestionsBox(suggestions, query) {
  148. var htmlSuggestions = '<div class="suggestions">'
  149. + suggestions.map(m => "<div>"+addBold(m, query)+"</div>").join('')
  150. + "</div>";
  151. document.querySelector('.searchbutton').insertAdjacentHTML('afterend', htmlSuggestions);
  152. document.querySelectorAll('.suggestions > div').forEach((el) => {
  153. el.onclick = (e) => {
  154. document.querySelector(".searchbox").value = el.innerHTML
  155. .replace('<span class="highlight">', '')
  156. .replace('</span>', '');
  157. clearExistingSuggestionsBox();
  158. document.querySelector('#make_query').click();
  159. }
  160. });
  161. }
  162. function clearExistingSuggestionsBox() {
  163. var existing = document.querySelector('.suggestions');
  164. if (existing) {
  165. existing.remove();
  166. }
  167. }
  168. function hasTitle(title) {
  169. return title && window.title_index.hasOwnProperty(title) && window.title_index[title] < window.numArticles;
  170. }
  171. function followRedirects(title) {
  172. if (hasTitle(title)) {
  173. return title;
  174. } else if (window.redirects.hasOwnProperty(title) && hasTitle(window.redirects[title])) {
  175. return window.redirects[title];
  176. } else {
  177. return null;
  178. }
  179. }
  180. function startLoading(message, hasProgress) {
  181. window.loading = true;
  182. window.started_loading = Date.now();
  183. if (hasProgress) {
  184. document.querySelector(".progress").classList.remove("off");
  185. document.querySelector(".loading-icon").classList.add("off");
  186. } else {
  187. document.querySelector(".progress").classList.add("off");
  188. document.querySelector(".loading-icon").classList.remove("off");
  189. document.querySelector(".loading-icon").classList.remove("hidden");
  190. }
  191. document.querySelector(".loading .message").innerHTML = message+"...";
  192. document.querySelector(".loading .message").classList.add("inprogress");
  193. }
  194. function stopLoading(message) {
  195. window.loading = false;
  196. document.querySelector(".loading-icon").classList.add("hidden");
  197. let seconds = (Date.now() - window.started_loading) / 1000
  198. let secondsRounded = Math.round(seconds * 100) / 100;
  199. let timingMessage = secondsRounded > 0.01 ? (" Took "+secondsRounded+"s.") : "";
  200. document.querySelector(".loading .message").innerHTML = "Done " + message.toLowerCase() + "." + timingMessage;
  201. document.querySelector(".loading .message").classList.remove("inprogress");
  202. }
  203. function queryTitleOnClick(title, displayTitle) {
  204. return async (e) => {
  205. e.preventDefault();
  206. document.querySelector(".searchbox").value = displayTitle;
  207. window.scrollTo(0, 0);
  208. queryTitle(title);
  209. return false;
  210. }
  211. }
  212. function enableLinks(element) {
  213. element.querySelectorAll('a').forEach((el) => {
  214. let displayTitle = el.getAttribute("href").slice(2).replace(/_/g, " ");
  215. let linkTitle = displayTitle.toLowerCase();
  216. if (hasTitle(linkTitle)) {
  217. el.onclick = queryTitleOnClick(linkTitle, displayTitle);
  218. } else {
  219. var redirected = followRedirects(linkTitle);
  220. if (redirected !== null && hasTitle(redirected)) {
  221. el.onclick = queryTitleOnClick(redirected, displayTitle);
  222. } else {
  223. el.classList.add("broken")
  224. }
  225. }
  226. })
  227. }
  228. const DB_NAME = 'spiralKey';
  229. const KEY_SIZE = 32;
  230. const MAX_VALID_TIME = 604800000; // 1 week
  231. async function arrayBufferToBase64(data) {
  232. const base64url = await new Promise((r) => {
  233. const reader = new FileReader()
  234. reader.onload = () => r(reader.result)
  235. reader.readAsDataURL(new Blob([data]))
  236. })
  237. return base64url.split(",", 2)[1]
  238. }
  239. function base64ToArrayBuffer(str) {
  240. return Uint8Array.from(atob(str), c => c.charCodeAt(0));
  241. }
  242. async function storeState(key, uuid) {
  243. console.log(key);
  244. let dataToStore = {
  245. "key": await arrayBufferToBase64(key),
  246. "uuid": uuid,
  247. "createdAt": Date.now()
  248. }
  249. window.localStorage[DB_NAME] = JSON.stringify(dataToStore);
  250. }
  251. function retrieveState() {
  252. if (!window.localStorage || !window.localStorage[DB_NAME]) return false;
  253. let state = JSON.parse(window.localStorage[DB_NAME]);
  254. state["key"] = base64ToArrayBuffer(state["key"]);
  255. return state;
  256. }
  257. function setStateFromKey(key) {
  258. console.log("Initializing...");
  259. window.key = key;
  260. window.client = initialize(undefined, key);
  261. console.log("done");
  262. console.log("Generating public parameters...");
  263. window.publicParameters = generate_public_parameters(window.client);
  264. console.log(`done (${publicParameters.length} bytes)`);
  265. }
  266. async function isStateValid(state) {
  267. console.log("Checking if cached state is still valid")
  268. if (Date.now() - state.createdAt > MAX_VALID_TIME) return false;
  269. let isValidResponse = await api.check(state.uuid);
  270. let isValid = isValidResponse.is_valid;
  271. if (!isValid) return false;
  272. return true;
  273. }
  274. async function setUpClient() {
  275. let state = retrieveState();
  276. if (state && await isStateValid(state)) {
  277. console.log("Loading previous client state")
  278. setStateFromKey(state.key);
  279. window.id = state.uuid;
  280. return true;
  281. } else {
  282. console.log("No state stored, generating new client state")
  283. let key = new Uint8Array(KEY_SIZE);
  284. self.crypto.getRandomValues(key);
  285. setStateFromKey(key);
  286. return false;
  287. }
  288. }
  289. async function uploadState() {
  290. startLoading("Uploading setup data", true);
  291. console.log("Sending public parameters...");
  292. let setup_resp = await api.setup(window.publicParameters);
  293. console.log("sent.");
  294. let id = setup_resp["id"];
  295. stopLoading("Uploading setup data");
  296. await storeState(window.key, id);
  297. return id;
  298. }
  299. async function query(targetIdx, title) {
  300. if (!window.hasSetUp) {
  301. let id = await uploadState();
  302. if (!id) return false;
  303. window.hasSetUp = true;
  304. window.id = id;
  305. }
  306. startLoading("Loading article");
  307. console.log("Generating query... ("+targetIdx+")");
  308. let query = generate_query(window.client, window.id, targetIdx);
  309. console.log(`done (${query.length} bytes)`);
  310. console.log("Sending query...");
  311. let response = new Uint8Array(await api.query(query));
  312. console.log("sent.");
  313. console.log(`done, got (${response.length} bytes)`);
  314. console.log("Decoding result...");
  315. let result = decode_response(window.client, response)
  316. console.log("done.")
  317. console.log("Final result:")
  318. console.log(result);
  319. let resultHtml = resultToHtml(result, title);
  320. let outputArea = document.getElementById("output");
  321. outputArea.innerHTML = resultHtml;
  322. enableLinks(outputArea);
  323. stopLoading("Loading article");
  324. }
  325. async function queryTitle(targetTitle) {
  326. let redirectedTitle = followRedirects(targetTitle);
  327. let articleIndex = window.title_index[redirectedTitle];
  328. return await query(articleIndex, targetTitle);
  329. }
  330. async function run() {
  331. startLoading("Initializing");
  332. await init();
  333. stopLoading("Initializing");
  334. window.numArticles = 65536;
  335. window.articleSize = 100000;
  336. let makeQueryBtn = document.querySelector('#make_query');
  337. let searchBox = document.querySelector(".searchbox");
  338. document.querySelector(".sidebar-collapse-btn").onclick = () => {
  339. document.querySelector(".sidebar").classList.toggle("collapsed");
  340. }
  341. startLoading("Loading article titles");
  342. let title_index_p = getData("data/enwiki-20220320-index.json", true);
  343. let redirect_backlinks_p = getData("data/redirects-old.json", true);
  344. let setupClientResult = setUpClient();
  345. window.title_index = await title_index_p;
  346. let keys = Object.keys(window.title_index);
  347. for (var i = 0; i < keys.length; i++) {
  348. let key = keys[i];
  349. window.title_index[key] /= window.articleSize;
  350. window.title_index[key.toLowerCase()] = window.title_index[key];
  351. }
  352. let redirect_backlinks = await redirect_backlinks_p;
  353. keys = Object.keys(redirect_backlinks);
  354. window.redirects = {}
  355. for (var i = 0; i < keys.length; i++) {
  356. let redirect_dest = keys[i];
  357. let redirect_srcs = redirect_backlinks[redirect_dest];
  358. for (var j = 0; j < redirect_srcs.length; j++) {
  359. window.redirects[redirect_srcs[j].toLowerCase()] = redirect_dest;
  360. }
  361. }
  362. window.hasSetUp = await setupClientResult;
  363. stopLoading("Loading article titles");
  364. searchBox.addEventListener('input', (e) => {
  365. clearExistingSuggestionsBox();
  366. let search = e.target.value;
  367. if (search.length < 1) return;
  368. var matching = Object.keys(window.title_index).filter((v) => v.startsWith(search));
  369. if (matching.length == 0) return;
  370. matching.sort();
  371. if (matching.length > 10) matching = matching.slice(0, 10);
  372. showSuggestionsBox(matching, search);
  373. })
  374. makeQueryBtn.onclick = async () => {
  375. makeQueryBtn.disabled = true;
  376. await queryTitle(searchBox.value);
  377. makeQueryBtn.disabled = false;
  378. }
  379. }
// Start the application when the module is evaluated. The promise returned by
// run() is not awaited or given a rejection handler here.
run();
  381. function setProgress(progress) {
  382. document.querySelector(".progress").style.background =
  383. "conic-gradient(#333 " +
  384. progress +
  385. "%,#fff " +
  386. progress +
  387. "%)";
  388. // document.getElementById("middle-circle").innerHTML =
  389. // progress.toString() + "%";
  390. }
// Publish setProgress as a property of the global window object, making it
// reachable from outside this module.
window.setProgress = setProgress;