# Create a dedicated conda environment for the demo and switch into it.
conda create -n fishing
# `conda activate` is the supported form; the older `source activate`
# spelling is deprecated in current conda releases.
conda activate fishing
conda install numpy pip pandas
# Run pip and the spaCy downloader through the activated environment's
# interpreter so both installs land in `fishing` without relying on a
# user-specific absolute path.
python -m pip install spacyfishing
python -m spacy download en_core_web_sm
A quick demo using the spacyfishing library – “a spaCy wrapper for Entity-Fishing, a tool for named entity recognition, linking and disambiguation against Wikidata.” The library facilitates disambiguating and linking named entities to the Wikidata knowledge base.
1 Reticulate & Python
# Point reticulate at the Python interpreter inside the `fishing` conda
# environment; this is set before reticulate is first used below.
Sys.setenv(RETICULATE_PYTHON = "/home/jtimm/anaconda3/envs/fishing/bin/python")

# Bind the session to the `fishing` environment, naming the conda binary
# explicitly rather than relying on auto-detection.
reticulate::use_condaenv(
  condaenv = "fishing",
  conda = "/home/jtimm/anaconda3/bin/conda"
)
2 Build a simple news corpus
# Build the corpus metadata: query the RSS feed for the search phrase, then
# pass the result through quicknews' RSS clean-up step.
qn <- quicknews::qnews_build_rss('war in ukraine') |>
  quicknews::qnews_strip_rss()

# Preview the first five rows and first three columns as a table.
qn[1:5, 1:3] |> knitr::kable()
date | source | title |
---|---|---|
2022-06-28 | NPR | Russia-Ukraine war: What happened today (June 28) |
2022-06-28 | The Washington Post | Latest Russia-Ukraine war news: Live updates |
2022-06-28 | VOA News | Latest Developments in Ukraine: June 28 |
2022-06-28 | The New York Times | The West Seeks a More Effective Way to Tighten Sanctions on Russia |
2022-06-28 | CNN | Russia’s war in Ukraine: Live updates |
# Extract the article content for the first three links, using three cores.
arts <- quicknews::qnews_extract_article(qn$link[1:3], cores = 3)

# Keep the text of the first article; the Python chunk below reads it as
# `r.text_en`.
text_en <- arts$text[1]
3 spaCy
import spacy

# Load the small English pipeline, then append a sentence splitter and the
# entity-fishing linker. `extra_info` is enabled so the additional entity
# attributes read later in this document are populated.
nlp = spacy.load("en_core_web_sm")
nlp.add_pipe('sentencizer')
nlp.add_pipe("entityfishing", config={"extra_info": True})

# `r` is reticulate's handle on the R session; `r.text_en` is the article
# text assigned on the R side.
doc = nlp(r.text_en)
3.1 Entities to Wikipedia
import pandas as pd

# One tuple per recognised entity: the spaCy label and surface text,
# followed by the custom attributes read from the `e._` extension namespace.
entities = [(e.label_,
             e.text,
             e._.normal_term,
             e._.kb_qid,
             e._.url_wikidata,
             e._.nerd_score,
             e._.description)
            for e in doc.ents]

# Column names follow the tuple order above.
df99 = pd.DataFrame(entities,
                    columns=['type',
                             'entity',
                             'normed',
                             'qid',
                             'url',
                             'score',
                             'description'])
# Pull the pandas data frame back into R through reticulate, drop the long
# `description` column, and render the rest as an interactive table.
reticulate::py$df99 |>
  dplyr::select(-description) |>
  DT::datatable(rownames = FALSE)
3.2 Wikidata description
# Show up to the first ten lines of the third entity's description, wrapped
# at 60 characters. `head()` is used instead of `[1:10]` so a short
# description is not padded with NA entries.
strwrap(reticulate::py$df99$description[[3]], width = 60) |>
  head(10)
[1] "'''Kremenchuk''' (, ;, [[Romanization of"
[2] "Russian|translit.]] ''Kremenchug''), an important"
[3] "industrial [[city]] in central [[Ukraine]], stands on the"
[4] "banks of the [[Dnieper]] River. Kremenchuk is the [[Capital"
[5] "city|administrative center]] of the [[Kremenchuk Raion]]"
[6] "([[Raion|district]]) in [[Poltava Oblast]]"
[7] "([[Oblast|province]]). Kremenchuk is administratively"
[8] "incorporated as a [[City of regional significance"
[9] "(Ukraine)|city of oblast significance]] and does not belong"
[10] "to the raion. Population: Along with [[Svitlovodsk]] and"
4 displaCy
from spacy import displacy

# Materialise the sentence spans so they can be sliced, then render the
# named entities of the first four sentences.
ss = list(doc.sents)
displacy.render(ss[:4], style="ent")
A photograph taken Tuesday [DATE] shows charred goods in a grocery store of the destroyed Amstor mall [PERSON] in Kremenchuk [LOC], central Ukraine [GPE], one day [DATE] after it was hit by a Russian [NORP] missile strike.
The death toll climbed to at least 20 [CARDINAL] after Monday [DATE]'s missile attack on a crowded mall in the central Ukrainian [GPE] city of Kremenchuk [ORG], which leaders at a Group of Seven [ORG] meeting called a "war crime."
On Tuesday [DATE], emergency responders ended a rescue search for survivors.
Russia [GPE]'s government denied hitting the shopping center, claiming it caught fire after Russia [GPE] struck a nearby weapons depot.
’