This live notebook was created by Owais Ahmad. Contact and questions: owaiskhan9654.github.io
!pip install elasticsearch
# Import packages
import numpy as np
import pandas as pd
import re
import ijson
from pprint import pprint
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
from elasticsearch import Elasticsearch
# Elasticsearch configuration.
# The client does not define HOST/PORT keyword arguments: older 7.x clients
# appear to ignore them (so the connection only worked through the built-in
# localhost:9200 default), while 8.x clients reject unknown keywords.
# Passing the node URL through `hosts` states the target explicitly.
es = Elasticsearch(hosts=["http://localhost:9200"])
%%time
# Stream the article dump with ijson and keep only the first MAX_ARTICLES
# records, collecting each field into its own list.
from itertools import islice

MAX_ARTICLES = 10000  # raise this to load (and later index) more articles

abstractText = []
meshMajor = []
pmid = []
title = []
journal = []
year = []

# The context manager closes the file once streaming is done.
# NOTE(review): the file is opened in text mode with the platform default
# encoding, as before -- confirm the dump's encoding and pass it explicitly.
with open(r"D:/Lab Backup by Sushil/OWAIS/BIO ASQ DATASET TASK A/allMeSH_2021.json") as f:
    objects = ijson.items(f, 'articles.item')
    # islice stops after MAX_ARTICLES items without reading the rest of the file.
    for obj in tqdm(islice(objects, MAX_ARTICLES)):
        abstractText.append(obj["abstractText"].strip())
        meshMajor.append(obj["meshMajor"])
        pmid.append(obj["pmid"])
        title.append(obj['title'])
        journal.append(obj['journal'])
        year.append(obj['year'])

count = len(pmid)  # number of articles actually loaded

# Convert the column lists into one dict per article, which is the shape
# passed to es.index() as the document body.
data = pd.DataFrame({
    'abstractText': abstractText,
    'journal': journal,
    'meshMajor': meshMajor,
    'pmid': pmid,
    'title': title,
    'year': year,
})
data = data.to_dict(orient='records')

# Show one sample record (index 509); skipped when fewer records were loaded.
if len(data) > 509:
    pprint(data[509])
%%time
# Index every record into 'bioasq_task_9a', using its position in `data`
# as the document id.
result = None  # stays None when `data` is empty, so the pprint below is safe
for i, a_data in enumerate(tqdm(data)):
    result = es.index(index='bioasq_task_9a', body=a_data, id=i)

# Only the response for the last indexed document is displayed.
pprint(result)
print('\n\nThis is only showing last inserted element')
# Read every document back by id; ids are the positions 0..len(data)-1.
doc_ids = range(len(data))
for i in tqdm(doc_ids):
    result = es.get(index="bioasq_task_9a", id=i)

# Display just the final response to keep the notebook output small.
pprint(result)
print('\n\nSize of this notebook will become to large so I only printing the element which is last inserted')
# Uncomment to drop the index and start over:
# es.indices.delete(index="bioasq_task_9a")

# Show the aliases of all indices ("*" matches every index name).
# The pattern is passed by keyword because newer clients accept API
# arguments by keyword only; older clients treat it the same as the
# positional form.
print(es.indices.get_alias(index="*"))
# Creating Query Function
## Match Query
def Elastic_ser(Query="COVID-19", Result_size=2):
    """Run a match query on the ``meshMajor`` field of ``bioasq_task_9a``.

    Args:
        Query: Text to match against ``meshMajor`` (default ``"COVID-19"``).
        Result_size: Maximum number of hits requested (default ``2``).

    Returns:
        The response returned by ``es.search`` for the request.
    """
    match_clause = {"meshMajor": Query}
    search_request = {
        "from": 0,
        "size": Result_size,
        "query": {"match": match_clause},
    }
    return es.search(index="bioasq_task_9a", body=search_request)
# Example: fetch a single hit whose meshMajor matches 'SARS-CoV-2'.
Elastic_ser(Query='SARS-CoV-2', Result_size=1)
# Bool query on meshMajor: must match "Pneumonia, Viral", must not match
# "COVID-19", and optionally matches "Betacoronavirus".
excluded_term = {"match": {"meshMajor": "COVID-19"}}
preferred_term = {"match": {"meshMajor": "Betacoronavirus"}}
required_term = {"match": {"meshMajor": "Pneumonia, Viral"}}

body = {
    "from": 0,
    "size": 1,  # raise this to return more hits
    "query": {
        "bool": {
            "must_not": excluded_term,
            "should": preferred_term,
            "must": required_term,
        },
    },
}

res = es.search(index="bioasq_task_9a", body=body)
res