-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreateIndex.py
More file actions
66 lines (54 loc) · 2.15 KB
/
createIndex.py
File metadata and controls
66 lines (54 loc) · 2.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import codecs
from os import walk, mkdir
import os.path
from whoosh.index import create_in, open_dir
from whoosh.fields import *
from whoosh.qparser import QueryParser
from whoosh.query import Term
from django.conf import settings
# --- Configuration, all taken from the Django settings module -------------

# Root directory that gets walked for files to index; it is also the prefix
# removed from each file path to build the "uipath" field.
basePath = settings.BASE_PATH

# Directory in which the Whoosh index is created.
targetPath = "".join((settings.INDEX_DIR, "/woosterapp"))

# Collection of file extensions (without the leading dot) that are indexed.
ext_list = settings.EXT_LIST

# File type used to build the "filetype" term query in the search check below.
filterFileType = "hpp"
def start(mypath, writer):
    """Index every file under ``mypath`` whose extension is in ``ext_list``.

    Walks ``mypath`` recursively.  Each matching file is read as UTF-8
    (bytes that cannot be decoded are dropped) and added to ``writer`` as
    one document with the fields ``path``, ``filetype``, ``uipath`` and
    ``content``.  The writer is committed once the walk has finished.  If
    indexing fails part-way, the writer is cancelled instead so it is not
    left open, and the original exception is re-raised.

    Args:
        mypath: Root directory to scan.
        writer: Whoosh index writer that receives the documents.  This
            function finalises it (commit or cancel), so it must not be
            reused afterwards.
    """
    try:
        for dirpath, _dirnames, filenames in walk(mypath):
            for f in filenames:
                # Text after the last dot; a name without any dot yields
                # the whole file name.
                ext = f.split(".")[-1]
                if ext not in ext_list:
                    continue

                completePath = os.path.join(dirpath, f)
                print("%s %s" % (f, dirpath))

                with codecs.open(completePath, encoding='utf-8',
                                 errors='ignore') as content_file:
                    cont = content_file.read()

                # Path shown in the UI, relative to basePath.  relpath() is
                # used instead of slicing off len(basePath) + 1 characters,
                # which chopped the first character of the relative path
                # whenever basePath ended with a path separator.
                uipath = os.path.relpath(completePath, basePath)

                writer.add_document(path=unicode(completePath),
                                    filetype=unicode(ext),
                                    uipath=unicode(uipath),
                                    content=cont)
    except Exception:
        # Discard the pending documents and release the writer rather than
        # leaving it open when the walk fails.
        writer.cancel()
        raise
    writer.commit()
# Index layout: "path" and "filetype" are stored and can be read back from a
# hit; "uipath" and "content" are indexed for searching only.
schema = Schema(path=ID(stored=True), filetype=TEXT(stored=True),
                uipath=TEXT, content=TEXT)

# create_in() needs an existing directory, so make sure it is there first.
if not os.path.isdir(targetPath):
    os.makedirs(targetPath)

# Build a fresh index in targetPath and fill it from basePath.
# (To reuse an already built index, open it with open_dir() instead.)
ix = create_in(targetPath, schema)
writer = ix.writer()
start(basePath, writer)

# Quick sanity check of the new index: search the file contents.
with ix.searcher() as searcher:
    query = QueryParser("content", ix.schema).parse(u"adobe")
    # NOTE(review): filter_q is built but not passed to search(), so the
    # results below are NOT restricted to filterFileType.  Pass it as
    # search(..., filter=filter_q) if that restriction is wanted - confirm.
    filter_q = Term("filetype", filterFileType)
    results = searcher.search(query, limit=None)
    print("%s %s" % (results.scored_length(), len(results)))
    for result in results:
        print(result['filetype'])
        print(type(result))
        # A hit's docnum is a document number in the index, not a position
        # in ``results``; look the document up through the searcher rather
        # than indexing ``results`` with it.
        dn = result.docnum
        print(searcher.stored_fields(dn))

# Usage notes:
# - "content" is not stored, so highlighting a hit needs the file text read
#   again from result["path"]:
#       result.highlights("content", text=filecontents)
# - To search by file name, parse the query against the "uipath" field.
# - For paged searching use searcher.search_page(query, pagenum, pagelen=20).