diff --git a/.circleci/config.yml b/.circleci/config.yml
index 7c4d42b..44c71c1 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -56,6 +56,8 @@ jobs:
             . venv/bin/activate
             python riddle2.py -t
 
-      - store_artifacts:
-          path: test-reports
-          destination: test-reports
+      - run:
+          name: run tests for sher.py
+          command: |
+            . venv/bin/activate
+            python sher.py -d . -q "."
diff --git a/README.md b/README.md
index 59934b8..9f4016e 100644
--- a/README.md
+++ b/README.md
@@ -42,4 +42,22 @@ Options:
   -l, --loop            Continuing download loop. When this option is set
                         every 5 Minutes the program searches for new
                         images
-```
\ No newline at end of file
+```
+
+## sher.py
+Searches for string occurrences in a file (line by line) or directory
+(all directory and filenames in the tree).
+
+```commandline
+Usage: sher.py [options]
+
+Options:
+  -h, --help            show this help message and exit
+  -f S_FILE, --file=S_FILE
+                        Searching lines in the given file.
+  -d S_DIR, --directory=S_DIR
+                        Searching files in a directory.
+  -q QUERY, --query=QUERY
+                        The search term. Supporting "".
+  -l, --loop            Runs the program in an endless loop.
+```
\ No newline at end of file
diff --git a/lib/fsutils.py b/lib/fsutils.py
index 4bc84b2..184d38d 100644
--- a/lib/fsutils.py
+++ b/lib/fsutils.py
@@ -68,3 +68,62 @@ class FileInfo:
             self._init_info(dest)
         else:
             self._init_info(dest)
+
+
+class DirInfo:
+    """ A simple wrapper around the os.path functions that returns basic
+    directory info and lets you perform basic directory tasks. """
+
+    def __init__(self, dirname: str):
+        self._init_info(dirname)
+
+    def _init_info(self, dirname: str):
+        """ Sets all the variables required for performing directory tasks
+        and for inspecting the directory object.
+
+        Raises NotADirectoryError if the path exists but is not a directory.
+        A path that does not exist yet is accepted so that create() can make it. """
+        # stringvars
+        # The path stays a str: os.listdir and os.walk then return str names
+        # as well, so no result has to be decoded by hand.
+        self._path = os.path.normpath(dirname.replace('\\', '/'))
+        if os.path.exists(self._path) and not os.path.isdir(self._path):
+            raise NotADirectoryError("Not a Directory")
+        self.parent_dir, self.basename = os.path.split(self._path)
+        self.fullname = os.path.join(self.parent_dir, self.basename)
+        # boolvars
+        self.exist = os.path.exists(self.fullname)
+        self.ismount = self.islink = False
+        if self.exist:
+            self.ismount = os.path.ismount(self.fullname)
+            self.islink = os.path.islink(self.fullname)
+
+    def get_content(self) -> list:
+        """ Returns the content of the directory without subdirectory contents. """
+        return os.listdir(self.fullname)
+
+    def get_full_content(self) -> list:
+        """ Returns the paths of all directories and files in the directory tree. """
+        content = []
+        for dirname, dirnames, filenames in os.walk(self.fullname):
+            # subdirectories first, then the files of every visited directory
+            for name in dirnames + filenames:
+                content.append(os.path.join(dirname, name))
+        return content
+
+    def delete(self):
+        """ Deletes the directory together with everything inside it. """
+        shutil.rmtree(self.fullname)
+        self.exist = False
+
+    def delete_empty(self):
+        """ Deletes the directory if it is empty. Raises an Exception if it is not. """
+        if self.get_content():
+            raise Exception('Directory not empty')
+        self.delete()
+
+    def create(self):
+        """ Creates the directory if it does not exist and refreshes the info. """
+        if not self.exist:
+            os.mkdir(self.fullname)
+            self._init_info(self.fullname)
diff --git a/lib/miscutils.py b/lib/miscutils.py
new file mode 100644
index 0000000..d4179a2
--- /dev/null
+++ b/lib/miscutils.py
@@ -0,0 +1,71 @@
+class SearchEngine:
+    """ A search engine that searches a list of strings for
+    specific string occurrences.
+
+    Terms in double quotes are matched as a whole, everything else is
+    split at spaces and matched word by word. """
+
+    def __init__(self, data: list):
+        # None is treated as "no data" so a missing source cannot crash a search
+        self.data = data if data is not None else []
+
+    def search(self, q: str) -> tuple:
+        """ Returns all entries that match the query, best match first. """
+        q = q.lower()
+        _score_dic = {}
+
+        # checking for query syntax: splitting at '"' puts the quoted parts
+        # at the odd and the unquoted parts at the even indices
+        _parts = q.split('"')
+        _static_q = _parts[1::2]
+        _flex_q = _parts[0::2]
+
+        for _static_single in _static_q:  # search for whole term
+            self._add_scores(_score_dic, self._static_search(_static_single))
+
+        for _flex_single in _flex_q:  # search for single words
+            self._add_scores(_score_dic, self._flex_search(_flex_single.split(' ')))
+
+        returnable = sorted(_score_dic, key=_score_dic.get, reverse=True)
+        return tuple(returnable)  # change into a tuple because the order matters
+
+    @staticmethod
+    def _add_scores(total: dict, scores: dict):
+        """ Adds the scores to the total. A plain dict.update would replace
+        the score an entry already got from another part of the query. """
+        for entry, score in scores.items():
+            total[entry] = total.get(entry, 0) + score
+
+    def _flex_search(self, q: list, data: list=None) -> dict:  # single word search
+        """ Scores every entry that contains at least one of the words. """
+        _rescore_dic = {}  # returnable score dictionary
+
+        if data is None:
+            data = self.data
+
+        words = [sin_query for sin_query in q if sin_query]  # skip empty words
+        for entry in data:
+            lowered = entry.lower()
+            for sin_query in words:
+                if sin_query in lowered:
+                    # for each appearance the rank score increases
+                    score = (lowered.count(sin_query) / 10) + 0.9
+                    _rescore_dic[entry] = _rescore_dic.get(entry, 0) + score
+
+        return _rescore_dic
+
+    def _static_search(self, q: str, data: list=None) -> dict:  # whole term search
+        """ Scores every entry that contains the whole term. """
+        _rescore_dic = {}
+
+        if data is None:
+            data = self.data
+
+        if q:
+            for entry in data:
+                lowered = entry.lower()
+                if q in lowered:
+                    score = lowered.count(q) * 100
+                    _rescore_dic[entry] = _rescore_dic.get(entry, 0) + score
+
+        return _rescore_dic
diff --git a/sher.py b/sher.py
new file mode 100644
index 0000000..26075a3
--- /dev/null
+++ b/sher.py
@@ -0,0 +1,72 @@
+import os
+import optparse
+
+from lib import miscutils, fsutils
+
+sources = {}
+
+
+def optparse_init() -> tuple:
+    """ Parses the command line and returns the (options, args) tuple.
+    Exits with a usage error if neither a file nor a directory is given. """
+    parser = optparse.OptionParser()
+    parser.add_option('-f', '--file', type='string', dest='s_file', help='Searching lines in the given file.')
+    parser.add_option('-d', '--directory', type='string', dest='s_dir', help='Searching files in a directory.')
+    parser.add_option('-q', '--query', type='string', dest='query', help='The search term. Supporting "".')
+    parser.add_option('-l', '--loop', action='store_true', default=False, dest='loop',
+                      help='Runs the program in an endless loop.')
+    options, args = parser.parse_args()
+    if not (options.s_file or options.s_dir):
+        parser.error('No search source given. Use -f FILE and/or -d DIRECTORY.')
+    return options, args
+
+
+def read_file_source(fname):
+    """ Returns the lines of the file without their line endings or an
+    empty list if the file does not exist. """
+    finfo = fsutils.FileInfo(fname)
+    if not finfo.exist:
+        return []
+    with finfo.open('r') as f:
+        return [line.rstrip('\n') for line in f.readlines()]
+
+
+def read_directory_source(dirname):
+    """ Returns the paths of everything in the directory tree or an
+    empty list if the directory does not exist. """
+    dinfo = fsutils.DirInfo(dirname)
+    if not dinfo.exist:
+        return []
+    return dinfo.get_full_content()
+
+
+def main():
+    options, args = optparse_init()
+    engines = []
+
+    if options.s_file:
+        e_file = miscutils.SearchEngine(read_file_source(options.s_file))
+        engines.append(e_file)
+
+    if options.s_dir:
+        e_dir = miscutils.SearchEngine(read_directory_source(options.s_dir))
+        engines.append(e_dir)
+
+    query = options.query  # the command line query is used for the first search
+    while True:
+        if not query:
+            try:
+                query = input('Search: ')
+            except (EOFError, KeyboardInterrupt):
+                break  # leave cleanly on Ctrl-D / Ctrl-C
+
+        for engine in engines:
+            for res in engine.search(query):
+                print(res)
+        if not options.loop:
+            break
+        query = None  # ask for a new query in the next round
+
+
+if __name__ == '__main__':
+    main()