diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..b9a66ac --- /dev/null +++ b/.dockerignore @@ -0,0 +1,129 @@ +Dockerfile +README.md +*.pyc +*.pyo +*.pyd +__pycache__ +nvenv/* + + +.idea/* +.pyre/* + + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.DS_Store +.api.py +.db_config.py +db_config.py +.vscode/settings.json +config.json +credentials.json +folder_id.txt +settings.yaml +.export_env_vars +auth.json +settings.yml diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..f3d5c41 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' 
+labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/okr-feature-request.md b/.github/ISSUE_TEMPLATE/okr-feature-request.md new file mode 100644 index 0000000..87cd222 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/okr-feature-request.md @@ -0,0 +1,28 @@ +--- +name: OKR Feature Request +about: Suggest a clear objective & key result with a feature request +title: '' +labels: enhancement +assignees: '' + +--- + +## Objective +Support progress toward the [pick_a_milestone_from_here][1] by _improving something_ + +## Key Result +[Commit / Publish / Post / Report / Deploy] _something_ to _somewhere_ by _sometime_, _somehow_ + +## Details + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. 
+ +Add any fun tidbits that may be relevant for getting this done + +Or just fun tidbits for the sake of learning 😊 + +[1]: https://github.com/calpoly-csai/api/milestones diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..de14a30 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,36 @@ +## What's New? +Please include a summary of the change and which issue is fixed. +Please also include relevant motivation and context. +List any dependencies that are required for this change. + +## Fixes #<> ... `(e.g. #9)` + +## Type of change (pick-one) +- [ ] Bug fix (_non-breaking change which fixes an issue_) +- [ ] New feature (_non-breaking change which adds functionality_) +- [ ] Breaking change (_fix or feature that would cause existing functionality to not work as expected_) +- [ ] This change requires a documentation update + +## How Has This Been Tested? +Please describe the tests that you ran to verify your changes. +Provide instructions so we can reproduce. +Please also list any relevant details for your test configuration (hardware/operating system/etc). 
+ +## Checklist (check-all-before-merge) +_formatting help: `- [x]` means "checked" and `- [ ]` means "unchecked"_ + +- [ ] I documented my code according to the [Google Python Style Guide][1] + +- [ ] I ran `./build_docs.sh` and the docs look fine + +- [ ] I ran `./type_check.sh` and got no errors + +- [ ] I ran `./format.sh` because it automatically cleans my code for me 😄 + +- [ ] I ran `./lint.sh` to check for what "format" missed + +- [ ] I added my tests to the `/tests` directory + +- [ ] I ran `./run_tests.sh` and all the tests pass + +[1]: https://google.github.io/styleguide/pyguide.html diff --git a/.github/workflows/deploy_staging.yml b/.github/workflows/deploy_staging.yml new file mode 100644 index 0000000..42a4434 --- /dev/null +++ b/.github/workflows/deploy_staging.yml @@ -0,0 +1,51 @@ +name: Deploy Dev To Staging Server + +# on: +# push: +# branches: +# - dev + + +jobs: + deploy: + name: deploy to heroku + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: heroku login + uses: actions/heroku@master + env: + HEROKU_API_KEY: ${{ secrets.HEROKU_API_KEY }} + with: + args: container:login + + - name: heroku push + uses: actions/heroku@master + env: + HEROKU_API_KEY: ${{ secrets.HEROKU_API_KEY }} + DATABASE_HOSTNAME: ${{ secrets.DATABASE_HOSTNAME }} + DATABASE_PASSWORD: ${{ secrets.DATABASE_PASSWORD }} + DATABASE_USERNAME: ${{ secrets.DATABASE_USERNAME }} + DATABASE_NAME: ${{ secrets.DATABASE_NAME }} + PYDRIVE_CLIENT_ID: ${{ secrets.PYDRIVE_CLIENT_ID }} + PYDRIVE_CLIENT_SECRET: ${{ secrets.PYDRIVE_CLIENT_SECRET }} + # in issue #67 we considered generating + # credentials.json in a similar fashion to config.json + # but figured that the file was unreasonably large + # and therefore a simple JSON string would suffice. 
+ # https://github.com/calpoly-csai/api/issues/67 + GOOGLE_DRIVE_CREDENTIALS: ${{ secrets.GOOGLE_DRIVE_CREDENTIALS }} + GOOGLE_DRIVE_FOLDER_ID: ${{ secrets.GOOGLE_DRIVE_FOLDER_ID }} + GOOGLE_CLOUD_NLP_CREDENTIALS: ${{ secrets.GOOGLE_CLOUD_NLP_CREDENTIALS }} + GOOGLE_CLOUD_NLP_MODEL_NAME: ${{ secrets.GOOGLE_CLOUD_NLP_MODEL_NAME }} + with: + args: container:push -a calpoly-csai-nimbus web --arg DATABASE_HOSTNAME,DATABASE_PASSWORD,DATABASE_USERNAME,DATABASE_NAME,PYDRIVE_CLIENT_ID,PYDRIVE_CLIENT_SECRET,GOOGLE_DRIVE_CREDENTIALS,GOOGLE_DRIVE_FOLDER_ID,GOOGLE_CLOUD_NLP_CREDENTIALS,GOOGLE_CLOUD_NLP_MODEL_NAME + + - name: heroku release + uses: actions/heroku@master + env: + HEROKU_API_KEY: ${{ secrets.HEROKU_API_KEY }} + with: + args: container:release -a calpoly-csai-nimbus web diff --git a/.github/workflows/gce.yaml b/.github/workflows/gce.yaml new file mode 100644 index 0000000..4b1a372 --- /dev/null +++ b/.github/workflows/gce.yaml @@ -0,0 +1,98 @@ +# Copyright 2020 Google, LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Build and Deploy to Google Compute Engine + +on: + push: + branches: + - dev + +env: + GCE_PROJECT: ${{ secrets.GCE_PROJECT }} + GCE_INSTANCE: ${{ secrets.INSTANCE_NAME }} + GCE_INSTANCE_ZONE: us-west1-b # e.g. 
us-central1-a + # environment variables for the build + DATABASE_HOSTNAME: ${{ secrets.DATABASE_HOSTNAME }} + DATABASE_PASSWORD: ${{ secrets.DATABASE_PASSWORD }} + DATABASE_USERNAME: ${{ secrets.DATABASE_USERNAME }} + DATABASE_NAME: ${{ secrets.DATABASE_NAME }} + PYDRIVE_CLIENT_ID: ${{ secrets.PYDRIVE_CLIENT_ID }} + PYDRIVE_CLIENT_SECRET: ${{ secrets.PYDRIVE_CLIENT_SECRET }} + # in issue #67 we considered generating + # credentials.json in a similar fashion to config.json + # but figured that the file was unreasonably large + # and therefore a simple JSON string would suffice. + # https://github.com/calpoly-csai/api/issues/67 + GOOGLE_DRIVE_CREDENTIALS: ${{ secrets.GOOGLE_DRIVE_CREDENTIALS }} + GOOGLE_DRIVE_FOLDER_ID: ${{ secrets.GOOGLE_DRIVE_FOLDER_ID }} + GOOGLE_CLOUD_NLP_CREDENTIALS: ${{ secrets.GOOGLE_CLOUD_NLP_CREDENTIALS }} + GOOGLE_CLOUD_NLP_MODEL_NAME: ${{ secrets.GOOGLE_CLOUD_NLP_MODEL_NAME }} + GIT_SSH_CERT: ${{secrets.GIT_SSH_CERT}} + PORT: ${{secrets.SSL_PORT}} + +jobs: + setup-build-publish-deploy: + name: Setup, Build, Publish, and Deploy + runs-on: ubuntu-latest + steps: + + - name: Checkout + uses: actions/checkout@master + + # Setup gcloud CLI + - uses: GoogleCloudPlatform/github-actions/setup-gcloud@master + with: + version: '270.0.0' + service_account_email: ${{ secrets.SA_EMAIL }} + service_account_key: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} + + # Configure docker to use the gcloud command-line tool as a credential helper + - run: | + gcloud auth configure-docker + # Build the Docker image + - name: Build + run: | + docker build -t gcr.io/$GCE_PROJECT/$GCE_INSTANCE-image:$GITHUB_SHA \ + --build-arg GITHUB_SHA="$GITHUB_SHA" \ + --build-arg GITHUB_REF="$GITHUB_REF" \ + --build-arg DATABASE_HOSTNAME \ + --build-arg DATABASE_PASSWORD \ + --build-arg DATABASE_USERNAME \ + --build-arg DATABASE_NAME \ + --build-arg PYDRIVE_CLIENT_ID \ + --build-arg PYDRIVE_CLIENT_SECRET \ + --build-arg GOOGLE_DRIVE_CREDENTIALS \ + --build-arg 
GOOGLE_DRIVE_FOLDER_ID \ + --build-arg GOOGLE_CLOUD_NLP_CREDENTIALS \ + --build-arg GOOGLE_CLOUD_NLP_MODEL_NAME \ + --build-arg GIT_SSH_CERT \ + --build-arg PORT . + + # Push the Docker image to Google Container Registry + - name: Publish + run: | + docker push gcr.io/$GCE_PROJECT/$GCE_INSTANCE-image:$GITHUB_SHA + - name: Deploy + run: | + gcloud compute instances update-container $GCE_INSTANCE \ + --zone $GCE_INSTANCE_ZONE \ + --container-image=gcr.io/$GCE_PROJECT/$GCE_INSTANCE-image:$GITHUB_SHA \ + --project=$GCE_PROJECT + + # if we don't run cleanup, the disk on google cloud will fill with old images. + # run docker's prune command to cleanup old images. + - name: Cleanup + run: | + gcloud compute ssh $GCE_INSTANCE --zone=$GCE_INSTANCE_ZONE --project=$GCE_PROJECT --command='docker image prune -a -f' \ No newline at end of file diff --git a/.github/workflows/style_check.yml b/.github/workflows/style_check.yml new file mode 100644 index 0000000..f047084 --- /dev/null +++ b/.github/workflows/style_check.yml @@ -0,0 +1,48 @@ +name: Python PEP8 Style Check + +on: + pull_request: + branches: + - master + - dev + push: + branches: + - master + - dev + pull_request_review_comment: + types: [created] + + +jobs: + style_check: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v1 + + - name: Set up Python 3.6 + uses: actions/setup-python@v1 + with: + python-version: 3.6 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Lint with flake8 + run: | + pip install flake8 + + flake8 --count \ + --ignore E722 \ + --show-source --statistics \ + --exclude .git,__pycache__,venv,build,dist,docs \ + --max-complexity 10 \ + --max-line-length=127 + + ## stop the build if there are Python syntax errors or undefined names + #flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + ## exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + #flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..4e58e8b --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,36 @@ +name: Run Tests + +on: + pull_request: + branches: + - master + - dev + push: + branches: + - master + - dev + pull_request_review_comment: + types: [created] + +jobs: + run_tests: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v1 + + - name: Set up Python 3.6 + uses: actions/setup-python@v1 + with: + python-version: 3.6 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pytest + pip install hypothesis + + - name: Test with hypothesis + run: ./run_tests.sh diff --git a/.github/workflows/type_check.yml b/.github/workflows/type_check.yml new file mode 100644 index 0000000..444c136 --- /dev/null +++ b/.github/workflows/type_check.yml @@ -0,0 +1,38 @@ +name: Python Pyre Type Annotation Check + +on: + pull_request: + branches: + - master + - dev + push: + branches: + - master + - dev + +jobs: + type_check: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v1 + + - name: Set up Python 3.6 + uses: actions/setup-python@v1 + with: + python-version: 3.6 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run the Type Checker + run: | + echo "todo..." + ./type_check.sh + - name: Run Type Annotation Coverage + run: | + echo "todo..." + echo "todo..." 
\ No newline at end of file diff --git a/.gitignore b/.gitignore index 2ee68c2..f479798 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,10 @@ +nvenv/* + + +.idea/* +.pyre/* + + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -106,3 +113,17 @@ venv.bak/ .api.py .db_config.py db_config.py +.vscode/* +config.json +credentials.json +folder_id.txt +settings.yaml +.export_env_vars +auth.json +settings.yml +settings.yaml +nimbus_api.iml + +# models +*pkl +id_rsa diff --git a/.style.yapf b/.style.yapf new file mode 100644 index 0000000..fa2f60a --- /dev/null +++ b/.style.yapf @@ -0,0 +1,350 @@ +[style] +# Align closing bracket with visual indentation. +align_closing_bracket_with_visual_indent=False + +# Allow dictionary keys to exist on multiple lines. For example: +# +# x = { +# ('this is the first element of a tuple', +# 'this is the second element of a tuple'): +# value, +# } +allow_multiline_dictionary_keys=False + +# Allow lambdas to be formatted on more than one line. +allow_multiline_lambdas=False + +# Allow splitting before a default / named assignment in an argument list. +allow_split_before_default_or_named_assigns=True + +# Allow splits before the dictionary value. +allow_split_before_dict_value=True + +# Let spacing indicate operator precedence. For example: +# +# a = 1 * 2 + 3 / 4 +# b = 1 / 2 - 3 * 4 +# c = (1 + 2) * (3 - 4) +# d = (1 - 2) / (3 + 4) +# e = 1 * 2 - 3 +# f = 1 + 2 + 3 + 4 +# +# will be formatted as follows to indicate precedence: +# +# a = 1*2 + 3/4 +# b = 1/2 - 3*4 +# c = (1+2) * (3-4) +# d = (1-2) / (3+4) +# e = 1*2 - 3 +# f = 1 + 2 + 3 + 4 +# +arithmetic_precedence_indication=False + +# Number of blank lines surrounding top-level function and class +# definitions. +blank_lines_around_top_level_definition=2 + +# Insert a blank line before a class-level docstring. +blank_line_before_class_docstring=False + +# Insert a blank line before a module docstring. 
+blank_line_before_module_docstring=False + +# Insert a blank line before a 'def' or 'class' immediately nested +# within another 'def' or 'class'. For example: +# +# class Foo: +# # <------ this blank line +# def method(): +# ... +blank_line_before_nested_class_or_def=True + +# Do not split consecutive brackets. Only relevant when +# dedent_closing_brackets is set. For example: +# +# call_func_that_takes_a_dict( +# { +# 'key1': 'value1', +# 'key2': 'value2', +# } +# ) +# +# would reformat to: +# +# call_func_that_takes_a_dict({ +# 'key1': 'value1', +# 'key2': 'value2', +# }) +coalesce_brackets=False + +# The column limit. +column_limit=80 + +# The style for continuation alignment. Possible values are: +# +# - SPACE: Use spaces for continuation alignment. This is default behavior. +# - FIXED: Use fixed number (CONTINUATION_INDENT_WIDTH) of columns +# (ie: CONTINUATION_INDENT_WIDTH/INDENT_WIDTH tabs) for continuation +# alignment. +# - VALIGN-RIGHT: Vertically align continuation lines with indent +# characters. Slightly right (one more indent character) if cannot +# vertically align continuation lines with indent characters. +# +# For options FIXED, and VALIGN-RIGHT are only available when USE_TABS is +# enabled. +continuation_align_style=SPACE + +# Indent width used for line continuations. +continuation_indent_width=4 + +# Put closing brackets on a separate line, dedented, if the bracketed +# expression can't fit in a single line. Applies to all kinds of brackets, +# including function definitions and calls. 
For example: +# +# config = { +# 'key1': 'value1', +# 'key2': 'value2', +# } # <--- this bracket is dedented and on a separate line +# +# time_series = self.remote_client.query_entity_counters( +# entity='dev3246.region1', +# key='dns.query_latency_tcp', +# transform=Transformation.AVERAGE(window=timedelta(seconds=60)), +# start_ts=now()-timedelta(days=3), +# end_ts=now(), +# ) # <--- this bracket is dedented and on a separate line +dedent_closing_brackets=False + +# Disable the heuristic which places each list element on a separate line +# if the list is comma-terminated. +disable_ending_comma_heuristic=False + +# Place each dictionary entry onto its own line. +each_dict_entry_on_separate_line=True + +# The regex for an i18n comment. The presence of this comment stops +# reformatting of that line, because the comments are required to be +# next to the string they translate. +i18n_comment=#\..* + +# The i18n function call names. The presence of this function stops +# reformatting on that line, because the string it has cannot be moved +# away from the i18n comment. +i18n_function_call=N_, _ + +# Indent blank lines. +indent_blank_lines=False + +# Put closing brackets on a separate line, indented, if the bracketed +# expression can't fit in a single line. Applies to all kinds of brackets, +# including function definitions and calls. For example: +# +# config = { +# 'key1': 'value1', +# 'key2': 'value2', +# } # <--- this bracket is indented and on a separate line +# +# time_series = self.remote_client.query_entity_counters( +# entity='dev3246.region1', +# key='dns.query_latency_tcp', +# transform=Transformation.AVERAGE(window=timedelta(seconds=60)), +# start_ts=now()-timedelta(days=3), +# end_ts=now(), +# ) # <--- this bracket is indented and on a separate line +indent_closing_brackets=False + +# Indent the dictionary value if it cannot fit on the same line as the +# dictionary key. 
For example: +# +# config = { +# 'key1': +# 'value1', +# 'key2': value1 + +# value2, +# } +indent_dictionary_value=True + +# The number of columns to use for indentation. +indent_width=4 + +# Join short lines into one line. E.g., single line 'if' statements. +join_multiple_lines=False + +# Do not include spaces around selected binary operators. For example: +# +# 1 + 2 * 3 - 4 / 5 +# +# will be formatted as follows when configured with "*,/": +# +# 1 + 2*3 - 4/5 +no_spaces_around_selected_binary_operators= + +# Use spaces around default or named assigns. +spaces_around_default_or_named_assign=False + +# Use spaces around the power operator. +spaces_around_power_operator=False + +# The number of spaces required before a trailing comment. +# This can be a single value (representing the number of spaces +# before each trailing comment) or list of values (representing +# alignment column values; trailing comments within a block will +# be aligned to the first column value that is greater than the maximum +# line length within the block). 
For example: +# +# With spaces_before_comment=5: +# +# 1 + 1 # Adding values +# +# will be formatted as: +# +# 1 + 1 # Adding values <-- 5 spaces between the end of the statement and comment +# +# With spaces_before_comment=15, 20: +# +# 1 + 1 # Adding values +# two + two # More adding +# +# longer_statement # This is a longer statement +# short # This is a shorter statement +# +# a_very_long_statement_that_extends_beyond_the_final_column # Comment +# short # This is a shorter statement +# +# will be formatted as: +# +# 1 + 1 # Adding values <-- end of line comments in block aligned to col 15 +# two + two # More adding +# +# longer_statement # This is a longer statement <-- end of line comments in block aligned to col 20 +# short # This is a shorter statement +# +# a_very_long_statement_that_extends_beyond_the_final_column # Comment <-- the end of line comments are aligned based on the line length +# short # This is a shorter statement +# +spaces_before_comment=2 + +# Insert a space between the ending comma and closing bracket of a list, +# etc. +space_between_ending_comma_and_closing_bracket=False + +# Split before arguments +split_all_comma_separated_values=False + +# Split before arguments, but do not split all subexpressions recursively +# (unless needed). +split_all_top_level_comma_separated_values=False + +# Split before arguments if the argument list is terminated by a +# comma. +split_arguments_when_comma_terminated=False + +# Set to True to prefer splitting before '+', '-', '*', '/', '//', or '@' +# rather than after. +split_before_arithmetic_operator=False + +# Set to True to prefer splitting before '&', '|' or '^' rather than +# after. +split_before_bitwise_operator=False + +# Split before the closing bracket if a list or dict literal doesn't fit on +# a single line. +split_before_closing_bracket=True + +# Split before a dictionary or set generator (comp_for). 
For example, note +# the split before the 'for': +# +# foo = { +# variable: 'Hello world, have a nice day!' +# for variable in bar if variable != 42 +# } +split_before_dict_set_generator=False + +# Split before the '.' if we need to split a longer expression: +# +# foo = ('This is a really long string: {}, {}, {}, {}'.format(a, b, c, d)) +# +# would reformat to something like: +# +# foo = ('This is a really long string: {}, {}, {}, {}' +# .format(a, b, c, d)) +split_before_dot=False + +# Split after the opening paren which surrounds an expression if it doesn't +# fit on a single line. +split_before_expression_after_opening_paren=False + +# If an argument / parameter list is going to be split, then split before +# the first argument. +split_before_first_argument=False + +# Set to True to prefer splitting before 'and' or 'or' rather than +# after. +split_before_logical_operator=False + +# Split named assignments onto individual lines. +split_before_named_assigns=True + +# Set to True to split list comprehensions and generators that have +# non-trivial expressions and multiple clauses before each of these +# clauses. For example: +# +# result = [ +# a_long_var + 100 for a_long_var in xrange(1000) +# if a_long_var % 10] +# +# would reformat to something like: +# +# result = [ +# a_long_var + 100 +# for a_long_var in xrange(1000) +# if a_long_var % 10] +split_complex_comprehension=True + +# The penalty for splitting right after the opening bracket. +split_penalty_after_opening_bracket=300 + +# The penalty for splitting the line after a unary operator. +split_penalty_after_unary_operator=10000 + +# The penalty of splitting the line around the '+', '-', '*', '/', '//', +# ``%``, and '@' operators. +split_penalty_arithmetic_operator=300 + +# The penalty for splitting right before an if expression. +split_penalty_before_if_expr=0 + +# The penalty of splitting the line around the '&', '|', and '^' +# operators. 
+split_penalty_bitwise_operator=300 + +# The penalty for splitting a list comprehension or generator +# expression. +split_penalty_comprehension=2100 + +# The penalty for characters over the column limit. +split_penalty_excess_character=7000 + +# The penalty incurred by adding a line split to the unwrapped line. The +# more line splits added the higher the penalty. +split_penalty_for_added_line_split=30 + +# The penalty of splitting a list of "import as" names. For example: +# +# from a_very_long_or_indented_module_name_yada_yad import (long_argument_1, +# long_argument_2, +# long_argument_3) +# +# would reformat to something like: +# +# from a_very_long_or_indented_module_name_yada_yad import ( +# long_argument_1, long_argument_2, long_argument_3) +split_penalty_import_names=0 + +# The penalty of splitting the line around the 'and' and 'or' +# operators. +split_penalty_logical_operator=300 + +# Use the Tab character for indentation. +use_tabs=False + diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..6f08bda --- /dev/null +++ b/Dockerfile @@ -0,0 +1,102 @@ +# +# This Dockerfile will configure the environment for Google Compute Engine +# +FROM ubuntu:latest + +# the chmod will +# resolve PermissionError on heroku +# more context in issue #100 +# TODO: make chmod less insecure by only setting needed permissions +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update \ + && apt-get install -y python3-pip python3-dev certbot cron git \ + && cd /usr/local/bin \ + && ln -s /usr/bin/python3 python \ + && pip3 install --upgrade pip \ + && chmod 777 /usr/lib/python3/dist-packages/* + +# verify permissions set +RUN ls -lah /usr/lib/python3/dist-packages/ + +# put the requirements file into the container +ADD requirements.txt /nimbus/requirements.txt + +# install the requirements in the container +RUN pip3 install -r /nimbus/requirements.txt \ + && chmod 777 /usr/lib/python3/dist-packages/* + +# verify permissions set +RUN ls -lah 
/usr/lib/python3/dist-packages/ + +# put all the code into nimbus folder +ADD . /nimbus + +EXPOSE 443:443 + +# need to declare the --build-arg that gets passed in +# for ENV to work properly +ARG DATABASE_HOSTNAME +ARG DATABASE_PASSWORD +ARG DATABASE_USERNAME +ARG DATABASE_NAME +ARG PYDRIVE_CLIENT_ID +ARG PYDRIVE_CLIENT_SECRET +ARG GOOGLE_DRIVE_CREDENTIALS +ARG GOOGLE_DRIVE_FOLDER_ID +ARG GOOGLE_CLOUD_NLP_CREDENTIALS +ARG GOOGLE_CLOUD_NLP_MODEL_NAME +ARG GIT_SSH_CERT +# this lets you say `ENV PORT ${PORT}` below (the one within the ${...}) +ARG PORT + + +# env variables needed for the setup_special_files_from_env.py +ENV DATABASE_HOSTNAME ${DATABASE_HOSTNAME} +ENV DATABASE_PASSWORD ${DATABASE_PASSWORD} +ENV DATABASE_USERNAME ${DATABASE_USERNAME} +ENV DATABASE_NAME ${DATABASE_NAME} +ENV PYDRIVE_CLIENT_ID ${PYDRIVE_CLIENT_ID} +ENV PYDRIVE_CLIENT_SECRET ${PYDRIVE_CLIENT_SECRET} +ENV GOOGLE_DRIVE_CREDENTIALS ${GOOGLE_DRIVE_CREDENTIALS} +ENV GOOGLE_DRIVE_FOLDER_ID ${GOOGLE_DRIVE_FOLDER_ID} +ENV GOOGLE_CLOUD_NLP_CREDENTIALS ${GOOGLE_CLOUD_NLP_CREDENTIALS} +ENV GOOGLE_CLOUD_NLP_MODEL_NAME ${GOOGLE_CLOUD_NLP_MODEL_NAME} +ENV GIT_SSH_CERT ${GIT_SSH_CERT} +# gunicorn will look for PORT. 
+# --build-arg PORT="$SSL_PORT" will choose `443` for production +ENV PORT ${PORT} + + +# need set WORKDIR for setup_special_files_from_env.py to save config.json to right place +WORKDIR /nimbus + +# generate all the special configuration files +RUN ./setup_special_files_from_env.py + +RUN python -m pip install --upgrade urllib3 + +# get en_core_web_sm +RUN python3 -m spacy download en_core_web_sm +# RUN python3 -m spacy download en_core_web_lg + +# just make sure the file is there +RUN ls | grep config + +# need set WORKDIR for gunicorn +WORKDIR /nimbus + +# verify permissions set +RUN ls -lah /usr/lib/python3/dist-packages/ + +# setup SSH keys correctly +RUN /nimbus/scripts/setup_letsencrypt.sh + +# the gunicorn_config.py will check the env vars for PORT +# else it will do port=8080 +CMD ["gunicorn", \ + "flask_api:app", \ + "--config=gunicorn_config.py", \ + "--keyfile=/etc/letsencrypt/live/nimbus.api.calpolycsai.com/privkey.pem", \ + "--certfile=/etc/letsencrypt/live/nimbus.api.calpolycsai.com/cert.pem", \ + "--ca-certs=/etc/letsencrypt/live/nimbus.api.calpolycsai.com/chain.pem" \ + ] diff --git a/Dockerfile_heroku b/Dockerfile_heroku new file mode 100644 index 0000000..cc3aa9b --- /dev/null +++ b/Dockerfile_heroku @@ -0,0 +1,96 @@ +# FROM python:3.6-stretch +# FROM python:3.8-buster # needs pip install numpy +# FROM python:3.7-stretch +FROM ubuntu:bionic +RUN apt update + +# the chmod will +# resolve PermissionError on heroku +# more context in issue #100 +# TODO: make chmod less insecure by only setting needed permissions +RUN apt-get update \ + && apt-get install -y python3-pip python3-dev \ + && cd /usr/local/bin \ + && ln -s /usr/bin/python3 python \ + && pip3 install --upgrade pip \ + && chmod 777 /usr/lib/python3/dist-packages/* + +# verify permissions set +RUN ls -lah /usr/lib/python3/dist-packages/ + +# put the requirements file into the container +ADD requirements.txt /nimbus/requirements.txt + +# install the requirements in the container +RUN pip 
install -r /nimbus/requirements.txt \ + && chmod 777 /usr/lib/python3/dist-packages/* + +# verify permissions set +RUN ls -lah /usr/lib/python3/dist-packages/ + +# put all the code into nimbus folder +ADD . /nimbus + +# # https://devcenter.heroku.com/articles/container-registry-and-runtime#unsupported-dockerfile-commands +# # Expose is NOT supported by Heroku +# EXPOSE 8080 + +# need to declare the --build-arg that gets passed in +# for ENV to work properly +ARG DATABASE_HOSTNAME +ARG DATABASE_PASSWORD +ARG DATABASE_USERNAME +ARG DATABASE_NAME +ARG PYDRIVE_CLIENT_ID +ARG PYDRIVE_CLIENT_SECRET +ARG GOOGLE_DRIVE_CREDENTIALS +ARG GOOGLE_DRIVE_FOLDER_ID +ARG GOOGLE_CLOUD_NLP_CREDENTIALS +ARG GOOGLE_CLOUD_NLP_MODEL_NAME + +# env variables needed for the setup...py +ENV DATABASE_HOSTNAME ${DATABASE_HOSTNAME} +ENV DATABASE_PASSWORD ${DATABASE_PASSWORD} +ENV DATABASE_USERNAME ${DATABASE_USERNAME} +ENV DATABASE_NAME ${DATABASE_NAME} +ENV PYDRIVE_CLIENT_ID ${PYDRIVE_CLIENT_ID} +ENV PYDRIVE_CLIENT_SECRET ${PYDRIVE_CLIENT_SECRET} +ENV GOOGLE_DRIVE_CREDENTIALS ${GOOGLE_DRIVE_CREDENTIALS} +ENV GOOGLE_DRIVE_FOLDER_ID ${GOOGLE_DRIVE_FOLDER_ID} +ENV GOOGLE_CLOUD_NLP_CREDENTIALS ${GOOGLE_CLOUD_NLP_CREDENTIALS} +ENV GOOGLE_CLOUD_NLP_MODEL_NAME ${GOOGLE_CLOUD_NLP_MODEL_NAME} + +# need set WORKDIR for setup...py to save config.json to right place +WORKDIR /nimbus + +# generate all the special configuration files +RUN ./setup_special_files_from_env.py + +## download the nlp stuff +# RUN ./download_nlp_stuff.sh +# +## download the nltk stuff +# RUN python3 download_nltk_stuff.py + +# the above "download..." 
scripts were inconsistent on heroku
+# so lets download the required stuff directly
+RUN python3 -m spacy download en_core_web_sm
+# TODO: consider en_core_web_lg or en_core_web_md because server can handle it
+# RUN python3 -m spacy download en_core_web_lg
+RUN python3 -m nltk.downloader punkt
+RUN python3 -m nltk.downloader averaged_perceptron_tagger
+
+# just make sure the file is there
+RUN ls | grep config
+
+# need set WORKDIR for gunicorn
+WORKDIR /nimbus
+
+# verify permissions set
+RUN ls -lah /usr/lib/python3/dist-packages/
+
+# https://github.com/heroku/alpinehelloworld/blob/master/Dockerfile
+# Heroku will set the PORT environment variable
+# the gunicorn_config.py will check the env vars for PORT
+# else it will do port=8080
+CMD ["gunicorn", "flask_api:app", "--config=gunicorn_config.py"]
\ No newline at end of file
diff --git a/Entity/AudioSampleMetaData.py b/Entity/AudioSampleMetaData.py
new file mode 100644
index 0000000..753df0b
--- /dev/null
+++ b/Entity/AudioSampleMetaData.py
@@ -0,0 +1,59 @@
+from sqlalchemy import Column, Integer, String, Text, Enum, Boolean
+from sqlalchemy.ext.declarative import declarative_base
+import enum
+
+# This is the way SQLAlchemy initializes their special classes
+Base = declarative_base()
+
+
+class NoiseLevel(enum.Enum):
+    """Noise categories used by AudioSampleMetaData.noise_level."""
+    quiet = 1
+    medium = 2
+    loud = 3
+
+
+class AudioSampleMetaData(Base):
+    """SQLAlchemy declarative model mapped to the "AudioSampleMetaData" table.
+
+    ``is_view`` is a plain class attribute, not a mapped column.
+    """
+    __tablename__ = "AudioSampleMetaData"
+    id = Column(Integer, primary_key=True)
+    # SQLAlchemy resolves Boolean to TINYINT within MYSQL
+    is_wake_word = Column(Boolean)
+    # TODO: run a magical SQL script that support emojis in first_name
+    first_name = Column(String(255))
+    last_name = Column(String(255))
+    gender = Column(String(3))
+    noise_level = Column(Enum(NoiseLevel))
+    location = Column(String(255))
+    tone = Column(String(255))
+    timestamp = Column(Integer)
+    username = Column(String(255))
+    emphasis = Column(String(255))
+    script = Column(String(255))
+    audio_file_id = Column(String(1024))
+    is_view = False
+
+    def __repr__(self):
+        """Return a short debug string identifying this row."""
+        # Must return a str; use only columns this model defines.
+        return "{}(id={!r}, username={!r}, is_wake_word={!r})".format(
+            self.__class__.__name__, self.id, self.username, self.is_wake_word
+        )
diff --git a/Entity/Clubs.py b/Entity/Clubs.py
new file mode 100644
index 0000000..0161f31
--- /dev/null
+++ b/Entity/Clubs.py
@@ -0,0 +1,46 @@
+from sqlalchemy import Column, Integer, String, Text
+from sqlalchemy.ext.declarative import declarative_base
+
+Base = declarative_base()
+
+
+class Clubs(Base):
+    """SQLAlchemy declarative model mapped to the "Clubs" table.
+
+    ``is_view`` and ``synonyms`` are plain class attributes, not mapped
+    columns.
+    """
+    __tablename__ = "Clubs"
+    id_clubs = Column(Integer, primary_key=True)
+    club_name = Column(String(255))
+    types = Column(String(255))
+    desc = Column(Text)
+    contact_email = Column(String(255))
+    contact_email_2 = Column(String(255))
+    contact_person = Column(String(255))
+    # TODO: how big can a phone number be including extensions and formatting?
+    contact_phone = Column(String(255))
+    box = Column(String(3))
+    advisor = Column(String(255))
+    affiliation = Column(String(255))
+    is_view = False
+    synonyms = [
+        "fraternities",
+        "sororities",
+        "organizations",
+        "extracurriculars",
+        "groups",
+    ]
+
+    def __repr__(self):
+        """Return ``ClassName(name=value, ...)`` over the public attribute names.
+
+        Values are read from the instance ``__dict__``, so any name not
+        stored there is rendered with the value ``None``.
+        """
+        D = self.__dict__
+        attributes = [
+            f"{k}={D.get(k)}" for k in self.__dir__() if not k.startswith("_")
+        ]
+        attributes_string = ", ".join(attributes)
+        return f"{self.__class__.__name__}({attributes_string})"
diff --git a/Entity/Corequisites.py b/Entity/Corequisites.py
new file mode 100644
index 0000000..49b453a
--- /dev/null
+++ b/Entity/Corequisites.py
@@ -0,0 +1,7 @@
+class Corequisites:
+    """Plain record holding a course id and its corequisite course."""
+
+    def __init__(self, courseId, coreqCourse):
+        self.course_id = courseId
+        self.coreq_course = coreqCourse
+        self.is_view = False
diff --git a/Entity/Corrections.py b/Entity/Corrections.py
new file mode 100644
index 0000000..a02906e
--- /dev/null
+++ b/Entity/Corrections.py
@@ -0,0 +1,15 @@
+class Corrections:
+    def __init__(
+        self,
+        correctionId,
+        rawText,
+        correctionText,
+        tableNameToCorrect,
+        columnNameToCorrect,
+    ):
+        self.correction_id = correctionId
+        self.raw_text = rawText
+        self.correction_text = correctionText
+        self.table_name_to_correct = tableNameToCorrect
class Entity:
    """
    An instance of an element in the Nimbus SQL Database.

    Construction runs `validate(data)`, then `format(data)`, then copies every
    key of the formatted dict onto the instance as an attribute. Subclasses
    override the hooks below; the implementations here are permissive
    defaults.
    """

    # NOTE(review): presumably read by SQLAlchemy when a subclass is also a
    # declarative model, to keep these helper names from being mapped as
    # columns -- confirm against the mapper documentation.
    __mapper_args__ = {
        "exclude_properties": [
            "validate",
            "format",
            "metadata",
            "get_data",
            "validators",
            "formatters",
        ]
    }

    def __init__(self, data):
        """
        Validates and formats `data`, then sets each formatted key as an attribute.
        Parameters
        ----------
        `data:dict` Raw data representing a single instance of the entity.
        Raises
        -------
        Whatever `validate` or `format` raise for malformed data.
        """
        # Function-scope import keeps this class independent of file-level
        # imports; debug logging replaces an unconditional print to stdout.
        import logging

        logging.getLogger(__name__).debug("constructed %s", type(self).__name__)
        self.validate(data)
        # A hook that returns None (as the old base `format` did) must not
        # crash construction, so treat it as "nothing to set".
        formatted_data = self.format(data) or {}
        for key, value in formatted_data.items():
            setattr(self, key, value)

    def validate(self, data):
        """
        Checks if data has all required fields. Raises exception if data is malformed.
        Note that you can have multiple validators to take in different data schemas.
        The base implementation accepts any data.
        Parameters
        ----------
        `data:dict` Data to be validated, representing a single instance of the entity.
        Raises
        -------
        Some type of Exception based on the problem with the data
        """
        pass

    def format(self, data) -> dict:
        """
        Casts data to correct types, assigns keys to match object.
        The base implementation performs no casting and returns a shallow copy
        (an empty dict for empty or missing data).
        Parameters
        ----------
        `data:dict` Data to be formatted, representing a single instance of the entity.
        Returns
        -------
        `dict` new data object which has been formatted.
        """
        return dict(data) if data else {}

    def get_data(self):
        """
        Returns all fields that are related to table data.
        The base implementation returns None; subclasses override it.
        """
        pass

    def update(self, data: dict) -> bool:
        """
        Updates properties of the entity with the values in the `data` dict.
        The base implementation changes nothing.
        Parameters
        ----------
        `data:dict` A subset of entity's properties to update.
        Returns
        -------
        `bool` False, because the base class does not apply any update.
        """
        return False

    def __repr__(self):
        """
        Returns `ClassName(name=value, ...)` for every public name in `dir(self)`.
        Values come from the instance `__dict__`, so names that only exist on
        the class (methods, unset columns) are rendered as `None`.
        """
        D = self.__dict__
        attributes = [
            f"{k}={D.get(k)}" for k in self.__dir__() if not k.startswith("_")
        ]
        attributes_string = ", ".join(attributes)
        return f"{self.__class__.__name__}({attributes_string})"
".join(attributes) + return f"{self.__class__.__name__}({attributes_string})" # noqa diff --git a/Entity/ExpectedKeys.py b/Entity/ExpectedKeys.py new file mode 100644 index 0000000..5d03f53 --- /dev/null +++ b/Entity/ExpectedKeys.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +""" +This module holds the expected keys for each Entity type. +""" + +from Entity.AudioSampleMetaData import AudioSampleMetaData, NoiseLevel +from Entity.Calendars import Calendars +from Entity.Clubs import Clubs +from Entity.Courses import Courses +from Entity.ErrorLog import ErrorLog +from Entity.Locations import Locations +from Entity.QueryFeedback import QueryFeedback +from Entity.QuestionAnswerPair import QuestionAnswerPair, AnswerType +from Entity.QuestionLog import QuestionLog +from Entity.OfficeHours import OfficeHours +from Entity.Professors import ProfessorsProperties +from Entity.Profs import Profs +from Entity.Professors import Professors +from Entity.ProfessorSectionView import ProfessorSectionView +from Entity.Sections import Sections, SectionType + +# Supported Entities only and their expected keys +EXPECTED_KEYS_BY_ENTITY = { + AudioSampleMetaData: [ + "is_wake_word", + "first_name", + "last_name", + "gender", + "noise_level", + "location", + "tone", + "timestamp", + "username", + "audio_file_id", + "script", + "emphasis", + ], + Clubs: [ + "club_name", + "types", + "desc", + "contact_email", + "contact_email_2", + "contact_person", + "contact_phone", + "box", + "advisor", + "affiliation", + ], + Calendars: ["date", "day", "month", "year", "raw_events_text",], + Courses: [ + "dept", + "course_num", + "course_name", + "units", + "prerequisites", + "corequisites", + "concurrent", + "recommended", + "terms_offered", + "ge_areas", + "desc", + ], + ErrorLog: ["question", "stacktrace", "timestamp",], + Locations: ["building_number", "name", "longitude", "latitude",], + Sections: [ + "section_name", + "instructor", + "alias", + "title", + "phone", + "office", + "type", + "days", + 
class Locations(Base):
    """
    SQLAlchemy declarative model mapped to the "Locations" table.

    `is_view` and `synonyms` are plain class attributes, not columns.
    """

    __tablename__ = "Locations"
    location_id = Column(Integer, primary_key=True)
    building_number = Column(String(5))
    name = Column(String(100))
    longitude = Column(String(255))
    latitude = Column(String(255))
    is_view = False
    # NOTE(review): presumably alternative words a question may use to refer
    # to this entity -- confirm against the code that consumes `synonyms`.
    synonyms = [
        "locations",
        "secret hideout",
        "secret_hideout",
        "rooms",
        "offices",
        "buildings",
        "labs",
        "laboratory",
        "laboratories",
        "units",
        "north",
        "west",
        "south",
        "east",
        "centers",
    ]

    def __repr__(self):
        """
        Returns `Locations(building_number=..., name=..., longitude=..., latitude=...)`.

        The previous implementation called `"".format(...)`, whose template has
        no placeholders, so every instance was rendered as an empty string.
        """
        return (
            f"{self.__class__.__name__}("
            f"building_number={self.building_number}, "
            f"name={self.name}, "
            f"longitude={self.longitude}, "
            f"latitude={self.latitude})"
        )
class PolyRating:
    """
    Plain value holder with an id, an average rating, a rating count and a
    professor id.

    NOTE(review): presumably mirrors one row of a PolyRatings table -- confirm.
    """

    def __init__(self, id, avg_rating, num_ratings, professors_id):
        # Tuple assignment stores the targets left to right, so the instance
        # attributes are created in the same order as the parameters.
        self.id, self.avg_rating = id, avg_rating
        self.num_ratings, self.professors_id = num_ratings, professors_id
        # Always starts out False; not configurable through the constructor.
        self.is_view = False
class Professors(Base):
    """
    SQLAlchemy declarative model mapped to the "Professors" table.

    `is_view` and `synonyms` are plain class attributes, not columns.
    """

    __tablename__ = "Professors"
    id = Column(Integer, primary_key=True)
    first_name = Column(String(50))
    last_name = Column(String(50))
    phone_number = Column(String(20))
    research_interests = Column(Text)
    email = Column(String(255))
    is_view = False
    synonyms = [
        "professors",
        "teachers",
        "lecturers",
        "faculty",
        "staff",
        "administration",
        "instructors",
        "educators",
        "TA",
    ]

    def __str__(self):
        # str() and repr() intentionally produce the same text.
        return self.__repr__()

    def __repr__(self):
        """
        Returns `Professors(name=value, ...)` for every public name in `dir(self)`.
        Values are looked up in the instance `__dict__`; names that are not
        present there are rendered as `None`.
        """
        loaded = self.__dict__
        rendered = []
        for name in self.__dir__():
            if name.startswith("_"):
                continue
            rendered.append(f"{name}={loaded.get(name)}")
        return f"{self.__class__.__name__}({', '.join(rendered)})"
class Profs(Base):
    """
    SQLAlchemy declarative model mapped to "Profs", keyed by `email`.

    `is_view` (True here) and `synonyms` are plain class attributes, not
    columns.
    """

    __tablename__ = "Profs"
    first_name = Column(String(50))
    last_name = Column(String(50))
    phone_number = Column(String(20))
    research_interests = Column(Text)
    email = Column(String(255), primary_key=True)
    office_hours = Column(Text)
    platform = Column(Text)
    latest_quarter = Column(Text)
    office = Column(Text)
    department = Column(String(255))
    title = Column(String(255))
    is_view = True
    synonyms = [
        "professors",
        "teachers",
        "lecturers",
        "faculty",
        "staff",
        "administration",
        "instructors",
        "educators",
        "TA",
    ]

    def __repr__(self):
        """
        Returns `Profs(name=value, ...)` for every public name in `dir(self)`.
        Values are looked up in the instance `__dict__`; names that are not
        present there are rendered as `None`.
        """
        loaded = self.__dict__
        rendered = []
        for name in self.__dir__():
            if name.startswith("_"):
                continue
            rendered.append(f"{name}={loaded.get(name)}")
        return f"{self.__class__.__name__}({', '.join(rendered)})"
class QuestionAnswerPair(Entity, Base):
    """
    SQLAlchemy declarative model mapped to the "QuestionAnswerPair" table.

    Incoming data uses the keys `question`, `answer`, `type`, `isAnswerable`,
    `verified` and `id`. `validators` checks those keys and `formatters` turns
    each one into a `(column_name, value)` pair.
    """

    __tablename__ = "QuestionAnswerPair"
    id = Column(Integer, primary_key=True)
    # SQLAlchemy resolves Boolean to TINYINT within MYSQL
    can_we_answer = Column(Boolean)
    verified = Column(Boolean)
    answer_type = Column(Enum(AnswerType))
    question_format = Column(Text)
    answer_format = Column(Text)
    is_view = False

    # Each validator receives the whole incoming dict and returns a bool.
    # They use `.get` and `isinstance` so that a missing key or a value of an
    # unexpected type yields False instead of raising.
    validators = {
        "question": {
            "validate": lambda data: isinstance(data.get("question"), str),
            "error_message": "Question wasn't provided as a string.",
        },
        "answer": {
            "validate": lambda data: isinstance(data.get("answer"), str),
            "error_message": "Answer wasn't provided as a string.",
        },
        "type": {
            "validate": lambda data: isinstance(data.get("type"), str)
            and data["type"].lower() in [t.name for t in AnswerType],
            "error_message": "Provided QAPair type is not provided or not supported.",
        },
        "verified": {
            "validate": lambda data: isinstance(data.get("verified"), bool)
            or data.get("verified") in ["true", "false"],
            "error_message": "Verified is not a boolean-like value.",
        },
        "isAnswerable": {
            "validate": lambda data: isinstance(data.get("isAnswerable"), bool)
            or data.get("isAnswerable") in ["true", "false"],
            "error_message": "isAnswerable is not a boolean-like value.",
        },
        "id": {
            # `type(...) is int` on purpose: it keeps rejecting bool, which
            # `isinstance(..., int)` would accept. The string branch used to
            # call the non-existent `str.is_digit`.
            "validate": lambda data: type(data.get("id")) is int
            or (isinstance(data.get("id"), str) and data["id"].isdigit()),
            "error_message": "id is not a valid type or not provided.",
        },
    }

    # Each formatter receives the whole incoming dict and returns a
    # `(column_name, value)` pair.
    formatters = {
        "question": lambda form: ("question_format", form["question"]),
        "answer": lambda form: ("answer_format", form["answer"]),
        # The fallback is now inside the pair; it used to return a bare enum
        # member, which could not be unpacked into (name, value).
        "type": lambda form: (
            "answer_type",
            AnswerType[form["type"].lower()] if "type" in form else AnswerType.other,
        ),
        "isAnswerable": lambda form: (
            "can_we_answer",
            form["isAnswerable"]
            if isinstance(form["isAnswerable"], bool)
            else form["isAnswerable"] == "true",
        ),
        "verified": lambda form: (
            "verified",
            form["verified"]
            if isinstance(form["verified"], bool)
            else form["verified"] == "true",
        ),
        "id": lambda form: ("id", int(form["id"])),
    }

    def validate(self, data):
        """
        Checks the required fields, and the optional fields that are present.
        Parameters
        ----------
        `data:dict` Incoming data with `question`, `answer` and `type`, and
        optionally `isAnswerable` and `verified`.
        Raises
        -------
        `ValueError` (an `Exception` subclass, so existing `except Exception`
        handlers still apply) carrying the failing validator's error message.
        """
        required_fields = ["question", "answer", "type"]
        optional_fields = ["isAnswerable", "verified"]
        fields_to_check = required_fields + [
            field for field in optional_fields if field in data
        ]
        for field in fields_to_check:
            validator = self.validators[field]
            if not validator["validate"](data):
                raise ValueError(validator["error_message"])

    def format(self, data) -> dict:
        """
        Converts incoming keys to column names and values.
        Keys without a formatter are ignored.
        Parameters
        ----------
        `data:dict` Incoming data, already validated.
        Returns
        -------
        `dict` mapping column names to formatted values.
        """
        form = {}
        for field in data:
            formatter = self.formatters.get(field)
            if formatter is None:
                continue
            # Formatters read the incoming data. Passing the still-empty
            # `form` (as before) raised KeyError for every field.
            key, value = formatter(data)
            form[key] = value
        return form

    def update(self, data: dict) -> bool:
        """
        Validates, formats and applies the given subset of fields.
        All fields are checked before any attribute is written, so a rejected
        update leaves the entity unchanged.
        Parameters
        ----------
        `data:dict` A subset of the incoming keys understood by `validators`.
        Returns
        -------
        `bool` True when every field was valid and applied, False otherwise
        (including unknown keys).
        """
        pending = {}
        try:
            for key in data:
                # validate
                if not self.validators[key]["validate"](data):
                    return False
                # format
                name, value = self.formatters[key](data)
                pending[name] = value
        except (AttributeError, KeyError, TypeError, ValueError):
            # Unknown key, non-dict input, or a value the formatter cannot
            # convert.
            return False
        # update
        for name, value in pending.items():
            setattr(self, name, value)
        return True

    def get_data(self):
        """
        Returns the table-related fields (every column except `id`) as a dict.
        """
        return {
            "can_we_answer": self.can_we_answer,
            "verified": self.verified,
            "answer_type": self.answer_type,
            "question_format": self.question_format,
            "answer_format": self.answer_format,
        }

    def __repr__(self):
        """
        A lazy __repr__ inspired by https://stackoverflow.com/a/60087190
        """
        D = self.__dict__
        attributes = [
            f"{k}={D.get(k)}" for k in self.__dir__() if not k.startswith("_")
        ]
        attributes_string = ", ".join(attributes)
        return f"{self.__class__.__name__}({attributes_string})"  # noqa
Column(Integer, primary_key=True) + section_name = Column(String(255)) + instructor = Column(String(255)) + alias = Column(String(255)) + title = Column(String(255)) + phone = Column(String(255)) + office = Column(String(255)) + type = Column(Enum(SectionType)) + days = Column(String(255)) + start = Column(String(255)) + end = Column(String(255)) + location = Column(String(255)) + department = Column(String(255)) + is_view = False + + def __repr__(self): + D = self.__dict__ + attributes = [ + f"{k}={D.get(k)}" for k in self.__dir__() if not k.startswith("_") + ] + attributes_string = ", ".join(attributes) + return f"{self.__class__.__name__}({attributes_string})" diff --git a/Entity/__init__.py b/Entity/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..3a98632 --- /dev/null +++ b/Makefile @@ -0,0 +1,10 @@ +all: + # + # let's `make` life easier xD + # + pip3 install invoke + invoke list + + +docker: + invoke docker diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..8390cfa --- /dev/null +++ b/Pipfile @@ -0,0 +1,66 @@ +[[source]] +name = "pypi" +url = "https://pypi.org/simple" +verify_ssl = true + +[dev-packages] +## mostly used in tests +requests = "==2.23.0" +## auto formatter +### may need to run +### `pipenv lock --pre` +### or +### `pipenv install --pre` +### to "Allow pre-releases" for black +black = "==19.10b0" +## linter +flake8 = "==3.7.9" +## testing +hypothesis = "==5.3.1" +pytest = "==5.3.4" +## type-checking +pyre-check = "==0.0.41" +## like the Unix `make` but better +invoke = "==1.4.1" +coverage = "*" + +[packages] +# REST API +## minimal framework +Flask = "==1.1.1" +## for security +Flask-Cors = "==3.0.8" +## used with flask in deployment +gunicorn = "==20.0.4" +## simple JSON (de)serialization +marshmallow = "*" +# save to google drive +PyDrive = "==1.3.1" +# database +## mysql client +mysql-connector-python = "==8.0.18" +## object-relational mapper 
+SQLAlchemy = "==1.3.13" +# QA.py +pandas = "==1.0.1" +fuzzywuzzy = "==0.18.0" +python-Levenshtein = "==0.12.0" +# natural language processing +google-api-core = "==1.16.0" +google-cloud = "==0.34.0" +google-cloud-automl = "==0.10.0" +nltk = "==3.4.5" +spacy = "==2.2.3" +scikit-learn = "==0.20.2" +# misc +## used to generate settings.yml +PyYAML = "==5.3" +## avoid PermissionError: [Errno 13] Permission denied +## for more context see issue #100 +six = "==1.11.0" +idna = "==2.6" +## because google-auth 1.11.2 wants setuptools>=40.3.0 +setuptools = ">=40.3.0" + +[requires] +python_version = "3.6" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..2ffb464 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,1098 @@ +{ + "_meta": { + "hash": { + "sha256": "9129a404b7674ce8ccf71d139fa02779d6b960bd79d29f28673cb51c8d806fe4" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.6" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "blis": { + "hashes": [ + "sha256:00473602629ba69fe6565108e21957e918cb48b59f5bf2f6bfb6e04de42500cb", + "sha256:03c368c9716ca814c436550a5f1e02ccf74850e613602519e3941d212e5aa177", + "sha256:135450caabc8aea9bb9250329ebdf7189982d9b57d5c92789b2ba2fe52c247a7", + "sha256:1402d9cbb0fbc21b749dd5b87d7ee14249e74a0ca38be6ecc56b3b356fca2f21", + "sha256:26b16d6005bb2671699831b5cc699905215d1abde1ec5c1d04de7dcd9eb29f75", + "sha256:3347a4b1b7d3ae14476aac9a6f7bf8ebf464863f4ebf4aea228874a7694ea240", + "sha256:38fe877a4b52e762f5e137a412e3c256545a696a12ae8c40d67b8815d2bb5097", + "sha256:4fb89c47ee06b58a4410a16fd5794847517262c9d2a342643475b477dfeff0a4", + "sha256:77a6486b9794af01bcdfd1bc6e067c93add4b93292e6f95bf6e5ce7f98bf0163", + "sha256:856142a11e37fd2c47c5006a3197e157bb8469a491a73d2d442223dd3279df84", + "sha256:8aeaf6954351593a1e412f80e398aa51df588d3c0de74b9f3323b694c603381b", + "sha256:9ede123065f3cacb109967755b3d83d4ca0de90643a9058129a6ab2d4051954f", + 
"sha256:d1d59faebc1c94f8f4f77154ef4b9d6d40364b111cf8fde48ee3b524c85f1075", + "sha256:d69257d317e86f34a7f230a2fd1f021fd2a1b944137f40d8cdbb23bd334cd0c4", + "sha256:ddd732c5274d1082fa92e2c42317587d5ebabce7741ca98120f69bd45d004b99", + "sha256:f0b0dad4d6268d9dba0a65a9db12dd7a2d8686b648399e4aa1aec7550697e99e" + ], + "version": "==0.4.1" + }, + "cachetools": { + "hashes": [ + "sha256:1d057645db16ca7fe1f3bd953558897603d6f0b9c51ed9d11eb4d071ec4e2aab", + "sha256:de5d88f87781602201cde465d3afe837546663b168e8b39df67411b0bf10cefc" + ], + "version": "==4.1.0" + }, + "catalogue": { + "hashes": [ + "sha256:584d78e7f4c3c6e2fd498eb56dfc8ef1f4ff738480237de2ccd26cbe2cf47172", + "sha256:d74d1d856c6b36a37bf14aa6dbbc27d0582667b7ab979a6108e61a575e8723f5" + ], + "version": "==1.0.0" + }, + "certifi": { + "hashes": [ + "sha256:1d987a998c75633c40847cc966fcf5904906c920a7f17ef374f5aa4282abd304", + "sha256:51fcb31174be6e6664c5f69e3e1691a2d72a1a12e90f872cbdb1567eb47b6519" + ], + "version": "==2020.4.5.1" + }, + "chardet": { + "hashes": [ + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" + ], + "version": "==3.0.4" + }, + "click": { + "hashes": [ + "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a", + "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc" + ], + "version": "==7.1.2" + }, + "cymem": { + "hashes": [ + "sha256:5083b2ab5fe13ced094a82e0df465e2dbbd9b1c013288888035e24fd6eb4ed01", + "sha256:622c20a57701d02f01a47e856dea248e112638f28c8249dbe3ed95a9702e3d74", + "sha256:6f4cb689a9552e9e13dccc89203c8ab09f210a7ffb92ce27c384a4a0be27b527", + "sha256:719f04a11ca709fc2b47868070d79fccff77e5d502ff32de2f4baa73cb16166f", + "sha256:7236252bed70f37b898933dcf8aa875d0829664a245a272516f27b30439df71c", + "sha256:7f5ddceb12b73f7fd2e4398266401b6f887003740ccd18c989a2af04500b5f2b", + "sha256:85b9364e099426bd7f445a7705aad87bf6dbb71d79e3802dd8ca14e181d38a33", + 
"sha256:c288a1bbdf58c360457443e5297e74844e1961e5e7001dbcb3a5297a41911a11", + "sha256:cd21ec48ee70878d46c486e2f7ae94b32bfc6b37c4d27876c5a5a00c4eb75c3c", + "sha256:d7505c500d994f11662e5595f5002251f572acc189f18944619352e2636f5181", + "sha256:dd24848fbd75b17bab06408da6c029ba7cc615bd9e4a1f755fb3a090025fb922", + "sha256:f4f19af4bca81f11922508a9dcf30ce1d2aee4972af9f81ce8e5331a6f46f5e1" + ], + "version": "==2.0.3" + }, + "flask": { + "hashes": [ + "sha256:13f9f196f330c7c2c5d7a5cf91af894110ca0215ac051b5844701f2bfd934d52", + "sha256:45eb5a6fd193d6cf7e0cf5d8a5b31f83d5faae0293695626f539a823e93b13f6" + ], + "index": "pypi", + "version": "==1.1.1" + }, + "flask-cors": { + "hashes": [ + "sha256:72170423eb4612f0847318afff8c247b38bd516b7737adfc10d1c2cdbb382d16", + "sha256:f4d97201660e6bbcff2d89d082b5b6d31abee04b1b3003ee073a6fd25ad1d69a" + ], + "index": "pypi", + "version": "==3.0.8" + }, + "fuzzywuzzy": { + "hashes": [ + "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8", + "sha256:928244b28db720d1e0ee7587acf660ea49d7e4c632569cad4f1cd7e68a5f0993" + ], + "index": "pypi", + "version": "==0.18.0" + }, + "google-api-core": { + "extras": [ + "grpc" + ], + "hashes": [ + "sha256:859f7392676761f2b160c6ee030c3422135ada4458f0948c5690a6a7c8d86294", + "sha256:92e962a087f1c4b8d1c5c88ade1c1dfd550047dcffb320c57ef6a534a20403e2" + ], + "index": "pypi", + "version": "==1.16.0" + }, + "google-api-python-client": { + "hashes": [ + "sha256:b764be88cf2a1f8b4c4d17c9187a279fea93f4d767e7d7c24f71bf25385a8b10", + "sha256:be4e8dcf399d7d1dcaae004b7f18694907f740bf6e6cebda91da8ebd968c5481" + ], + "version": "==1.8.3" + }, + "google-auth": { + "hashes": [ + "sha256:73b141d122942afe12e8bfdcb6900d5df35c27d39700f078363ba0b1298ad33b", + "sha256:fbf25fee328c0828ef293459d9c649ef84ee44c0b932bb999d19df0ead1b40cf" + ], + "version": "==1.15.0" + }, + "google-auth-httplib2": { + "hashes": [ + "sha256:098fade613c25b4527b2c08fa42d11f3c2037dda8995d86de0745228e965d445", + 
"sha256:f1c437842155680cf9918df9bc51c1182fda41feef88c34004bd1978c8157e08" + ], + "version": "==0.0.3" + }, + "google-cloud": { + "hashes": [ + "sha256:01430187cf56df10a9ba775dd547393185d4b40741db0ea5889301f8e7a9d5d3", + "sha256:fb1ab7b0548fe44b3d538041f0a374505b7f990d448a935ea36649c5ccab5acf" + ], + "index": "pypi", + "version": "==0.34.0" + }, + "google-cloud-automl": { + "hashes": [ + "sha256:11b65c78e7e1ede67e9e162324f249b6c566c18bb06ace54f7579ea45d18230c", + "sha256:2f086d3e521103df3adfa66c8571613c22036f466da6ae23c41de971145518a6" + ], + "index": "pypi", + "version": "==0.10.0" + }, + "googleapis-common-protos": { + "hashes": [ + "sha256:013c91704279119150e44ef770086fdbba158c1f978a6402167d47d5409e226e" + ], + "version": "==1.51.0" + }, + "grpcio": { + "hashes": [ + "sha256:10cdc8946a7c2284bbc8e16d346eaa2beeaae86ea598f345df86d4ef7dfedb84", + "sha256:23bc395a32c2465564cb242e48bdd2fdbe5a4aebf307649a800da1b971ee7f29", + "sha256:2637ce96b7c954d2b71060f50eb4c72f81668f1b2faa6cbdc74677e405978901", + "sha256:3d8c510b6eabce5192ce126003d74d7751c7218d3e2ad39fcf02400d7ec43abe", + "sha256:5024b26e17a1bfc9390fb3b8077bf886eee02970af780fd23072970ef08cefe8", + "sha256:517538a54afdd67162ea2af1ac3326c0752c5d13e6ddadbc4885f6a28e91ab28", + "sha256:524ae8d3da61b856cf08abb3d0947df05402919e4be1f88328e0c1004031f72e", + "sha256:54e4658c09084b09cd83a5ea3a8bce78e4031ff1010bb8908c399a22a76a6f08", + "sha256:57c8cc2ae8cb94c3a89671af7e1380a4cdfcd6bab7ba303f4461ec32ded250ae", + "sha256:5fd9ffe938e9225c654c60eb21ff011108cc27302db85200413807e0eda99a4a", + "sha256:75b2247307a7ecaf6abc9eb2bd04af8f88816c111b87bf0044d7924396e9549c", + "sha256:7bf3cb1e0f4a9c89f7b748583b994bdce183103d89d5ff486da48a7668a052c7", + "sha256:7e02a7c40304eecee203f809a982732bd37fad4e798acad98fe73c66e44ff2db", + "sha256:806c9759f5589b3761561187408e0313a35c5c53f075c7590effab8d27d67dfe", + "sha256:80e9f9f6265149ca7c84e1c8c31c2cf3e2869c45776fbe8880a3133a11d6d290", + 
"sha256:81bbf78a399e0ee516c81ddad8601f12af3fc9b30f2e4b2fbd64efd327304a4d", + "sha256:886d48c32960b39e059494637eb0157a694956248d03b0de814447c188b74799", + "sha256:97b72bf2242a351a89184134adbb0ae3b422e6893c6c712bc7669e2eab21501b", + "sha256:97fcbdf1f12e0079d26db73da11ee35a09adc870b1e72fbff0211f6a8003a4e8", + "sha256:9cfb4b71cc3c8757f137d47000f9d90d4bd818733f9ab4f78bd447e052a4cb9a", + "sha256:9ef0370bcf629ece4e7e37796e4604e2514b920669be2911fc3f9c163a73a57b", + "sha256:a6dddb177b3cfa0cfe299fb9e07d6a3382cc79466bef48fe9c4326d5c5b1dcb8", + "sha256:a97ea91e31863c9a3879684b5fb3c6ab4b17c5431787548fc9f52b9483ea9c25", + "sha256:b49f243936b0f6ae8eb6adf88a1e54e736f1c6724a1bff6b591d105d708263ad", + "sha256:b85f355fc24b68a6c52f2750e7141110d1fcd07dfdc9b282de0000550fe0511b", + "sha256:c3a0ef12ee86f6e72db50e01c3dba7735a76d8c30104b9b0f7fd9d65ceb9d93f", + "sha256:da0ca9b1089d00e39a8b83deec799a4e5c37ec1b44d804495424acde50531868", + "sha256:e90f3d11185c36593186e5ff1f581acc6ddfa4190f145b0366e579de1f52803b", + "sha256:ebf0ccb782027ef9e213e03b6d00bbd8dabd80959db7d468c0738e6d94b5204c", + "sha256:eede3039c3998e2cc0f6713f4ac70f235bd32967c9b958a17bf937aceebc12c3", + "sha256:ff7931241351521b8df01d7448800ce0d59364321d8d82c49b826d455678ff08" + ], + "version": "==1.29.0" + }, + "gunicorn": { + "hashes": [ + "sha256:1904bb2b8a43658807108d59c3f3d56c2b6121a701161de0ddf9ad140073c626", + "sha256:cd4a810dd51bf497552cf3f863b575dabd73d6ad6a91075b65936b151cbf4f9c" + ], + "index": "pypi", + "version": "==20.0.4" + }, + "httplib2": { + "hashes": [ + "sha256:4f6988e6399a2546b525a037d56da34aed4d149bbdc0e78523018d5606c26e74", + "sha256:b0e1f3ed76c97380fe2485bc47f25235453b40ef33ca5921bb2897e257a49c4c" + ], + "index": "pypi", + "version": "==0.18.0" + }, + "idna": { + "hashes": [ + "sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f", + "sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4" + ], + "index": "pypi", + "version": "==2.6" + }, + "importlib-metadata": { + 
"hashes": [ + "sha256:2a688cbaa90e0cc587f1df48bdc97a6eadccdcd9c35fb3f976a09e3b5016d90f", + "sha256:34513a8a0c4962bc66d35b359558fd8a5e10cd472d37aec5f66858addef32c1e" + ], + "markers": "python_version < '3.8'", + "version": "==1.6.0" + }, + "itsdangerous": { + "hashes": [ + "sha256:321b033d07f2a4136d3ec762eac9f16a10ccd60f53c0c91af90217ace7ba1f19", + "sha256:b12271b2047cb23eeb98c8b5622e2e5c5e9abd9784a153e9d8ef9cb4dd09d749" + ], + "version": "==1.1.0" + }, + "jinja2": { + "hashes": [ + "sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0", + "sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035" + ], + "version": "==2.11.2" + }, + "markupsafe": { + "hashes": [ + "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473", + "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161", + "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", + "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", + "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42", + "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", + "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", + "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", + "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", + "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", + "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66", + "sha256:596510de112c685489095da617b5bcbbac7dd6384aeebeda4df6025d0256a81b", + "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1", + "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15", + "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", + "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", + 
"sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", + "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", + "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", + "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d", + "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e", + "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d", + "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c", + "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21", + "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2", + "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5", + "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b", + "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6", + "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f", + "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f", + "sha256:cdb132fc825c38e1aeec2c8aa9338310d29d337bebbd7baa06889d09a60a1fa2", + "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7", + "sha256:e8313f01ba26fbbe36c7be1966a7b7424942f670f38e666995b88d012765b9be" + ], + "version": "==1.1.1" + }, + "marshmallow": { + "hashes": [ + "sha256:90854221bbb1498d003a0c3cc9d8390259137551917961c8b5258c64026b2f85", + "sha256:ac2e13b30165501b7d41fc0371b8df35944f5849769d136f20e2c5f6cdc6e665" + ], + "index": "pypi", + "version": "==3.5.1" + }, + "murmurhash": { + "hashes": [ + "sha256:27b908fe4bdb426f4e4e4a8821acbe0302915b2945e035ec9d8ca513e2a74b1f", + "sha256:33405103fa8cde15d72ee525a03d5cfe2c7e4901133819754810986e29627d68", + "sha256:386a9eed3cb27cb2cd4394b6521275ba04552642c2d9cab5c9fb42aa5a3325c0", + "sha256:3af36a0dc9f13f6892d9b8b39a6a3ccf216cae5bce38adc7c2d145677987772f", + "sha256:717196a04cdc80cc3103a3da17b2415a8a5e1d0d578b7079259386bf153b3258", + 
"sha256:8a4ed95cd3456b43ea301679c7c39ade43fc18b844b37d0ba0ac0d6acbff8e0c", + "sha256:8b045a79e8b621b4b35b29f29e33e9e0964f3a276f7da4d5736142f322ad4842", + "sha256:a6c071b4b498bcea16a8dc8590cad81fa8d43821f34c74bc00f96499e2527073", + "sha256:b0afe329701b59d02e56bc6cee7325af83e3fee9c299c615fc1df3202b4f886f", + "sha256:ba766343bdbcb928039b8fff609e80ae7a5fd5ed7a4fc5af822224b63e0cbaff", + "sha256:bf33490514d308bcc27ed240cb3eb114f1ec31af031535cd8f27659a7049bd52", + "sha256:c7a646f6b07b033642b4f52ae2e45efd8b80780b3b90e8092a0cec935fbf81e2", + "sha256:cc97ea766ac545074bab0e5af3dbc48e0d05ba230ae5a404e284d39abe4b3baf", + "sha256:d696c394ebd164ca80b5871e2e9ad2f9fdbb81bd3c552c1d5f1e8ee694e6204a", + "sha256:f468e4868f78c3ac202a66abfe2866414bca4ae7666a21ef0938c423de0f7d50", + "sha256:fe344face8d30a5a6aa26e5acf288aa2a8f0f32e05efdda3d314b4bf289ec2af" + ], + "version": "==1.0.2" + }, + "mysql-connector-python": { + "hashes": [ + "sha256:033a8ab1d772ce77ce6cbbaca5bff400bfb65a2a3542b701061c981222a0fefd", + "sha256:0fc51c4360286646244e97f30826ebc0ff362846d1226d35e99ec94b10543fac", + "sha256:268366d8b807f1216bac3d467fa0e51798880da6a3965fdead3476f2b04dd8bb", + "sha256:332654635beeb71d823fe461b8f3062f1b8621ffe839c83175f16bcca0627909", + "sha256:427ca66e5a502f8c86b5525009d2f18ef0da2ab12f7fccd8edb4183e513e8491", + "sha256:42bc551cf7d32e4aff358fe5162efb268ddc3da2e6ce37f5b71e6321e731b432", + "sha256:4357ffddb4b26065327e0a0c003659ce3a106e4a91e0f03698a2262431768f72", + "sha256:556995294a47ed38849e1d1730943a9d93a80ca450cc7c03a372816cae7c1f11", + "sha256:586906f5a5ae807501f6fa83a7fab8e9c81392a657e6df94b0c192534644dfde", + "sha256:5dbf052f34e1be01453d9591aaa69d15961de7ef80d22c188dcff8d0e6f198fe", + "sha256:5eb70297b829c2e7d5ff4f511164895a5acb827f90c0610e36678a763abbd393", + "sha256:5f0537726ae30025c910533182ebe8656186349cf7d801e01483b2fcde9537ba", + "sha256:663d714ae09e2001e602960e48f2e918dda53398b1d2641780105321f494fcd3", + "sha256:77bc6dbcf5eae803845e82a639eb2d5e5ed0ba59f8d66d1901e26df1ce2088f0", 
+ "sha256:846d3062de596fccb3ab0928131f9a49747b5a4eb8d02433df22c2bd2abcca7a", + "sha256:8920d2cbc17a4003f99f8db71c3cc7ca4058a6585d56a72b98f9e0826772e682", + "sha256:8a27d519a57e89d5423eb2b6ca0d839d7d16e576a43d67b497d11ca2962355eb", + "sha256:8f1f68ee29b7e9b1b8c88b65dcfdfdc450574106ac05668db325b85a322e3875", + "sha256:9ba54e85cd9a3f2ac2c778b4082d6f427c54c883f54051a5fb235138cb6f091d", + "sha256:a956b77c9c73bff6e17f068fbd8d03c3631a2ef974703f784f8dbfa348c983ec", + "sha256:ac4474bf836be6696e4930884725b9de33df4d246fb433255126fb007cb8a59e", + "sha256:acbaf0c87b1398d238f0fe77af18feefc8b6c3569e7fe96307bca3ed3f0eb240", + "sha256:c3d2dbd81e78d8d2cd1504483daf930219e623b3b9f269a2c2b3bad79a031fa5" + ], + "index": "pypi", + "version": "==8.0.18" + }, + "nltk": { + "hashes": [ + "sha256:bed45551259aa2101381bbdd5df37d44ca2669c5c3dad72439fa459b29137d94" + ], + "index": "pypi", + "version": "==3.4.5" + }, + "numpy": { + "hashes": [ + "sha256:00d7b54c025601e28f468953d065b9b121ddca7fff30bed7be082d3656dd798d", + "sha256:02ec9582808c4e48be4e93cd629c855e644882faf704bc2bd6bbf58c08a2a897", + "sha256:0e6f72f7bb08f2f350ed4408bb7acdc0daba637e73bce9f5ea2b207039f3af88", + "sha256:1be2e96314a66f5f1ce7764274327fd4fb9da58584eaff00b5a5221edefee7d6", + "sha256:2466fbcf23711ebc5daa61d28ced319a6159b260a18839993d871096d66b93f7", + "sha256:2b573fcf6f9863ce746e4ad00ac18a948978bb3781cffa4305134d31801f3e26", + "sha256:3f0dae97e1126f529ebb66f3c63514a0f72a177b90d56e4bce8a0b5def34627a", + "sha256:50fb72bcbc2cf11e066579cb53c4ca8ac0227abb512b6cbc1faa02d1595a2a5d", + "sha256:57aea170fb23b1fd54fa537359d90d383d9bf5937ee54ae8045a723caa5e0961", + "sha256:709c2999b6bd36cdaf85cf888d8512da7433529f14a3689d6e37ab5242e7add5", + "sha256:7d59f21e43bbfd9a10953a7e26b35b6849d888fc5a331fa84a2d9c37bd9fe2a2", + "sha256:904b513ab8fbcbdb062bed1ce2f794ab20208a1b01ce9bd90776c6c7e7257032", + "sha256:96dd36f5cdde152fd6977d1bbc0f0561bccffecfde63cd397c8e6033eb66baba", + 
"sha256:9933b81fecbe935e6a7dc89cbd2b99fea1bf362f2790daf9422a7bb1dc3c3085", + "sha256:bbcc85aaf4cd84ba057decaead058f43191cc0e30d6bc5d44fe336dc3d3f4509", + "sha256:dccd380d8e025c867ddcb2f84b439722cf1f23f3a319381eac45fd077dee7170", + "sha256:e22cd0f72fc931d6abc69dc7764484ee20c6a60b0d0fee9ce0426029b1c1bdae", + "sha256:ed722aefb0ebffd10b32e67f48e8ac4c5c4cf5d3a785024fdf0e9eb17529cd9d", + "sha256:efb7ac5572c9a57159cf92c508aad9f856f1cb8e8302d7fdb99061dbe52d712c", + "sha256:efdba339fffb0e80fcc19524e4fdbda2e2b5772ea46720c44eaac28096d60720", + "sha256:f22273dd6a403ed870207b853a856ff6327d5cbce7a835dfa0645b3fc00273ec" + ], + "version": "==1.18.4" + }, + "oauth2client": { + "hashes": [ + "sha256:b8a81cc5d60e2d364f0b1b98f958dbd472887acaf1a5b05e21c28c31a2d6d3ac", + "sha256:d486741e451287f69568a4d26d70d9acd73a2bbfa275746c535b4209891cccc6" + ], + "version": "==4.1.3" + }, + "pandas": { + "hashes": [ + "sha256:23e177d43e4bf68950b0f8788b6a2fef2f478f4ec94883acb627b9264522a98a", + "sha256:2530aea4fe46e8df7829c3f05e0a0f821c893885d53cb8ac9b89cc67c143448c", + "sha256:303827f0bb40ff610fbada5b12d50014811efcc37aaf6ef03202dc3054bfdda1", + "sha256:3b019e3ea9f5d0cfee0efabae2cfd3976874e90bcc3e97b29600e5a9b345ae3d", + "sha256:3c07765308f091d81b6735d4f2242bb43c332cc3461cae60543df6b10967fe27", + "sha256:5036d4009012a44aa3e50173e482b664c1fae36decd277c49e453463798eca4e", + "sha256:6f38969e2325056f9959efbe06c27aa2e94dd35382265ad0703681d993036052", + "sha256:74a470d349d52b9d00a2ba192ae1ee22155bb0a300fd1ccb2961006c3fa98ed3", + "sha256:7d77034e402165b947f43050a8a415aa3205abfed38d127ea66e57a2b7b5a9e0", + "sha256:7f9a509f6f11fa8b9313002ebdf6f690a7aa1dd91efd95d90185371a0d68220e", + "sha256:942b5d04762feb0e55b2ad97ce2b254a0ffdd344b56493b04a627266e24f2d82", + "sha256:a9fbe41663416bb70ed05f4e16c5f377519c0dc292ba9aa45f5356e37df03a38", + "sha256:d10e83866b48c0cdb83281f786564e2a2b51a7ae7b8a950c3442ad3c9e36b48c", + "sha256:e2140e1bbf9c46db9936ee70f4be6584d15ff8dc3dfff1da022d71227d53bad3" + ], + "index": "pypi", + 
"version": "==1.0.1" + }, + "plac": { + "hashes": [ + "sha256:398cb947c60c4c25e275e1f1dadf027e7096858fb260b8ece3b33bcff90d985f", + "sha256:487e553017d419f35add346c4c09707e52fa53f7e7181ce1098ca27620e9ceee" + ], + "version": "==1.1.3" + }, + "preshed": { + "hashes": [ + "sha256:0c15ae62f2595ca479decc3452967484dae57b510278800f5deb9115238cc818", + "sha256:190345724eb3f7aeaeb2a758740d698bd6c017c2cdf07c71c16b34820973d114", + "sha256:1be3cb59211282e906a11443464fe3e19f6561e2fcd06410e4adc6d45354cf82", + "sha256:1ef72a120e49356058b3c0590d7b5e91f2747b44e006eef6579be6131223cab0", + "sha256:253970beae87ab672a6afb543908761795eea3cb7b0d784e2ea51e265752059e", + "sha256:448d9df12e63fe4a3024f6153ee6703bb95d2be0ce887b5eda7ddc41acfba825", + "sha256:61d73468c97c1d6d5a048de0b01d5a6fd052123358aca4823cdb277e436436cb", + "sha256:633358f1fb0ec5dd6dbe4971c328d08809e5a8dbefdf13a802ae0a7cb45306c7", + "sha256:6518bbd5fb8adbc3231e75ae78d96a7bdd5405a3b23a09d5e62a2e4fc833724e", + "sha256:7e80ffc1fb79496d4feafe0eaf71ee5e532b91daf6cec235d7f9c4c12657a58c", + "sha256:7ea588a78aaf310ae2c293071a8571b07ae434819be05fe510442b6df3f8fbf7", + "sha256:88427346b220293439db77c82913791fa13edc6ac73d8159610699a3ca17aae9", + "sha256:8a9a8222a697a513f25a94733e7a17cc298ecd8fd56b606a1d8fa0ac342c2830", + "sha256:b4ae6c7c44aa3ff7bd717791bb6b619ecb273b7cb128c986f2dc65f6e0e6ddd4", + "sha256:e37058d91bd7f0f5a7a9c83d22a83dc581ab5f79688a87be81f200993145a250", + "sha256:ece5e850f667eaa3367d5c56dda9e3aa6ac1c0bb2117d2f466a26db5f26bbe4b" + ], + "version": "==3.0.2" + }, + "protobuf": { + "hashes": [ + "sha256:00c2c276aca3af220d422e6a8625b1f5399c821c9b6f1c83e8a535aa8f48cc6c", + "sha256:0d69d76b00d0eb5124cb33a34a793383a5bbbf9ac3e633207c09988717c5da85", + "sha256:1c55277377dd35e508e9d86c67a545f6d8d242d792af487678eeb75c07974ee2", + "sha256:35bc1b96241b8ea66dbf386547ef2e042d73dcc0bf4b63566e3ef68722bb24d1", + "sha256:47a541ac44f2dcc8d49b615bcf3ed7ba4f33af9791118cecc3d17815fab652d9", + 
"sha256:61364bcd2d85277ab6155bb7c5267e6a64786a919f1a991e29eb536aa5330a3d", + "sha256:7aaa820d629f8a196763dd5ba21fd272fa038f775a845a52e21fa67862abcd35", + "sha256:9593a6cdfc491f2caf62adb1c03170e9e8748d0a69faa2b3970e39a92fbd05a2", + "sha256:95f035bbafec7dbaa0f1c72eda8108b763c1671fcb6e577e93da2d52eb47fbcf", + "sha256:9d6a517ce33cbdc64b52a17c56ce17b0b20679c945ed7420e7c6bc6686ff0494", + "sha256:a7532d971e4ab2019a9f6aa224b209756b6b9e702940ca85a4b1ed1d03f45396", + "sha256:b4e8ecb1eb3d011f0ccc13f8bb0a2d481aa05b733e6e22e9d46a3f61dbbef0de", + "sha256:bb1aced9dcebc46f0b320f24222cc8ffdfd2e47d2bafd4d2e5913cc6f7e3fc98", + "sha256:ccce142ebcfbc35643a5012cf398497eb18e8d021333cced4d5401f034a8cef5", + "sha256:d538eecc0b80accfb73c8167f39aaa167a5a50f31b1295244578c8eff8e9d602", + "sha256:eab18765eb5c7bad1b2de7ae3774192b46e1873011682e36bcd70ccf75f2748a" + ], + "version": "==3.12.0" + }, + "pyasn1": { + "hashes": [ + "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d", + "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba" + ], + "version": "==0.4.8" + }, + "pyasn1-modules": { + "hashes": [ + "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e", + "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74" + ], + "version": "==0.2.8" + }, + "pydrive": { + "hashes": [ + "sha256:5b94e971430722eb5c40a090f21df46b32e51399d747c1511796f63f902d1095", + "sha256:83890dcc2278081c6e3f6a8da1f8083e25de0bcc8eb7c91374908c5549a20787" + ], + "index": "pypi", + "version": "==1.3.1" + }, + "python-dateutil": { + "hashes": [ + "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c", + "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a" + ], + "version": "==2.8.1" + }, + "python-levenshtein": { + "hashes": [ + "sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1" + ], + "index": "pypi", + "version": "==0.12.0" + }, + "pytz": { + "hashes": [ + 
"sha256:a494d53b6d39c3c6e44c3bec237336e14305e4f29bbf800b599253057fbb79ed", + "sha256:c35965d010ce31b23eeb663ed3cc8c906275d6be1a34393a1d73a41febf4a048" + ], + "version": "==2020.1" + }, + "pyyaml": { + "hashes": [ + "sha256:059b2ee3194d718896c0ad077dd8c043e5e909d9180f387ce42012662a4946d6", + "sha256:1cf708e2ac57f3aabc87405f04b86354f66799c8e62c28c5fc5f88b5521b2dbf", + "sha256:24521fa2890642614558b492b473bee0ac1f8057a7263156b02e8b14c88ce6f5", + "sha256:4fee71aa5bc6ed9d5f116327c04273e25ae31a3020386916905767ec4fc5317e", + "sha256:70024e02197337533eef7b85b068212420f950319cc8c580261963aefc75f811", + "sha256:74782fbd4d4f87ff04159e986886931456a1894c61229be9eaf4de6f6e44b99e", + "sha256:940532b111b1952befd7db542c370887a8611660d2b9becff75d39355303d82d", + "sha256:cb1f2f5e426dc9f07a7681419fe39cee823bb74f723f36f70399123f439e9b20", + "sha256:dbbb2379c19ed6042e8f11f2a2c66d39cceb8aeace421bfc29d085d93eda3689", + "sha256:e3a057b7a64f1222b56e47bcff5e4b94c4f61faac04c7c4ecb1985e18caa3994", + "sha256:e9f45bd5b92c7974e59bcd2dcc8631a6b6cc380a904725fce7bc08872e691615" + ], + "index": "pypi", + "version": "==5.3" + }, + "requests": { + "hashes": [ + "sha256:43999036bfa82904b6af1d99e4882b560e5e2c68e5c4b0aa03b655f3d7d73fee", + "sha256:b3f43d496c6daba4493e7c431722aeb7dbc6288f52a6e04e7b6023b0247817e6" + ], + "version": "==2.23.0" + }, + "rsa": { + "hashes": [ + "sha256:14ba45700ff1ec9eeb206a2ce76b32814958a98e372006c8fb76ba820211be66", + "sha256:1a836406405730121ae9823e19c6e806c62bbad73f890574fff50efa4122c487" + ], + "version": "==4.0" + }, + "scikit-learn": { + "hashes": [ + "sha256:05d061606657af85365b5f71484e3362d924429edde17a90068960843ad597f5", + "sha256:071317afbb5c67fa493635376ddd724b414290255cbf6947c1155846956e93f7", + "sha256:0d03aaf19a25e59edac3099cda6879ba05129f0fa1e152e23b728ccd36104f57", + "sha256:1665ea0d4b75ef24f5f2a9d1527b7296eeabcbe3a1329791c954541e2ebde5a2", + "sha256:24eccb0ff31f84e88e00936c09197735ef1dcabd370aacb10e55dbc8ee464a78", + 
"sha256:27b48cabacce677a205e6bcda1f32bdc968fbf40cd2aa0a4f52852f6997fce51", + "sha256:2c51826b9daa87d7d356bebd39f8665f7c32e90e3b21cbe853d6c7f0d6b0d23b", + "sha256:3116299d392bd1d054655fa2a740e7854de87f1d573fa85503e64494e52ac795", + "sha256:3771861abe1fd1b2bbeaec7ba8cfca58fdedd75d790f099960e5332af9d1ff7a", + "sha256:473ba7d9a5eaec47909ee83d74b4a3be47a44505c5189d2cab67c0418cd030f1", + "sha256:621e2c91f9afde06e9295d128cb15cb6fc77dc00719393e9ec9d47119895b0d4", + "sha256:645865462c383e5faad473b93145a8aee97d839c9ad1fd7a17ae54ec8256d42b", + "sha256:80e2276d4869d302e84b7c03b5bac4a67f6cd331162e62ae775a3e5855441a60", + "sha256:84d2cfe0dee3c22b26364266d69850e0eb406d99714045929875032f91d3c918", + "sha256:87ea9ace7fe811638dfc39b850b60887509b8bfc93c4006d5552fa066d04ddc7", + "sha256:a4d1e535c75881f668010e6e53dfeb89dd50db85b05c5c45af1991c8b832d757", + "sha256:a4f14c4327d2e44567bfb3a0bee8c55470f820bc9a67af3faf200abd8ed79bf2", + "sha256:a7b3c24e193e8c6eaeac075b5d0bb0a7fea478aa2e4b991f6a7b030fc4fd410d", + "sha256:ab2919aca84f1ac6ef60a482148eec0944364ab1832e63f28679b16f9ef279c8", + "sha256:b0f79d5ff74f3c68a4198ad5b4dfa891326b5ce272dd064d11d572b25aae5b43", + "sha256:bc5bc7c7ee2572a1edcb51698a6caf11fae554194aaab9a38105d9ec419f29e6", + "sha256:bc5c750d548795def79576533f8f0f065915f17f48d6e443afce2a111f713747", + "sha256:c68969c30b3b2c1fe07c1376110928eade61da4fc29c24c9f1a89435a7d08abe", + "sha256:d3b4f791d2645fe936579d61f1ff9b5dcf0c8f50db7f0245ca8f16407d7a5a46", + "sha256:dac0cd9fdd8ac6dd6108a10558e2e0ca1b411b8ea0a3165641f9ab0b4322df4e", + "sha256:eb7ddbdf33eb822fdc916819b0ab7009d954eb43c3a78e7dd2ec5455e074922a", + "sha256:ed537844348402ed53420187b3a6948c576986d0b2811a987a49613b6a26f29e", + "sha256:fcca54733e692fe03b8584f7d4b9344f4b6e3a74f5b326c6e5f5e9d2504bdce7" + ], + "index": "pypi", + "version": "==0.20.2" + }, + "scipy": { + "hashes": [ + "sha256:00af72998a46c25bdb5824d2b729e7dabec0c765f9deb0b504f928591f5ff9d4", + 
"sha256:0902a620a381f101e184a958459b36d3ee50f5effd186db76e131cbefcbb96f7", + "sha256:1e3190466d669d658233e8a583b854f6386dd62d655539b77b3fa25bfb2abb70", + "sha256:2cce3f9847a1a51019e8c5b47620da93950e58ebc611f13e0d11f4980ca5fecb", + "sha256:3092857f36b690a321a662fe5496cb816a7f4eecd875e1d36793d92d3f884073", + "sha256:386086e2972ed2db17cebf88610aab7d7f6e2c0ca30042dc9a89cf18dcc363fa", + "sha256:71eb180f22c49066f25d6df16f8709f215723317cc951d99e54dc88020ea57be", + "sha256:770254a280d741dd3436919d47e35712fb081a6ff8bafc0f319382b954b77802", + "sha256:787cc50cab3020a865640aba3485e9fbd161d4d3b0d03a967df1a2881320512d", + "sha256:8a07760d5c7f3a92e440ad3aedcc98891e915ce857664282ae3c0220f3301eb6", + "sha256:8d3bc3993b8e4be7eade6dcc6fd59a412d96d3a33fa42b0fa45dc9e24495ede9", + "sha256:9508a7c628a165c2c835f2497837bf6ac80eb25291055f56c129df3c943cbaf8", + "sha256:a144811318853a23d32a07bc7fd5561ff0cac5da643d96ed94a4ffe967d89672", + "sha256:a1aae70d52d0b074d8121333bc807a485f9f1e6a69742010b33780df2e60cfe0", + "sha256:a2d6df9eb074af7f08866598e4ef068a2b310d98f87dc23bd1b90ec7bdcec802", + "sha256:bb517872058a1f087c4528e7429b4a44533a902644987e7b2fe35ecc223bc408", + "sha256:c5cac0c0387272ee0e789e94a570ac51deb01c796b37fb2aad1fb13f85e2f97d", + "sha256:cc971a82ea1170e677443108703a2ec9ff0f70752258d0e9f5433d00dda01f59", + "sha256:dba8306f6da99e37ea08c08fef6e274b5bf8567bb094d1dbe86a20e532aca088", + "sha256:dc60bb302f48acf6da8ca4444cfa17d52c63c5415302a9ee77b3b21618090521", + "sha256:dee1bbf3a6c8f73b6b218cb28eed8dd13347ea2f87d572ce19b289d6fd3fbc59" + ], + "version": "==1.4.1" + }, + "six": { + "hashes": [ + "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9", + "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb" + ], + "index": "pypi", + "version": "==1.11.0" + }, + "spacy": { + "hashes": [ + "sha256:1d14c9e7d65b2cecd56c566d9ffac8adbcb9ce2cff2274cbfdcf5468cd940e6a", + "sha256:2cb77315522cc422df7750dac778f13d8079f409b4842cf74a54ffe3b84ee5c6", + 
"sha256:3c83c061597b5dc94c939c511d3b72c2971257204f21976afc117a350e8fa92b", + "sha256:6971359e43841ff9ed87e1af5e87ea74d6fdb01fe54807d3e4c6a2a3798d18a4", + "sha256:708d25c7212bd20d1268c6559e191d221e88e68e152fb98b82c388d16dfdd3d7", + "sha256:713811c96396c6bb86a1da2bbbe02d874385e74dde6617a84d61d99e9d2b1105", + "sha256:7fa02ababbb3762277b81873204d78583008b408ddf6fc0ef977b38d3b462b85", + "sha256:8d1ce99fc30d634b63b15d98c49b96d6a40b0d2048d5dad0f2bb31d3f6dc5ef0", + "sha256:9afdec1aeb21dbeccfd4d702f12fe8bab88e4d7cd410785bf17f6b186cbc73e8", + "sha256:ce7fad73de7aed7ca2ee7c2404c77c72005f67ca95edae6f19f08947fb0f8ab3", + "sha256:d6a2804c457ce74f0d3bf1f4cdb00cbcd228e9da5f0bdbbbe0a856afe12db37e", + "sha256:d8791f5f69800d702b8e9457418af2cd29789b82697d17ad66df98922f081d1b" + ], + "index": "pypi", + "version": "==2.2.3" + }, + "sqlalchemy": { + "hashes": [ + "sha256:64a7b71846db6423807e96820993fa12a03b89127d278290ca25c0b11ed7b4fb" + ], + "index": "pypi", + "version": "==1.3.13" + }, + "srsly": { + "hashes": [ + "sha256:18bad26c34cf5a8853fbf018fd168a7bf2ea7ce661e66476c25dac711cb79c9b", + "sha256:2179cf1e88c250e89e40227bd5848341011c170079b3d424987d067de6a73f42", + "sha256:21cfb0e5dea2c4515b5c2daa78402d5782c6425b4f58af40d2e2cb45e4778d8c", + "sha256:29434753a77481ec6129991f4116f983085cc8005c1ad963261124842e8c05fc", + "sha256:3f3975e8cb67194d26dd03508469b1303f8b994f30e7782f7eae25fef6dc4aad", + "sha256:46213d8f094b348a9433c825ac1eba36a21aa25a8bae6f29c2f9f053e15be961", + "sha256:59258b81d567df207f8a0a33c4b5fa232afccf1d927c8ce3ba5395bfd64c0ed8", + "sha256:7c553a709fd56a37a07f969e849f55a0aeabaeb7677bebc588a640ab8ec134aa", + "sha256:95849d84e8929be248a180e672c8ce1ed98b1341263bc983efdf8427465584f1", + "sha256:b94d8a13c60e3298a9ba12b1b211026e8378c7d087efd7ce46a3f2d8d4678d94", + "sha256:c8beff52c104a7ffe4a15513a05dc0497998cf83aa1ca39454489994d18c1c07", + "sha256:d409beb7257208633c974c01f9dc3265562fb6802caee7de21880761ba87c3ed" + ], + "version": "==1.0.2" + }, + "thinc": { + "hashes": [ + 
"sha256:1dbaec0628040a1f8d66147fadbf7775ad6dfe4c681424b2e20479c1e54dc3c1", + "sha256:20b6ed4a8112342b433b9b3ca23b59322d07e32a9232d3cca19b0353e213eadb", + "sha256:30790a1a496a8a84fe300edf50df50454dbdb625b41b203739fbc03112a4d3b6", + "sha256:56b67887930df87c28af2cc4d046c6bc3e80ed4ff3e57208a4fb7a348d12a580", + "sha256:650fbead603bd7e73a61fd2c1b69202ad7a8eb70d4ebe7c5484b8788e828b6e0", + "sha256:713adad69c108dbdc145276d077c4a80f3df31a39b3fc574782dcb64b1def815", + "sha256:801f32f6c048de7e9f6d406342080e6348d0bb02beb1412811f9150a26661691", + "sha256:8833246f1c8b95143c91e310728bf64af8972a9d8653252efa1b4c9036837569", + "sha256:93cb9d184115a8890321dd7f5d94a0d8235dc2fca54d92a9c1c051234a7af43e", + "sha256:abe0d00cbb2cc831f4462e41f97aeb754b275a723a1335cdce7ac9224001d567", + "sha256:bad16bcc608ec4d74c680d85aa9bf43cfc776ac12ca3b7e699d7283fd0177bca", + "sha256:ce81d6b2372057e10f9d7cb505942df67a803f270d69959d44d372e8e3792bb9", + "sha256:f19a36cdfdbef75109f505313c16a7b154b9bbf83dd177e9ddd43430dc523bb0" + ], + "version": "==7.3.1" + }, + "tqdm": { + "hashes": [ + "sha256:4733c4a10d0f2a4d098d801464bdaf5240c7dadd2a7fde4ee93b0a0efd9fb25e", + "sha256:acdafb20f51637ca3954150d0405ff1a7edde0ff19e38fb99a80a66210d2a28f" + ], + "version": "==4.46.0" + }, + "uritemplate": { + "hashes": [ + "sha256:07620c3f3f8eed1f12600845892b0e036a2420acf513c53f7de0abd911a5894f", + "sha256:5af8ad10cec94f215e3f48112de2022e1d5a37ed427fbd88652fa908f2ab7cae" + ], + "version": "==3.0.1" + }, + "urllib3": { + "hashes": [ + "sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527", + "sha256:88206b0eb87e6d677d424843ac5209e3fb9d0190d0ee169599165ec25e9d9115" + ], + "version": "==1.25.9" + }, + "wasabi": { + "hashes": [ + "sha256:b8dd3e963cd693fde1eb6bfbecf51790171aa3534fa299faf35cf269f2fd6063", + "sha256:da1f100e0025fe1e50fd67fa5b0b05df902187d5c65c86dc110974ab856d1f05" + ], + "version": "==0.6.0" + }, + "werkzeug": { + "hashes": [ + "sha256:2de2a5db0baeae7b2d2664949077c2ac63fbd16d98da0ff71837f7d1dea3fd43", 
+ "sha256:6c80b1e5ad3665290ea39320b91e1be1e0d5f60652b964a3070216de83d2e47c" + ], + "version": "==1.0.1" + }, + "zipp": { + "hashes": [ + "sha256:aa36550ff0c0b7ef7fa639055d797116ee891440eac1a56f378e2d3179e0320b", + "sha256:c599e4d75c98f6798c509911d08a22e6c021d074469042177c8c86fb92eefd96" + ], + "version": "==3.1.0" + } + }, + "develop": { + "appdirs": { + "hashes": [ + "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", + "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128" + ], + "version": "==1.4.4" + }, + "attrs": { + "hashes": [ + "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c", + "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72" + ], + "version": "==19.3.0" + }, + "black": { + "hashes": [ + "sha256:1b30e59be925fafc1ee4565e5e08abef6b03fe455102883820fe5ee2e4734e0b", + "sha256:c2edb73a08e9e0e6f65a0e6af18b059b8b1cdd5bef997d7a0b181df93dc81539" + ], + "index": "pypi", + "version": "==19.10b0" + }, + "certifi": { + "hashes": [ + "sha256:1d987a998c75633c40847cc966fcf5904906c920a7f17ef374f5aa4282abd304", + "sha256:51fcb31174be6e6664c5f69e3e1691a2d72a1a12e90f872cbdb1567eb47b6519" + ], + "version": "==2020.4.5.1" + }, + "chardet": { + "hashes": [ + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" + ], + "version": "==3.0.4" + }, + "click": { + "hashes": [ + "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a", + "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc" + ], + "version": "==7.1.2" + }, + "coverage": { + "hashes": [ + "sha256:03f630aba2b9b0d69871c2e8d23a69b7fe94a1e2f5f10df5049c0df99db639a0", + "sha256:046a1a742e66d065d16fb564a26c2a15867f17695e7f3d358d7b1ad8a61bca30", + "sha256:0a907199566269e1cfa304325cc3b45c72ae341fbb3253ddde19fa820ded7a8b", + "sha256:165a48268bfb5a77e2d9dbb80de7ea917332a79c7adb747bd005b3a07ff8caf0", + 
"sha256:1b60a95fc995649464e0cd48cecc8288bac5f4198f21d04b8229dc4097d76823", + "sha256:1f66cf263ec77af5b8fe14ef14c5e46e2eb4a795ac495ad7c03adc72ae43fafe", + "sha256:2e08c32cbede4a29e2a701822291ae2bc9b5220a971bba9d1e7615312efd3037", + "sha256:3844c3dab800ca8536f75ae89f3cf566848a3eb2af4d9f7b1103b4f4f7a5dad6", + "sha256:408ce64078398b2ee2ec08199ea3fcf382828d2f8a19c5a5ba2946fe5ddc6c31", + "sha256:443be7602c790960b9514567917af538cac7807a7c0c0727c4d2bbd4014920fd", + "sha256:4482f69e0701139d0f2c44f3c395d1d1d37abd81bfafbf9b6efbe2542679d892", + "sha256:4a8a259bf990044351baf69d3b23e575699dd60b18460c71e81dc565f5819ac1", + "sha256:513e6526e0082c59a984448f4104c9bf346c2da9961779ede1fc458e8e8a1f78", + "sha256:5f587dfd83cb669933186661a351ad6fc7166273bc3e3a1531ec5c783d997aac", + "sha256:62061e87071497951155cbccee487980524d7abea647a1b2a6eb6b9647df9006", + "sha256:641e329e7f2c01531c45c687efcec8aeca2a78a4ff26d49184dce3d53fc35014", + "sha256:65a7e00c00472cd0f59ae09d2fb8a8aaae7f4a0cf54b2b74f3138d9f9ceb9cb2", + "sha256:6ad6ca45e9e92c05295f638e78cd42bfaaf8ee07878c9ed73e93190b26c125f7", + "sha256:73aa6e86034dad9f00f4bbf5a666a889d17d79db73bc5af04abd6c20a014d9c8", + "sha256:7c9762f80a25d8d0e4ab3cb1af5d9dffbddb3ee5d21c43e3474c84bf5ff941f7", + "sha256:85596aa5d9aac1bf39fe39d9fa1051b0f00823982a1de5766e35d495b4a36ca9", + "sha256:86a0ea78fd851b313b2e712266f663e13b6bc78c2fb260b079e8b67d970474b1", + "sha256:8a620767b8209f3446197c0e29ba895d75a1e272a36af0786ec70fe7834e4307", + "sha256:922fb9ef2c67c3ab20e22948dcfd783397e4c043a5c5fa5ff5e9df5529074b0a", + "sha256:9fad78c13e71546a76c2f8789623eec8e499f8d2d799f4b4547162ce0a4df435", + "sha256:a37c6233b28e5bc340054cf6170e7090a4e85069513320275a4dc929144dccf0", + "sha256:c3fc325ce4cbf902d05a80daa47b645d07e796a80682c1c5800d6ac5045193e5", + "sha256:cda33311cb9fb9323958a69499a667bd728a39a7aa4718d7622597a44c4f1441", + "sha256:db1d4e38c9b15be1521722e946ee24f6db95b189d1447fa9ff18dd16ba89f732", + "sha256:eda55e6e9ea258f5e4add23bcf33dc53b2c319e70806e180aecbff8d90ea24de", 
+ "sha256:f372cdbb240e09ee855735b9d85e7f50730dcfb6296b74b95a3e5dea0615c4c1" + ], + "index": "pypi", + "version": "==5.0.4" + }, + "dataclasses": { + "hashes": [ + "sha256:3459118f7ede7c8bea0fe795bff7c6c2ce287d01dd226202f7c9ebc0610a7836", + "sha256:494a6dcae3b8bcf80848eea2ef64c0cc5cd307ffc263e17cdf42f3e5420808e6" + ], + "markers": "python_version < '3.7'", + "version": "==0.7" + }, + "entrypoints": { + "hashes": [ + "sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19", + "sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451" + ], + "version": "==0.3" + }, + "flake8": { + "hashes": [ + "sha256:45681a117ecc81e870cbf1262835ae4af5e7a8b08e40b944a8a6e6b895914cfb", + "sha256:49356e766643ad15072a789a20915d3c91dc89fd313ccd71802303fd67e4deca" + ], + "index": "pypi", + "version": "==3.7.9" + }, + "hypothesis": { + "hashes": [ + "sha256:1b358250156fa63a5717f484da4d907343562ae375e454bc89562d8981ea1f77", + "sha256:7e44bff356b32ee5e1ba939f9778d192d094227b5be179cc3efc0d706f211619" + ], + "index": "pypi", + "version": "==5.3.1" + }, + "idna": { + "hashes": [ + "sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f", + "sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4" + ], + "index": "pypi", + "version": "==2.6" + }, + "importlib-metadata": { + "hashes": [ + "sha256:2a688cbaa90e0cc587f1df48bdc97a6eadccdcd9c35fb3f976a09e3b5016d90f", + "sha256:34513a8a0c4962bc66d35b359558fd8a5e10cd472d37aec5f66858addef32c1e" + ], + "markers": "python_version < '3.8'", + "version": "==1.6.0" + }, + "invoke": { + "hashes": [ + "sha256:87b3ef9d72a1667e104f89b159eaf8a514dbf2f3576885b2bbdefe74c3fb2132", + "sha256:93e12876d88130c8e0d7fd6618dd5387d6b36da55ad541481dfa5e001656f134", + "sha256:de3f23bfe669e3db1085789fd859eb8ca8e0c5d9c20811e2407fa042e8a5e15d" + ], + "index": "pypi", + "version": "==1.4.1" + }, + "libcst": { + "hashes": [ + "sha256:a6dafcf782fa8093c8a89be6698e7e7546d86465a11f273aa0aec7fae0eb1b87", + 
"sha256:c2a7cd82fd1cb5abb32f8e97fd936977a4fa04864c1d94adf513653a7c6209a4" + ], + "version": "==0.3.5" + }, + "mccabe": { + "hashes": [ + "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", + "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" + ], + "version": "==0.6.1" + }, + "more-itertools": { + "hashes": [ + "sha256:558bb897a2232f5e4f8e2399089e35aecb746e1f9191b6584a151647e89267be", + "sha256:7818f596b1e87be009031c7653d01acc46ed422e6656b394b0f765ce66ed4982" + ], + "version": "==8.3.0" + }, + "mypy-extensions": { + "hashes": [ + "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d", + "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8" + ], + "version": "==0.4.3" + }, + "packaging": { + "hashes": [ + "sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8", + "sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181" + ], + "version": "==20.4" + }, + "pathspec": { + "hashes": [ + "sha256:7d91249d21749788d07a2d0f94147accd8f845507400749ea19c1ec9054a12b0", + "sha256:da45173eb3a6f2a5a487efba21f050af2b41948be6ab52b6a1e3ff22bb8b7061" + ], + "version": "==0.8.0" + }, + "pluggy": { + "hashes": [ + "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0", + "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d" + ], + "version": "==0.13.1" + }, + "psutil": { + "hashes": [ + "sha256:1413f4158eb50e110777c4f15d7c759521703bd6beb58926f1d562da40180058", + "sha256:298af2f14b635c3c7118fd9183843f4e73e681bb6f01e12284d4d70d48a60953", + "sha256:60b86f327c198561f101a92be1995f9ae0399736b6eced8f24af41ec64fb88d4", + "sha256:685ec16ca14d079455892f25bd124df26ff9137664af445563c1bd36629b5e0e", + "sha256:73f35ab66c6c7a9ce82ba44b1e9b1050be2a80cd4dcc3352cc108656b115c74f", + "sha256:75e22717d4dbc7ca529ec5063000b2b294fc9a367f9c9ede1f65846c7955fd38", + "sha256:a02f4ac50d4a23253b68233b07e7cdb567bd025b982d5cf0ee78296990c22d9e", + 
"sha256:d008ddc00c6906ec80040d26dc2d3e3962109e40ad07fd8a12d0284ce5e0e4f8", + "sha256:d84029b190c8a66a946e28b4d3934d2ca1528ec94764b180f7d6ea57b0e75e26", + "sha256:e2d0c5b07c6fe5a87fa27b7855017edb0d52ee73b71e6ee368fae268605cc3f5", + "sha256:f344ca230dd8e8d5eee16827596f1c22ec0876127c28e800d7ae20ed44c4b310" + ], + "version": "==5.7.0" + }, + "py": { + "hashes": [ + "sha256:5e27081401262157467ad6e7f851b7aa402c5852dbcb3dae06768434de5752aa", + "sha256:c20fdd83a5dbc0af9efd622bee9a5564e278f6380fffcacc43ba6f43db2813b0" + ], + "version": "==1.8.1" + }, + "pycodestyle": { + "hashes": [ + "sha256:95a2219d12372f05704562a14ec30bc76b05a5b297b21a5dfe3f6fac3491ae56", + "sha256:e40a936c9a450ad81df37f549d676d127b1b66000a6c500caa2b085bc0ca976c" + ], + "version": "==2.5.0" + }, + "pyflakes": { + "hashes": [ + "sha256:17dbeb2e3f4d772725c777fabc446d5634d1038f234e77343108ce445ea69ce0", + "sha256:d976835886f8c5b31d47970ed689944a0262b5f3afa00a5a7b4dc81e5449f8a2" + ], + "version": "==2.1.1" + }, + "pyparsing": { + "hashes": [ + "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", + "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" + ], + "version": "==2.4.7" + }, + "pyre-check": { + "hashes": [ + "sha256:215502a3e4f66ccf73766815d67249643ae3bee1782c054568134ff35183de74", + "sha256:57d7772142746c3396667c15c2b201a37b255ee889fc1e6e902528c96f92b172", + "sha256:f1fc1850a21703b8c2eadcc3e9529708bb3dca21e7c4642ab97ec2df4ac7a055" + ], + "index": "pypi", + "version": "==0.0.41" + }, + "pyre-extensions": { + "hashes": [ + "sha256:60e0411e91ecbeaf1fd5d8851ffce42baf6a74ec7ccd01db545c7a97f15aac30", + "sha256:e6ad1facef54c982d9c0d9780b1aee9faf5cf53b3c7860a087062a206d1598c8" + ], + "version": "==0.0.18" + }, + "pytest": { + "hashes": [ + "sha256:1d122e8be54d1a709e56f82e2d85dcba3018313d64647f38a91aec88c239b600", + "sha256:c13d1943c63e599b98cf118fcb9703e4d7bde7caa9a432567bcdcae4bf512d20" + ], + "index": "pypi", + "version": "==5.3.4" + }, + "pywatchman": { + 
"hashes": [ + "sha256:d0047eb275deafb0011eda0a1a815fbd9742478c3d2b5ad6956d300e447dc2f9" + ], + "version": "==1.4.1" + }, + "pyyaml": { + "hashes": [ + "sha256:059b2ee3194d718896c0ad077dd8c043e5e909d9180f387ce42012662a4946d6", + "sha256:1cf708e2ac57f3aabc87405f04b86354f66799c8e62c28c5fc5f88b5521b2dbf", + "sha256:24521fa2890642614558b492b473bee0ac1f8057a7263156b02e8b14c88ce6f5", + "sha256:4fee71aa5bc6ed9d5f116327c04273e25ae31a3020386916905767ec4fc5317e", + "sha256:70024e02197337533eef7b85b068212420f950319cc8c580261963aefc75f811", + "sha256:74782fbd4d4f87ff04159e986886931456a1894c61229be9eaf4de6f6e44b99e", + "sha256:940532b111b1952befd7db542c370887a8611660d2b9becff75d39355303d82d", + "sha256:cb1f2f5e426dc9f07a7681419fe39cee823bb74f723f36f70399123f439e9b20", + "sha256:dbbb2379c19ed6042e8f11f2a2c66d39cceb8aeace421bfc29d085d93eda3689", + "sha256:e3a057b7a64f1222b56e47bcff5e4b94c4f61faac04c7c4ecb1985e18caa3994", + "sha256:e9f45bd5b92c7974e59bcd2dcc8631a6b6cc380a904725fce7bc08872e691615" + ], + "index": "pypi", + "version": "==5.3" + }, + "regex": { + "hashes": [ + "sha256:1386e75c9d1574f6aa2e4eb5355374c8e55f9aac97e224a8a5a6abded0f9c927", + "sha256:27ff7325b297fb6e5ebb70d10437592433601c423f5acf86e5bc1ee2919b9561", + "sha256:329ba35d711e3428db6b45a53b1b13a0a8ba07cbbcf10bbed291a7da45f106c3", + "sha256:3a9394197664e35566242686d84dfd264c07b20f93514e2e09d3c2b3ffdf78fe", + "sha256:51f17abbe973c7673a61863516bdc9c0ef467407a940f39501e786a07406699c", + "sha256:579ea215c81d18da550b62ff97ee187b99f1b135fd894a13451e00986a080cad", + "sha256:70c14743320a68c5dac7fc5a0f685be63bc2024b062fe2aaccc4acc3d01b14a1", + "sha256:7e61be8a2900897803c293247ef87366d5df86bf701083b6c43119c7c6c99108", + "sha256:8044d1c085d49673aadb3d7dc20ef5cb5b030c7a4fa253a593dda2eab3059929", + "sha256:89d76ce33d3266173f5be80bd4efcbd5196cafc34100fdab814f9b228dee0fa4", + "sha256:99568f00f7bf820c620f01721485cad230f3fb28f57d8fbf4a7967ec2e446994", + "sha256:a7c37f048ec3920783abab99f8f4036561a174f1314302ccfa4e9ad31cb00eb4", + 
"sha256:c2062c7d470751b648f1cacc3f54460aebfc261285f14bc6da49c6943bd48bdd", + "sha256:c9bce6e006fbe771a02bda468ec40ffccbf954803b470a0345ad39c603402577", + "sha256:ce367d21f33e23a84fb83a641b3834dd7dd8e9318ad8ff677fbfae5915a239f7", + "sha256:ce450ffbfec93821ab1fea94779a8440e10cf63819be6e176eb1973a6017aff5", + "sha256:ce5cc53aa9fbbf6712e92c7cf268274eaff30f6bd12a0754e8133d85a8fb0f5f", + "sha256:d466967ac8e45244b9dfe302bbe5e3337f8dc4dec8d7d10f5e950d83b140d33a", + "sha256:d881c2e657c51d89f02ae4c21d9adbef76b8325fe4d5cf0e9ad62f850f3a98fd", + "sha256:e565569fc28e3ba3e475ec344d87ed3cd8ba2d575335359749298a0899fe122e", + "sha256:ea55b80eb0d1c3f1d8d784264a6764f931e172480a2f1868f2536444c5f01e01" + ], + "version": "==2020.5.14" + }, + "requests": { + "hashes": [ + "sha256:43999036bfa82904b6af1d99e4882b560e5e2c68e5c4b0aa03b655f3d7d73fee", + "sha256:b3f43d496c6daba4493e7c431722aeb7dbc6288f52a6e04e7b6023b0247817e6" + ], + "version": "==2.23.0" + }, + "six": { + "hashes": [ + "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9", + "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb" + ], + "index": "pypi", + "version": "==1.11.0" + }, + "sortedcontainers": { + "hashes": [ + "sha256:974e9a32f56b17c1bac2aebd9dcf197f3eb9cd30553c5852a3187ad162e1a03a", + "sha256:d9e96492dd51fae31e60837736b38fe42a187b5404c16606ff7ee7cd582d4c60" + ], + "version": "==2.1.0" + }, + "toml": { + "hashes": [ + "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f", + "sha256:bda89d5935c2eac546d648028b9901107a595863cb36bae0c73ac804a9b4ce88" + ], + "version": "==0.10.1" + }, + "typed-ast": { + "hashes": [ + "sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355", + "sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919", + "sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa", + "sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652", + 
"sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75", + "sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01", + "sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d", + "sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1", + "sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907", + "sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c", + "sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3", + "sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b", + "sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614", + "sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb", + "sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b", + "sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41", + "sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6", + "sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34", + "sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe", + "sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4", + "sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7" + ], + "version": "==1.4.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:6e95524d8a547a91e08f404ae485bbb71962de46967e1b71a0cb89af24e761c5", + "sha256:79ee589a3caca649a9bfd2a8de4709837400dfa00b6cc81962a1e6a1815969ae", + "sha256:f8d2bd89d25bc39dabe7d23df520442fa1d8969b82544370e03d88b5a591c392" + ], + "version": "==3.7.4.2" + }, + "typing-inspect": { + "hashes": [ + "sha256:3b98390df4d999a28cf5b35d8b333425af5da2ece8a4ea9e98f71e7591347b4f", + "sha256:8f1b1dd25908dbfd81d3bebc218011531e7ab614ba6e5bf7826d887c834afab7", + "sha256:de08f50a22955ddec353876df7b2545994d6df08a2f45d54ac8c05e530372ca0" + ], + "version": "==0.6.0" + }, + "urllib3": { + "hashes": [ + 
"sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527", + "sha256:88206b0eb87e6d677d424843ac5209e3fb9d0190d0ee169599165ec25e9d9115" + ], + "version": "==1.25.9" + }, + "wcwidth": { + "hashes": [ + "sha256:cafe2186b3c009a04067022ce1dcd79cb38d8d65ee4f4791b8888d6599d1bbe1", + "sha256:ee73862862a156bf77ff92b09034fc4825dd3af9cf81bc5b360668d425f3c5f1" + ], + "version": "==0.1.9" + }, + "zipp": { + "hashes": [ + "sha256:aa36550ff0c0b7ef7fa639055d797116ee891440eac1a56f378e2d3179e0320b", + "sha256:c599e4d75c98f6798c509911d08a22e6c021d074469042177c8c86fb92eefd96" + ], + "version": "==3.1.0" + } + } +} diff --git a/QA.py b/QA.py new file mode 100644 index 0000000..063354a --- /dev/null +++ b/QA.py @@ -0,0 +1,299 @@ +from typing import Callable, Dict, Any, List, Tuple +import functools +import re +from Entity.Courses import Courses +from Entity.Locations import Locations +from Entity.Profs import Profs +from Entity.Clubs import Clubs +from Entity.Sections import Sections +from Entity.ProfessorSectionView import ProfessorSectionView +from database_wrapper import NimbusMySQLAlchemy +import itertools + +Extracted_Vars = Dict[str, Any] +DB_Data = Dict[str, Any] +DB_Query = Callable[[Extracted_Vars], DB_Data] +Answer_Formatter = Callable[[Extracted_Vars, DB_Data], str] + + +tag_lookup = { + "PROF": Profs, + "CLUB": Clubs, + "COURSE": Courses, + "SECRET_HIDEOUT": Locations, + "SECTION": Sections, + "PROF_SECTION": ProfessorSectionView, +} + + +class QA: + """ + A class for wrapping functions used to answer a question. + """ + + def __init__(self, q_format, db_query, format_answer, db): + """ + Args: + q_format (str): Question format string + db (NimbusDatabase): Object used to access remote database + db_query (DB_Query): Function used to get data from database. Takes + a dict of extracted variables and returns a dict of variables + from the database. + format_answer (Answer_Formatter): Function used to format answer + string. 
def create_qa_mapping(qa_list):
    """
    Index QA objects by their question format.

    Args:
        qa_list (list(QA)): Objects exposing a ``q_format`` attribute.

    Returns:
        dict: Maps each ``q_format`` to its QA object. When several entries
        share a format, the one appearing last in ``qa_list`` is kept.
    """
    by_format = {}
    for qa_obj in qa_list:
        by_format[qa_obj.q_format] = qa_obj
    return by_format


def _string_sub(a_format, extracted_info, db_data):
    """
    Fill an answer template with the extracted entity and database values.

    Args:
        a_format (str): ``str.format`` template. ``{ex}`` receives the
            normalized entity; every key of ``db_data`` is available as a
            named field.
        extracted_info (dict): Must contain the key ``"normalized entity"``.
        db_data (dict): Values retrieved from the database.

    Returns:
        The formatted answer, or None when any value in ``db_data`` is None.
    """
    # A single missing database value means no answer can be produced.
    if None in db_data.values():
        return None

    entity = extracted_info["normalized entity"]
    return a_format.format(ex=entity, **db_data)


def string_sub(a_format):
    """
    Build an answer formatter bound to ``a_format``.

    Returns:
        A callable taking ``(extracted_info, db_data)`` that behaves like
        ``_string_sub`` with the template already supplied.
    """
    formatter = functools.partial(_string_sub, a_format)
    return formatter


def _get_property(
    prop: str, table: str, extracted_info: Extracted_Vars, db: NimbusMySQLAlchemy
):
    """
    Look up a single property of the extracted entity in the database.

    Args:
        prop: Name of the property to fetch.
        table: Key into ``tag_lookup`` selecting the entity class; when None
            the key is taken from ``extracted_info["tag"]``.
        extracted_info: Must contain ``"normalized entity"`` (and ``"tag"``
            when ``table`` is None).
        db: Object providing ``get_property_from_entity``.

    Returns:
        A one-item dict ``{"db_<prop>": value}``; the value is None when the
        database call raises IndexError.
    """
    identifier = extracted_info["normalized entity"]
    lookup_key = extracted_info["tag"] if table is None else table
    entity_cls = tag_lookup[lookup_key]

    result_key = f"db_{prop}"
    try:
        found = db.get_property_from_entity(
            prop=prop, entity=entity_cls, identifier=identifier
        )
    except IndexError:
        found = None
    return {result_key: found}


def get_property(prop: str, table: str = None):
    """
    Build a database access function that fetches ``prop``.

    Returns:
        A callable taking ``(extracted_info, db)`` that behaves like
        ``_get_property`` with ``prop`` and ``table`` already supplied.
    """
    accessor = functools.partial(_get_property, prop, table)
    return accessor
def get_property_list(prop: str, joiner: str, table: str = None):
    """
    Build a database access function that fetches a joined list of ``prop``.

    Returns:
        A callable taking ``(extracted_info, db)`` that behaves like
        ``_get_property_list`` with ``prop``, ``joiner`` and ``table`` already
        supplied.
    """
    return functools.partial(_get_property_list, prop, joiner, table)


def _generic_answer_formatter(
    a_format: str, pred: Any, extracted_info: Extracted_Vars, db_data: DB_Data
):
    """
    Format a yes/no style answer from a truth test on the database data.

    Args:
        a_format: ``str.format`` template. Available fields: ``y_n`` and
            ``yes_no`` ("Yes"/"No"), ``_not`` ("" when true, "not" when
            false), ``not_not`` ("not" when true, "" when false), ``t_f``
            (the raw test result), ``db`` (``db_data``) and ``ex``
            (``extracted_info``).
        pred: How to decide truth. A str is used as a regular expression, a
            callable is called with ``db_data``, anything else falls back to
            ``bool(db_data)``.
        extracted_info: Extracted variables, exposed to the template as ``ex``.
        db_data: Database data, exposed to the template as ``db``.

    Returns:
        The formatted answer string.
    """
    # isinstance (rather than an exact type comparison) so that str
    # subclasses are also treated as patterns.
    if isinstance(pred, str):
        # NOTE(review): re.search needs a str/bytes subject, but db_data is
        # annotated as a dict; presumably callers of this branch pass a
        # string -- confirm against the call sites.
        t_f = re.search(pred, db_data)
    elif callable(pred):
        t_f = pred(db_data)
    else:
        t_f = bool(db_data)

    y_n = "Yes" if t_f else "No"
    _not = "" if t_f else "not"
    not_not = "not" if t_f else ""

    return a_format.format(
        y_n=y_n,
        yes_no=y_n,
        _not=_not,
        not_not=not_not,
        t_f=t_f,
        db=db_data,
        ex=extracted_info,
    )


def generic_answer_formatter(a_format: str, pred: Any = None):
    """
    Build an answer formatter bound to ``a_format`` and ``pred``.

    Returns:
        A callable taking ``(extracted_info, db_data)`` that behaves like
        ``_generic_answer_formatter``.
    """
    return functools.partial(_generic_answer_formatter, a_format, pred)


def _grammatical_join(substrings: list, last_two_join: str = "and"):
    """
    Join items the way a list is written in an English sentence.

    Args:
        substrings: Items to join. The sequence is NOT modified.
        last_two_join: Word placed before the final item (default "and").

    Returns:
        "" for no items, the item itself for one item, "a and b" for two,
        and "a, b, and c" for three or more.
    """
    if not substrings:
        return ""
    if len(substrings) == 1:
        return substrings[0]
    if len(substrings) == 2:
        return f"{substrings[0]} {last_two_join} {substrings[1]}"

    # Build a new list instead of pop()/append() on the argument, so the
    # caller's list is left untouched.
    *leading, last = substrings
    return ", ".join([*leading, f"{last_two_join} {last}"])


def format_prof_office_hours(extracted_vars: Extracted_Vars, db_data: DB_Data):
    """
    Format a professor's office hours as an answer sentence.

    Args:
        extracted_vars: Must contain ``["PROF"]["normalized entity"]``.
        db_data: Must contain ``["PROF"]["OfficeHours"]``.

    Returns:
        Whatever ``_format_prof_office_hours`` returns for that professor
        and office-hours value.
    """
    prof = extracted_vars["PROF"]["normalized entity"]
    days = db_data["PROF"]["OfficeHours"]
    return _format_prof_office_hours(prof, days)
def _chain_db_access(
    fns: List[DB_Query], extracted_vars: Extracted_Vars, db: NimbusMySQLAlchemy
) -> DB_Data:
    """
    Combines behavior of a list of database access functions

    Args:
        fns: List of database access functions to run. Each is called as
            ``fn(extracted_vars, db)`` and must return a dict.
        extracted_vars: Dictionary of extracted information to run fns against
        db: Database object handed to every function in ``fns``.

    Returns:
        A dictionary of database data. When a key is produced more than once,
        later occurrences are stored under ``<key>1``, ``<key>2``, ... (the
        first free numeric suffix) so that no value is overwritten.
    """
    db_data = dict()
    for fn in fns:
        for key, val in fn(extracted_vars, db).items():
            # Always derive the candidate from the ORIGINAL key; appending to
            # the previous candidate would yield key1, key12, key123, ...
            unique_key = key
            index = 1
            while unique_key in db_data:
                unique_key = f"{key}{index}"
                index += 1
            db_data[unique_key] = val
    return db_data


# Actually returns partial[Dict[str, Any]]
def chain_db_access(fns: List[DB_Query]) -> DB_Query:
    """
    Build one database access function that runs every function in ``fns``.

    Returns:
        A callable taking ``(extracted_vars, db)`` that behaves like
        ``_chain_db_access`` with ``fns`` already supplied.
    """
    return functools.partial(_chain_db_access, fns)


def get_all_exact_matches(matches):
    """
    Collect the entries that tie with the last entry of ``matches``.

    Each entry is indexed positionally: item ``[1]`` is the value compared
    and item ``[2]`` is the value returned.

    Args:
        matches: Non-empty sequence of indexable entries.

    Returns:
        list: Item ``[2]`` of every entry whose item ``[1]`` equals that of
        the last entry, ordered from the end of ``matches`` to the start.

    Raises:
        IndexError: If ``matches`` is empty.
    """
    exact = matches[-1][1]
    return [match[2] for match in reversed(matches) if match[1] == exact]
last two of a list of items + db_access_fns.append(get_property_list(prop, third)) + tokens[i] = "{db_" + prop + "}" + elif len(subtokens) == 4: + ent, prop, table, joiner = subtokens + db_access_fns.append( + get_property_list(prop, joiner, table)) + tokens[i] = "{db_" + prop + "}" + + o = QA( + q_format=q, + db_query=chain_db_access(db_access_fns), + format_answer=string_sub(" ".join(tokens)), + db=db, + ) + qa_objs.append(o) + + return qa_objs diff --git a/README.md b/README.md index 9360176..6288667 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,208 @@ # api -Python wrapper for MySQL database. -Also a REST API soon. +![GitHub repo size](https://img.shields.io/github/repo-size/calpoly-csai/api) +![GitHub code size in bytes](https://img.shields.io/github/languages/code-size/calpoly-csai/api) +![GitHub closed issues](https://img.shields.io/github/issues-closed/calpoly-csai/api) +![GitHub closed pull requests](https://img.shields.io/github/issues-pr-closed/calpoly-csai/api) -[Michael Fekadu](github.com/mfekadu) +[![Vulnerabilities](https://sonarcloud.io/api/project_badges/measure?project=calpoly-csai_api&metric=vulnerabilities)](https://sonarcloud.io/dashboard?id=calpoly-csai_api) +[![Bugs](https://sonarcloud.io/api/project_badges/measure?project=calpoly-csai_api&metric=bugs)](https://sonarcloud.io/dashboard?id=calpoly-csai_api) +[![Code Smells](https://sonarcloud.io/api/project_badges/measure?project=calpoly-csai_api&metric=code_smells)](https://sonarcloud.io/dashboard?id=calpoly-csai_api) +[![Technical Debt](https://sonarcloud.io/api/project_badges/measure?project=calpoly-csai_api&metric=sqale_index)](https://sonarcloud.io/dashboard?id=calpoly-csai_api) -[Tyler Campanile](github.com/tecampani) +Official API for the [NIMBUS Voice Assistant](https://github.com/calpoly-csai/CSAI_Voice_Assistant) accessible via HTTP REST protocol. 
-[Henry Yin](github.com/hyinnn) +## 🚧 This API is still in-development, so expect the endpoints to be constantly changing until finalized. -[Ben Dahlgren](github.com/Dahlgreb) +## GitHub Actions Status +![Deploy To Staging Server](https://github.com/calpoly-csai/api/workflows/Deploy%20Dev%20To%20Staging%20Server/badge.svg) -[Kush Upadhyay](github.com/kpu-21) +![Build and Deploy to Google Compute Engine](https://github.com/calpoly-csai/api/workflows/Build%20and%20Deploy%20to%20Google%20Compute%20Engine/badge.svg) -[Steven Bradley](github.com/stbradle) +![Run Tests](https://github.com/calpoly-csai/api/workflows/Run%20Tests/badge.svg) -[Daniel DeFoe](github.com/danield2255) +![Python PEP8 Style Check](https://github.com/calpoly-csai/api/workflows/Python%20PEP8%20Style%20Check/badge.svg) + +![Python Pyre Type Annotation Check](https://github.com/calpoly-csai/api/workflows/Python%20Pyre%20Type%20Annotation%20Check/badge.svg) + +## Documentation + +- [ ] **TODO: INSERT LINK TO API DOCUMENTATION** + +- [ ] **TODO: USE http://readthedocs.org to host the docs live on the internet** + +- [ ] **TODO: [CREATE API DOCS 😅](https://github.com/calpoly-csai/api/milestone/2)** + + +## Dev Environment Setup +### Prerequisites +1. Python 3.6.9+ (we're running 3.8 on gce) +2. pip 9.0.1+ (pip 20.1.1 is whats on gce as of this writing, but whatever version you have with your python install is probably fine) +3. git (can you run `git --version`?) (we're using 2.25.1 on gce, but any version will *almost certainly* work) +4. Access to database credentials/api keys (contact us) + +If you're running Windows, we recommend (it is not required) setting up the [Windows subsystem for Linux](https://docs.microsoft.com/en-us/windows/wsl/install-win10). + +### Database Configuration + +**There are THREE ways to get your database config setup. If you're a member of CSAI, there's a script to generate a config file - contact a club member to get access to it. 
** + +**Create a file** called `config.json` that should include at least the following details of a MySQL database: +```json +{ + ... + "mysql": { + "host": "HOSTNAME", + "port": "PORT e.g. 3306", + "user": "USERNAME", + "password": "PASSWORD", + "database": "DATABASE", + ... + } + ... +} +``` + +**You can also use [`config_SAMPLE.json`](https://github.com/calpoly-csai/api/blob/dev/config_SAMPLE.json) as a reference (easier!)** + +_Contact anyone on the Data Team to get connection details for the Nimbus database_ + + +### Install pip packages +> Note: If you don't have pip installed for python2, you may have to call `pip` instead of `pip3`. + +```bash +pip3 install -r requirements.txt +``` + +### Download spaCy model +There are two spaCy models usable for Nimbus predictions. According to [spaCy's docs](https://spacy.io/models), these are general-purpose pretrained models to predict named entities, part-of-speech tags and syntactic dependencies; they can be used out-of-the-box and fine-tuned on more specific data. We use these to classify a question someone asks Nimbus, so we can retrieve an answer from our database. See [here](https://stackoverflow.com/a/57337084/13291759) for an idea of the difference between lg and sm. + +```bash +python3 -m spacy download en_core_web_sm +``` +or +```bash +python3 -m spacy download en_core_web_lg +``` + +### Get API keys for terminal instance + +```bash +git clone https://github.com/calpoly-csai/nimbus-config-secrets.git + +cd nimbus-config-secrets + +source .export_env_vars +``` + + +You should now have everything you need to run the API server. + + +### Run the API server + +```bash +python3 flask_api.py +``` +- Opens localhost:PORT +- PORT is changed in gunicorn_config.py + +**_Run in the background_** +```bash +python3 flask_api.py& +``` + +### Run your first API request +```bash +curl --location --request POST 'localhost:8080/ask' --header 'Content-Type: application/json' --data-raw '{ "question": "What are the prereqs for CSC 480?" 
}' +``` +- Sends Query to Database through localhost:8080/ask + + +### Run the tests +```bash +pytest +``` +(you may have to run the following if you have python 2.7 installed as well): +```bash +python3 -m pytest +``` + +### Python PEP8 Style Standards +**_Run the `format` script to automatically make our code look nice_** +```bash +./format.sh +``` + +_Sometimes the format script is not enough, so run `lint` to manually style our code_ +```bash +./lint.sh +``` + +### Some common issues and solutions +##### ```/bin/bash^M: bad interpreter: No such file or directory```: +>Note: This issue can occur (for me, at least) in files other than just format.sh. These instructions should fix this problem in any file. +``` +snekiam@P1:api$ ./format.sh +-bash: ./format.sh: /bin/bash^M: bad interpreter: No such file or directory +``` +This can happen when you're running bash on windows sometimes. Linux doesn't recognize carriage returns (^M) the same way windows does. You may have this error in more than one file, but it can be fixed by running `dos2unix` like this: +``` +snekiam@P1:api$ dos2unix format.sh +dos2unix: converting file format.sh to Unix format... +snekiam@P1:api$ ./format.sh +(output of format.sh, which can be long so I haven't included it here) +``` + +## Deployment +### What we use +A Linux server (e.g. Ubuntu 20.04 LTS) with open firewall at `tcp:5000` for _Flask_, `tcp:80` for _http_ and `tcp:443` for _https_ and `tcp:22` for _ssh_ and `tcp:3306` for _mysql_ + +[See this documentation of the database deployment process](https://github.com/calpoly-csai/wiki/wiki/How-To-Install-and-Set-Up-a-Remote-MySQL-5.7-Database-and-Python-3.6-on-Ubuntu-18.04-with-Google-Cloud-Platform) + +We're using docker to deploy - at some point, it'll be easy for you to run the Nimbus API in a local docker container, but at the moment it requires some things (like SSH keys and SSL private keys) which shouldn't be required for a development environment. 
+ + +## Contributing +![GitHub issues](https://img.shields.io/github/issues/calpoly-csai/api) + +Have a [**look at the v1.0 project board for TODOs**](https://github.com/calpoly-csai/api/projects/1)! + +If you are new to programming, then filter for the [**![good first issue](https://img.shields.io/github/labels/calpoly-csai/api/good%20first%20issue)** label](https://github.com/calpoly-csai/api/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22) + +Pull requests are welcome. + +For major changes, please [**open an issue**](https://github.com/calpoly-csai/api/issues/new) first to discuss what you would like to change. + +Please make sure to update tests as appropriate. + +## License +[GNU GPLv3](https://choosealicense.com/licenses/gpl-3.0/) + + +## Authors and acknowledgment + +[Michael Fekadu](https://www.github.com/mfekadu) + +[John Waidhofer](https://www.github.com/Waidhoferj) + +[Miles Aikens](https://www.github.com/snekiam) + +[Daniel DeFoe](https://www.github.com/danield2255) + +[Taylor Nguyen](https://www.github.com/taylor-nguyen-987) + +[Adam Perlin](https://www.github.com/adamperlin) + +[Simon Ibssa](https://www.github.com/ibssasimon) + +[Kush Upadhyay](https://www.github.com/kpu-21) + +[Ben Dahlgren](https://www.github.com/Dahlgreb) + +[Tyler Campanile](https://www.github.com/tecampani) + +[Steven Bradley](https://www.github.com/stbradle) + +And many more... diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/build.py b/build.py new file mode 100755 index 0000000..7de58c1 --- /dev/null +++ b/build.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python3 +""" +This script will install/download/compile everything you need to work on Nimbus. +""" + +import os +import subprocess +import sys +import argparse +import warnings + + +# lazily mock the colorful. 
# Plain-text stand-ins for the `colorful` style functions. They are used until
# the real colour packages have been installed and imported, at which point
# the script rebinds these names to the coloured versions.
def bold_green(s, *args, **kwargs):
    return str(s)


def bold_yellow(s, *args, **kwargs):
    return str(s)


def bold_orange(s, *args, **kwargs):
    return str(s)


def bold_white(s, *args, **kwargs):
    return str(s)


def bold_blue(s, *args, **kwargs):
    return str(s)


def bold_red(s, *args, **kwargs):
    return str(s)


def gray_on_gray(s, *args, **kwargs):
    return str(s)


def reset_color(s, *args, **kwargs):
    return str(s)


def run(
    cmd_tokens,
    fail_msg,
    run_msg=None,
    q=False,
    skip_assert=False,
    warn=False,
    capture_output=False,
    qq=False,
):
    """
    Run a command as a subprocess and report on its outcome.

    Args:
        cmd_tokens: Command and arguments, e.g. ``[python, "-m", "pip", ...]``.
        fail_msg: Message used for the warning / error when the command exits
            with a non-zero status.
        run_msg: Message printed before running; defaults to
            "running... <command>".
        q: Quiet. Sends the child's stdout and stderr to DEVNULL and
            suppresses the failure warning.
        skip_assert: Return the result without checking the exit status.
        warn: On failure, emit a warning (unless ``q``) instead of raising.
        capture_output: Capture the child's stdout and return it as a
            stripped UTF-8 string instead of the CompletedProcess.
        qq: Extra quiet. Suppresses this function's own printouts.

    Returns:
        The stripped stdout text when ``capture_output`` is true, otherwise
        the ``subprocess.CompletedProcess``.

    Raises:
        AssertionError: If the command fails and neither ``skip_assert`` nor
            ``warn`` is set.
    """
    fname = os.path.split(cmd_tokens[0])[1]
    args = cmd_tokens[1:]
    run_msg = run_msg or bold_blue(
        "\nrunning... " + " ".join([fname, *args]) + "\n", nested=True
    )
    if not qq:
        print(run_msg)

    stdout = subprocess.DEVNULL if q else None  # default STDOUT
    stderr = subprocess.DEVNULL if q else None  # default STDERR
    if capture_output:
        stdout = subprocess.PIPE
    # stderr gets its own setting; passing the stdout setting here would pipe
    # (and silently discard) error output whenever stdout is captured.
    res = subprocess.run(cmd_tokens, stdout=stdout, stderr=stderr)

    result = res.stdout.decode("utf-8").strip() if capture_output else res

    if skip_assert:
        return result

    if res.returncode == 0:
        if not qq:
            print(bold_blue("✅ it is good.", nested=True))
        return result

    if warn:
        if not q:
            warnings.warn(bold_yellow(reset_color("WARNING: " + fail_msg)))
        return result

    # Raised explicitly (not via `assert`) so the check survives `python -O`.
    raise AssertionError(bold_red(fail_msg))
so that all the funcs with `q=Q` (True) will be quiet by default # noqa + help="optionally be VERBOSE with printouts (default: not verbose).", + ) + parser.add_argument( + "--spacy-large", + action="store_true", + help="optionally download spacy's `en_core_web_lg` model (default: False).", + ) + parser.add_argument( + "--no-overwrite-secrets", + action="store_true", + help="optionally avoid passing in --overwrite-all into setup_special_files_from_env (default: False).", + ) + args = parser.parse_args() + Q = args.verbose + SPACY_LARGE = args.spacy_large + NO_OVERWRITE_SECRETS = args.no_overwrite_secrets + + # ========================================================================= + # GET THE CURRENTLY RUNNING PYTHON EXECUTABLE PATH + # ========================================================================= + python = sys.executable + + # ========================================================================= + # SOME SCRIPT SETUP + # ========================================================================= + try: + # --------------------------------------------------------------------- + # install some nice-to-have terminal color packages + # --------------------------------------------------------------------- + cmd = [ + python, + "-m", + "pip", + "install", + "colorful", + "termcolor", + "ansicolors", + "colorama", + ] + run(cmd, fail_msg="oops color packages failed to install. 
that's ok.", q=Q) + from colorama import init + import colorful as cf + from colors import strip_color + from contextlib import contextmanager + import copy + + # https://github.com/timofurrer/colorful#styles + cf.use_style("monokai") + bold_green = cf.bold_green + bold_orange = cf.bold_orange + bold_yellow = cf.bold_yellow + bold_white = cf.bold + bold_red = cf.bold_magenta + gray_on_gray = cf.gray_on_gray + bold_blue = cf.bold_blue + + def reset_color(s, *args, **kwargs): + if isinstance(s, cf.core.ColorfulString): + return strip_color(s.orig_string) + elif isinstance(s, str): + return strip_color(s) + else: + raise NotImplementedError("expected either ColorfulString or str.") + + # shadows mock functions + init() + print(bold_green("color!") + bold_orange(" wow!")) + except Exception as e: + # --------------------------------------------------------------------- + # make sure you are not on old python + # --------------------------------------------------------------------- + if sys.version_info.major < 3: + # https://docs.python.org/3/library/exceptions.html#SystemError + msg = ( + "\n\n" + + "you are running a python with `sys.version_info.major < 3`" + + "\n" + + "\n" + + str(sys.version) + + "\n" + + "\n" + + "try again with Python >= 3.6.8" + + "\n" + ) + raise SystemError(msg) + exit(1) + else: + # otherwise the color packages failed for other reasons. that's ok. + print(e) + print("\n\nhmm.. 
no colors, that's ok.\n\n") + + # ========================================================================= + # INSTALL THE REQUIREMENTS + # ========================================================================= + cmd = [python, "-m", "pip", "install", "-r", "requirements.txt"] + run(cmd_tokens=cmd, fail_msg="failed to install requirements", q=Q) + + # ========================================================================= + # SETUP THE SPECIAL/SECRET FILES + # ========================================================================= + if NO_OVERWRITE_SECRETS: + cmd = [python, "setup_special_files_from_env.py"] + else: + cmd = [python, "setup_special_files_from_env.py", "--overwrite-all"] + missing_secrets_msg = ( + bold_red( + "failed to setup special files" + "\n\n" + "you need the ", nested=True + ) + + bold_orange("nimbus-config-secrets", nested=True) + + "\n" + + bold_red( + "\n" + + "ask an Nimbus maintainer/admin for help" + + "\n" + + "\n" + + "OR read the " + + bold_yellow(" setup_special_files_from_env.py ", nested=True) + + bold_red( + " file " + + "and setup the appropriate environment files yourself." + + "\n\nalso see the " + + bold_yellow("config_SAMPLE.json", nested=True,), + nested=True, + ), + nested=True, + ) + ) + res = run(cmd_tokens=cmd, fail_msg=missing_secrets_msg, q=Q, warn=True) + if res.returncode > 0: + if "config.json" in os.listdir(os.getcwd()): + # ----------------------------------------------------------------- + # setup failed, but we can check if `config.json` is already + # ----------------------------------------------------------------- + print( + bold_yellow( + ( + "\n\nfound config.json... continuing build...\n\n" + "if you do not want your current config.json\n" + "please follow the steps in the nimbus-config-secrets reposotiry\n" + "ask a Maintainer/Admin for help\n" + ) + ) + ) + else: + # ----------------------------------------------------------------- + # okay, fine. 
last effort to find `config.json` at root of git dir + # ----------------------------------------------------------------- + cmd = "git rev-parse --show-toplevel".split(" ") + top_level_path = run( + cmd, + fail_msg="this is not a git directory", + q=Q, + warn=True, + capture_output=True, + qq=True, + ) + assert "config.json" in os.listdir(os.getcwd()), missing_secrets_msg + + # ========================================================================= + # INSTALL THE LATEST `urllib3` for reasons + # ========================================================================= + cmd = [python, "-m", "pip", "install", "--upgrade", "urllib3"] + res = run(cmd_tokens=cmd, fail_msg="failed to get `urllib3`", q=Q) + + # ========================================================================= + # GET THE `en_core_web_sm` SPACY MODEL + # ========================================================================= + cmd = [python, "-m", "spacy", "download", "en_core_web_sm"] + res = run(cmd_tokens=cmd, fail_msg="failed to get `en_core_web_sm`", q=Q) + + # ========================================================================= + # GET THE `en_core_web_lg` SPACY MODEL + # TODO: consider letting people download the small/medium one... + # ========================================================================= + if SPACY_LARGE: + cmd = [python, "-m", "spacy", "download", "en_core_web_lg"] + res = run(cmd_tokens=cmd, fail_msg="failed to get `en_core_web_lg`", q=Q) + else: + print( + bold_blue( + "\n\nskipping spacy's `en_core_web_lg` download..." 
+ "\nnimbus will work with `en_core_web_sm` at least\n" + ) + ) + + # ========================================================================= + # DONE + # ========================================================================= + print(bold_green("done")) diff --git a/config_SAMPLE.json b/config_SAMPLE.json new file mode 100644 index 0000000..ce30da5 --- /dev/null +++ b/config_SAMPLE.json @@ -0,0 +1,17 @@ +{ + "PYDRIVE_CLIENT_ID": "SECRET", + "PYDRIVE_CLIENT_SECRET": "SECRET", + "GOOGLE_DRIVE_CREDENTIALS": "SECRET", + "GOOGLE_DRIVE_FOLDER_ID": "SECRET", + "GOOGLE_CLOUD_NLP_CREDENTIALS": "SECRET", + "GOOGLE_CLOUD_NLP_MODEL_NAME": "SECRET", + "mysql": { + "host": "HOSTNAME", + "port": "PORT e.g. 3306", + "user": "USERNAME", + "password": "PASSWORD", + "database": "DATABASE", + "sql_dir": "sql", + "create_file": "csai_nov_8_2019_create_script.min.sql" + } +} \ No newline at end of file diff --git a/coverage.sh b/coverage.sh new file mode 100755 index 0000000..89a7484 --- /dev/null +++ b/coverage.sh @@ -0,0 +1,7 @@ +python -m coverage run -m pytest + +# omit because pipenv creates a "~/.local" directory +coverage html --omit="*.local*" + +echo "\nopen htmlcov/index.html\n" + diff --git a/create_all_tables.py b/create_all_tables.py new file mode 100755 index 0000000..aceaba9 --- /dev/null +++ b/create_all_tables.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 +from database_wrapper import NimbusMySQLAlchemy + +db = NimbusMySQLAlchemy(config_file="config.json") +db._create_all_tables() diff --git a/database_wrapper.py b/database_wrapper.py new file mode 100755 index 0000000..e919553 --- /dev/null +++ b/database_wrapper.py @@ -0,0 +1,944 @@ +#!/usr/bin/env python3 +"""A wrapper module for the Nimbus data storage systems. + +This module includes various adapters for interfacing with +different databases and storage locations. 
+ + Typical usage example: + + db = NimbusMySQL(config_file="config.json") + ents = db.get_entities() +""" +import json +import csv +from abc import ABC, abstractmethod +from typing import List, Optional, Union +import datetime + + +import sqlalchemy +from sqlalchemy import create_engine, inspect +from sqlalchemy.engine import Engine +from sqlalchemy.orm import sessionmaker + +from Entity.Entity import Entity +from Entity.AudioSampleMetaData import AudioSampleMetaData, NoiseLevel +from Entity.Calendars import Calendars +from Entity.Courses import Courses +from Entity.ErrorLog import ErrorLog +from Entity.Locations import Locations +from Entity.QuestionAnswerPair import QuestionAnswerPair, AnswerType +from Entity.QueryFeedback import QueryFeedback +from Entity.Professors import ProfessorsProperties +from Entity.Clubs import Clubs +from Entity.Sections import Sections, SectionType +from Entity.Profs import Profs +from Entity.Professors import Professors +from Entity.ProfessorSectionView import ProfessorSectionView +from Entity.OfficeHours import OfficeHours +from Entity.QuestionLog import QuestionLog +from Entity.ExpectedKeys import EXPECTED_KEYS_BY_ENTITY + +from fuzzywuzzy import fuzz + + +GREEN_COLOR_CODE = "\033[92m" +YELLOW_COLOR_CODE = "\033[93m" +PURPLE_COLOR_CODE = "\033[95m" +CYAN_COLOR_CODE = "\033[96m" +RESET_COLOR_CODE = "\033[00m" + +UNION_ENTITIES = Union[ + AudioSampleMetaData, + Calendars, + Courses, + Profs, + QuestionAnswerPair, + ProfessorSectionView, +] +UNION_PROPERTIES = Union[ProfessorsProperties] + +default_tag_column_dict = { + Calendars: {"date"}, + Courses: {"course_name", "course_num", "dept"}, + Locations: {"building_number", "name"}, + Profs: {"first_name", "last_name"}, + Clubs: {"club_name"}, + Sections: {"section_name"}, + ProfessorSectionView: {"first_name", "last_name"}, +} + + +class BadDictionaryKeyError(Exception): + """Raised when the given JSON/dict is missing some required fields. 
class BadConfigFileError(Exception):
    """Raised when the config.json file is badly formatted (e.g. missing field).

    Attributes:
        message: an explanation.
    """

    def __init__(self, message: str):
        # Forward the message to Exception so `args` and `str(exc)` are
        # populated even when the caller passes `message=` by keyword.
        super().__init__(message)
        self.message = message
@abstractmethod
def get_property_from_related_entities(
    self,
    prop: List[str],
    entity1: str,
    entity2: str,
    key1: str,
    key2: Optional[str] = None,
    condition_field: Optional[str] = None,
    condition_value: Optional[str] = None,
) -> List[str]:
    """Get properties from two related entities in the database.

    This is an abstract method: it only declares the interface (the body
    is ``pass``); concrete subclasses must supply the query logic.

    NOTE(review): presumably the two entities are joined on ``key1`` (and
    on ``key2`` when the second entity names the column differently) --
    TODO confirm against a concrete implementation.

    Example:
        >>> db = NimbusDatabase("config.json")
        >>> db.get_property_from_related_entities(
                ["firstName", "lastName", "ohRoom"],
                "Professors", "OfficeHours", "professorId")
        [("Foaad", "Khosmood", "14-213"), ("John", "Clements", "14-210"), ...]

        >>> db.get_property_from_related_entities(
                ["firstName", "lastName", "ohRoom"],
                "Professors", "OfficeHours", "professorId",
                condition_field="firstName", condition_value="Foaad")
        [("Foaad", "Khosmood", "14-213")]

    Args:
        prop: string(s) naming the fields to retrieve.
        entity1: a string naming the first table.
        entity2: a string naming the second table.
        key1: a string naming a key field (see the NOTE above).
        key2: (optional) a second key field name; defaults to None.
        condition_field: (optional) string representing the column name.
        condition_value: (optional) string representing the cell value.

    Returns:
        NOTE(review): annotated as List[str], although the examples show
        a list of tuples -- TODO confirm the intended shape.
    """
    pass
def raises_database_error(func):
    """A Python decorator for mapping to NimbusDatabaseError.

    The decorated function's return value is passed through unchanged.
    A ``sqlalchemy.exc.DataError`` raised by it is re-raised as a
    ``NimbusDatabaseError`` (chained to the original); every other
    exception propagates untouched.

    Resources:
        https://realpython.com/primer-on-python-decorators/#simple-decorators
        https://docs.python.org/3/library/exceptions.html
    """
    # Imported locally so the decorator does not depend on a module-level
    # import that the file does not currently have.
    import functools

    @functools.wraps(func)  # keep func's __name__/__doc__ on the wrapper
    def wrapper(*args, **kwargs):
        try:
            # The result must be returned, otherwise every decorated
            # function would silently evaluate to None.
            return func(*args, **kwargs)
        except sqlalchemy.exc.DataError as e:
            # TODO: consider security tradeoff of displaying
            #       internal server errors
            #       versus development time (being able to see errors quickly)
            # HINT: security always wins, so try to raise a smaller message
            raise NimbusDatabaseError(str(e.args)) from e

    return wrapper
@staticmethod
def validate_input_keys(input_data: dict, expected_keys: set):
    """Check that `input_data` has exactly the keys in `expected_keys`.

    Args:
        input_data: the dictionary whose keys are validated.
        expected_keys: the keys that must all be present, with no extras.

    Raises:
        BadDictionaryKeyError: if `input_data` is empty, contains a key
            that is not expected, or lacks an expected key.
    """
    # Evaluated left to right and lazily, in the same order as three
    # separate checks: empty input, unexpected keys, missing keys.
    keys_mismatch = (
        not len(input_data)
        or any(key not in expected_keys for key in input_data)
        or any(key not in input_data for key in expected_keys)
    )
    if keys_mismatch:
        template = "expected: {} but got: {}"
        raise BadDictionaryKeyError(
            template.format(expected_keys, set(input_data.keys()))
        )
def get_unvalidated_qa_data(self, numQueries: int):
    """Fetch question/answer pairs that have not been verified yet.

    Args:
        numQueries: the maximum number of rows to fetch.

    Returns:
        A list of dictionaries, one per row, with the keys "id",
        "can_we_answer", "question_format", "answer_format", "verified"
        and "answer_type" (the `.name` of the stored answer type).
    """
    pair_cls = QuestionAnswerPair

    unverified_rows = (
        self.session.query(
            pair_cls.id,
            pair_cls.can_we_answer,
            pair_cls.question_format,
            pair_cls.answer_format,
            pair_cls.verified,
            pair_cls.answer_type,
        )
        .filter(pair_cls.verified == 0)
        .limit(numQueries)
        .all()
    )

    # Each row carries the six selected columns, in the order queried.
    return [
        {
            "id": pair_id,
            "can_we_answer": can_answer,
            "question_format": question_fmt,
            "answer_format": answer_fmt,
            "verified": is_verified,
            "answer_type": answer_type.name,
        }
        for (
            pair_id,
            can_answer,
            question_fmt,
            answer_fmt,
            is_verified,
            answer_type,
        ) in unverified_rows
    ]
def _get_property_from_entity(
    self,
    prop: str,
    entity: UNION_ENTITIES,
    identifier: str,
    tag_column_map: dict = default_tag_column_dict,
):
    """
    Fuzzy-match `identifier` against the tag columns of every `entity` row
    and collect the value of `prop` for the rows that match.

    Example:
        >>> db = NimbusMySQLAlchemy()
        >>> db._get_property_from_entity(
                prop="section_name",
                entity=ProfessorSectionView,
                identifier="Braun",
            )

    Args:
        prop: the relevant property value to retrieve from matching entities
        entity: the type of entity we want to get the property from
        identifier: a string that identifies the entity in some way (i.e., a professor's name)
        tag_column_map: a dictionary mapping entity types to columns that identify the entities
            ex:
                {Professors: {"firstName", "lastName"}}

    Raises:
        KeyError: if `entity` has no entry in `tag_column_map`.

    Returns:
        None when no row scores above the threshold; otherwise a list of
        `(total_similarity, tags, prop_value)` tuples sorted by ascending
        similarity, so the best match is the last element.
    """

    MATCH_THRESHOLD = 80

    identifier = identifier.lower()
    # TODO: be smart by check only Professor.firstName Professor.lastName
    # TODO: only check Course.dept, Course.course_num, Course.course_name
    # Look the tag columns up once instead of once per class attribute.
    tag_columns = tag_column_map[entity]
    tag_props = [k for k in entity.__dict__ if k in tag_columns]

    results = []
    for row in self.session.query(entity).all():
        # The score is the SUM of the per-column ratios, so an entity with
        # several tag columns can exceed 100.
        total_similarity = 0
        tags = []
        for tag_prop in tag_props:
            tag_value = str(row.__dict__[tag_prop])
            total_similarity += self.full_fuzzy_match(tag_value.lower(), identifier)
            tags.append(tag_value)

        if total_similarity > MATCH_THRESHOLD:
            results.append((total_similarity, tags, str(row.__dict__[prop])))

    if not results:
        return None

    # Sort on the score only (stable), keeping the query order among ties.
    return sorted(results, key=lambda match: match[0])
def add_entity(self, entity) -> bool:
    """
    A simplified version of insert_entity that relies on the entity performing its own formatting in the constructor call.
    Parameters
    ---------
    `entity - Entity` an initialized entity object to be added to the database.

    Returns
    -------
    `bool` whether entity was successfully added.
    """
    # Don't post if the entity doesn't abide by the rules of the Entity superclass
    if not isinstance(entity, Entity):
        return False
    print("Saving to database: {}...".format(entity))
    try:
        self.session.add(entity)
        self.session.commit()
    except Exception:
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate. Roll back so the failed transaction does not poison
        # the shared session for later calls.
        self.session.rollback()
        return False
    print("{}Saved!\n{}".format(GREEN_COLOR_CODE, RESET_COLOR_CODE))
    return True
def update_entity(self, entity_type, data_dict: dict, filter_fields: list) -> bool:
    """
    Updates an entity in the database. The keys of data_dict should follow camelCase
    so they can be translated into snake_case.

    data_dict should be a dictionary of field names and values, looking like:
    {
        "fieldOne": valueOne,
        "..." : ...
    }

    When entity_type is a subclass of Entity, the row is looked up by
    data_dict["id"] and updated through the entity's own `update` method;
    filter_fields is not used on that path.

    Otherwise, filter_fields is a list of variable names (strings) to match for when running an
    update query. If not provided, raises an error since it's not an update.
    When no matching row exists, a new one is inserted instead.

    ex: passing in filter_fields=["name", "title"] will check for an existing entity that has the
    same 'name' and 'title' values in the data_dict.

    Raises:
        RuntimeError - filter_fields is empty (non-Entity path)
        InvalidOperationOnView - entity_type is a View (non-Entity path)
        BadDictionaryKeyError - 'id' is missing (Entity path) or the keys are invalid
        BadDictionaryValueError - ...

    Returns:
        True if all is good, else False
    """

    if issubclass(entity_type, Entity):
        if "id" not in data_dict:
            raise BadDictionaryKeyError(
                "Include an 'id' field so the element to update can be identified."
            )
        updated_entity = self.session.query(entity_type).get(data_dict["id"])
        # `.get()` yields None when no row has that primary key; report a
        # failed update instead of crashing on `None.update(...)`.
        if updated_entity is None:
            return False
        updated = updated_entity.update(data_dict)
        if not updated:
            return False
        self.session.commit()
        return True

    # Initialize dummy entity to check if it's a View
    dummy_entity = entity_type()
    if dummy_entity.is_view:
        msg = "update_entity for View: {} is not supported"
        raise InvalidOperationOnView(msg.format(entity_type))

    # If we're not filtering for anything, we shouldn't be calling update_entity
    if len(filter_fields) == 0:
        msg = "update_entity for {} requires filter_fields list to filter by."
        raise RuntimeError(msg.format(entity_type))

    # Get formatted data and entity attributes
    formatted_data = self.validate_and_format_entity_data(entity_type, data_dict)
    entity_attributes = entity_type.__dict__

    # Run a SELECT query to see if an entity that matches the values under the fields in the filter_fields list exists
    query = self.session.query(entity_type)
    for field in filter_fields:
        query = query.filter(getattr(entity_type, field) == formatted_data[field])
    entity = query.first()

    # Logging...
    if entity:
        print(
            "{}Updating {} in {}...{}".format(
                YELLOW_COLOR_CODE,
                entity,
                entity_attributes["__tablename__"],
                RESET_COLOR_CODE,
            )
        )
    else:
        entity = entity_type()
        print(
            "{}Matching Entity not found - Inserting {} in {}...{}".format(
                YELLOW_COLOR_CODE,
                entity,
                entity_attributes["__tablename__"],
                RESET_COLOR_CODE,
            )
        )

    # Grab the entity class fields by cleaning the attributes dictionary
    # Note: Make sure you don't label any important data fields with underscores in the front or back!
    # Ignore the first field, since it's assumed to be a primary key
    # Ignore the last field, since it's the is_view boolean
    entity_fields = [
        name
        for name in entity_attributes
        if not (name[0] == "_" or name[-1] == "_")
    ][1:-1]

    # Populate the entity with values from formatted_data
    for entity_field in entity_fields:
        setattr(entity, entity_field, str(formatted_data[entity_field]))

    # Perform the actual UPDATE/INSERT
    print("Saving to database: {}...".format(entity))
    self.session.add(entity)
    self.session.commit()
    print("{}Saved!\n{}".format(GREEN_COLOR_CODE, RESET_COLOR_CODE))

    return True
def delete_entity(self, entity_type: Entity, identifier) -> bool:
    """
    Deletes entity with matching identifier from its table.
    Parameters
    ----------
    `entity_type:Entity` The class of entity. This is used to relate the identifier to a specific table.

    `identifier`: The unique `primary_key` of the desired entity.
    Returns
    -------
    `bool` Whether the operation was successfully completed. `False` is
    also returned when no entity with that identifier exists.
    """
    try:
        target_entity = self.session.query(entity_type).get(identifier)
        if target_entity is None:
            # Nothing to delete; say so explicitly rather than relying on
            # `session.delete(None)` raising.
            return False
        self.session.delete(target_entity)
        self.session.commit()
    except Exception:
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate. Roll back so the session stays usable afterwards.
        self.session.rollback()
        return False
    return True
def format_query_feedback_dict(self, feedback: dict) -> dict:
    """
    Formats query feedback to be saved to the server.

    Parameters
    ----------
    `feedback : dict` A query feedback:
        - {question: String, answer: String, type: String, timestamp: Datetime}

    Raises
    ------
    BadDictionaryValueError
        if `feedback["type"]` is not one of "fact", "related", "stats", "other"
    KeyError
        if one of the four expected keys is missing from `feedback`

    Returns
    -------
    dict
        formatted for the server
    """
    answer_string_to_type = {
        "fact": AnswerType.fact,
        "related": AnswerType.related,
        "stats": AnswerType.statistics,
        "other": AnswerType.other,
    }

    feedback_type = feedback["type"]
    try:
        answer_type = answer_string_to_type[feedback_type]
    except (KeyError, TypeError) as e:
        # An unknown (or unhashable) label is a bad *value*, which this
        # method documents as BadDictionaryValueError, not a raw KeyError.
        msg = "unexpected values for type\n"
        msg += "expected 'fact' or 'related' or 'stats' or 'other' but got '{}'"
        raise BadDictionaryValueError(msg.format(feedback_type)) from e

    return {
        "question": feedback["question"],
        "answer": feedback["answer"],
        "answer_type": answer_type,
        "timestamp": feedback["timestamp"],
    }
strings of database names - """ - acceptable_list = ['dev'] - assert database_name in acceptable_list, "unexpected database_name" - - cursor = cxn.cursor() - cursor.execute('use ' + database_name) - cursor.execute('show tables') - # cursor.fetchall() returns a list of singleton tuples - tups = cursor.fetchall() - cursor.close() - return [x[0] for x in tups] - - -def run_create_script(cxn, filename): - """ - Args: - cxn: the MySQL database connection object - Returns: - True if succeeded - """ - c = cxn.cursor() - with open(filename, 'r') as f: - c.execute(f.read()) - time.sleep(2) - c.close() - return True - - -def what_courses_can_i_take(cxn): - """Answers the ultimate question of the meaning of life the universe and everything - which so happens to be the same as "What courses Cal Poly offers" - - Args: - cxn: the MySQL database connection object - Returns: - A list of course names that Cal Poly offers - """ - c = cxn.cursor() - - c.execute("use dev") # TODO: make this modular - c.execute("SELECT courseName FROM Courses") - # TODO: actually return False error - - tups = c.fetchall() - - # close the cursor - c.close() - - return [x[0] for x in tups] - - -if __name__ == "__main__": - cxn = connect() - - print("getting databases...", get_databases(cxn)) - print("getting tables...", get_tables(cxn, 'dev')) - - script = join(CONFIG['sql_dir'], CONFIG['create_file']) - - # print("running create script...") - # assert run_create_script(cxn, script) == True, "uh oh failed to create" - - cxn.close() - cxn = connect() - - print("getting tables...", get_tables(cxn, 'dev')) - - print(what_courses_can_i_take(cxn)) - - cxn.close() \ No newline at end of file diff --git a/deploy.sh b/deploy.sh new file mode 100755 index 0000000..de792e1 --- /dev/null +++ b/deploy.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# +# This script will just deploy using gunicorn locally, without docker. +# +# This deployment script is not concerned with SSL encryption. 
+# It just gets the code running on multiple CPU processors. +# +gunicorn flask_api:app --config=gunicorn_config.py \ No newline at end of file diff --git a/flask_api.py b/flask_api.py new file mode 100755 index 0000000..9dd5310 --- /dev/null +++ b/flask_api.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python3 +"""An API endpoint module. + +Contains all the handlers for the API. Also the main code to run Flask. +""" +from sqlalchemy.exc import OperationalError, InvalidRequestError + +from flask import Flask, jsonify, request +from werkzeug.exceptions import BadRequest + +# traceback added for stacktrace logging +import traceback + +from flask_cors import CORS +from pydrive.auth import GoogleAuth +from pydrive.drive import GoogleDrive +import json + +import gunicorn_config +from Entity.Calendars import Calendars +from Entity.Clubs import Clubs +from Entity.Courses import Courses +from Entity.Locations import Locations +from Entity.Sections import Sections +from Entity.Professors import Professors +from database_wrapper import ( + BadDictionaryKeyError, + BadDictionaryValueError, + NimbusDatabaseError, + NimbusMySQLAlchemy, +) +from modules.formatters import WakeWordFormatter +from modules.validators import ( + WakeWordValidator, + WakeWordValidatorError, + PhrasesValidator, + PhrasesValidatorError, + FeedbackValidator, + FeedbackValidatorError, +) + +from Entity.AudioSampleMetaData import AudioSampleMetaData +from Entity.QuestionAnswerPair import QuestionAnswerPair +from Entity.QueryFeedback import QueryFeedback +from Entity.QuestionLog import QuestionLog +from Entity.ErrorLog import ErrorLog + +from Entity.EntityToken import EntityToken + +from nimbus import Nimbus + +import json + +BAD_REQUEST = 400 +SUCCESS = 200 +SERVER_ERROR = 500 + + +CONFIG_FILE_PATH = "config.json" + +app = Flask(__name__) +CORS(app) + + +# NOTE: +# 1. Flask "@app.route" decorated functions below commonly use a db or nimbus object +# 2. 
Because the decorated functions can't take parameters (because they're called by +# the flask web server) the database and nimbus objects must be global +# 3. Instantiating objects at the global level (especially ones that are resource-intensive +# to create like db and nimbus objects) is obviously bad practice +# +# Due to these points, the very un-Pythonic solution chosen is to initialize these objects as +# None at the top level, associate them with actual objects in the `initialize*()` functions, +# and do None checks in the functions below. + +db = None +nimbus = None + + +def init_nimbus_db(): + global db + global nimbus + + # If not connected to db, initialize db connection and Nimbus client + if db is None: + db = NimbusMySQLAlchemy(config_file=CONFIG_FILE_PATH) + nimbus = Nimbus(db) + # If not connected, reset db and Nimbus client + else: + try: + db.engine.connect() + except OperationalError: + db = NimbusMySQLAlchemy(config_file=CONFIG_FILE_PATH) + nimbus = Nimbus(db) + + +# returns the question from the request body, if applicable +def get_question() -> str: + if request.is_json is False: + raise BadRequest(description="request must be JSON") + request_body = request.get_json() + question = request_body.get("question", None) + # no reason for a custom exception here + if question is None: + raise BadRequest(description="request body should include the question") + else: + return question + + +def handle_database_error(error): + global db + # checks if the session has any changes (new objects, changed objects, + # or deleted objects) - these should be rolled back in the case of an exception + if db.session.new or db.session.dirty or db.session.deleted: + print("Rolling back") + db.session.rollback() + if isinstance(error, OperationalError) or db is None: + # we *probably* have a bad session - try and roll it back, + # then create a new database connection. 
+ db.session.close() + db = None + init_nimbus_db() + + +def log_error(error, question): + error_entry = {"question": question, "stacktrace": traceback.format_exc()} + db.insert_entity(ErrorLog, error_entry) + + +@app.errorhandler(Exception) +def handle_all_errors(e): + # we should still be able to extract the question from the request, if one + # was asked. We can retry the question once. + handle_database_error(e) + question = None + try: + question = get_question() + except BadRequest as e: + # the question is already None, but we need to catch this exception + pass + log_error(e, question) + return jsonify({"ErrorLog": type(e).__name__}), SUCCESS + + +@app.route("/", methods=["GET", "POST"]) +def hello(): + """ + always return SUCCESS (200) code on this route, to serve as a health check. + """ + if request.method == "POST": + request_body = request.get_json() + return jsonify({"you sent": request_body}), SUCCESS + else: + + response_json = jsonify({"name": "hello {}".format(str(app))}) + return response_json, SUCCESS + + +def generate_session_token() -> str: + return "SOME_NEW_TOKEN" + + +@app.route("/ask", methods=["POST"]) +def handle_question(): + """ + POST (not GET) request because the `question` is submitted + and an `answer` is "created." Also, some side-effects on the + server are: + * storage of the logs of this question-answer-session. 
+ """ + try: + + init_nimbus_db() + + try: + question = get_question() + except (BadRequest) as e: + return e.description, BAD_REQUEST + + try: + entity = db.insert_entity(QuestionLog, {"question": question}) + except (Exception) as e: + print("Could not store question upon user ask: ", str(e)) + + response = {"answer": nimbus.answer_question(question)} + # extracting the question checks if we have json, so we should be good here + request_body = request.get_json() + if "session" in request_body: + response["session"] = request_body["session"] + else: + response["session"] = generate_session_token() + + return jsonify(response), SUCCESS + + except Exception as e: + log_error(e, question) + response = {"answer": "oops, something went wrong... Try another question"} + return jsonify(response), SERVER_ERROR + + +@app.route("/new_data/wakeword", methods=["POST"]) +def save_a_recording(): + """Given the audio metadata & audio file, resamples it, saves to storage. + """ + if "wav_file" not in request.files: + return ( + "Please provide an audio file under the key 'wav_file' in your FormData", + BAD_REQUEST, + ) + validator = WakeWordValidator() + formatter = WakeWordFormatter() + data = request.form + issues = validator.validate(data) + if issues: + try: + data = validator.fix(data, issues) + except WakeWordValidatorError as err: + return str(err), BAD_REQUEST + formatted_data = formatter.format(data) + filename = create_filename(formatted_data) + try: + file_id = save_audiofile(filename, request.files["wav_file"]) + except Exception as err: + return f"Failed to save audio file because... 
{err}", BAD_REQUEST + + formatted_data["audio_file_id"] = file_id + + init_nimbus_db() + + try: + db.insert_entity(AudioSampleMetaData, formatted_data) + except BadDictionaryKeyError as e: + return str(e), BAD_REQUEST + except BadDictionaryValueError as e: + return str(e), BAD_REQUEST + except NimbusDatabaseError as e: + return str(e), BAD_REQUEST + except Exception as e: + # TODO: consider security tradeoff of displaying internal server errors + # versus development time (being able to see errors quickly) + # HINT: security always wins + raise e + + return f"Successfully stored audiofile as '{filename}'", SUCCESS + + +@app.route("/new_data/office_hours", methods=["POST"]) +def save_office_hours(): + """ + Persists list of office hours + """ + init_nimbus_db() + + data = json.loads(request.get_json()) + for professor in data: + try: + process_office_hours(data[professor], db) + except BadDictionaryKeyError as e: + return str(e), BAD_REQUEST + except BadDictionaryValueError as e: + return str(e), BAD_REQUEST + except NimbusDatabaseError as e: + return str(e), BAD_REQUEST + except Exception as e: + # TODO: consider security tradeoff of displaying internal server errors + # versus development time (being able to see errors quickly) + # HINT: security always wins + raise e + + return "SUCCESS" + + +@app.route("/new_data/phrase", methods=["POST"]) +def save_query_phrase(): + validator = PhrasesValidator() + data = json.loads(request.get_json()) + try: + issues = validator.validate(data) + except: + return ( + "Please format the query data: {question: {text: string, variables: list}, answer: {text: string, variables: list}}", + BAD_REQUEST, + ) + if issues: + try: + data = validator.fix(data, issues) + except PhrasesValidatorError as err: + print("error", err) + return str(err), BAD_REQUEST + + init_nimbus_db() + + try: + entity_saved = db.insert_entity(QuestionAnswerPair, data) + except (BadDictionaryKeyError, BadDictionaryValueError) as e: + return str(e), BAD_REQUEST 
+ except NimbusDatabaseError as e: + return str(e), SERVER_ERROR + except Exception as e: + raise e + + if entity_saved: + return "Phrase has been saved", SUCCESS + else: + return "An error was encountered while saving to database", SERVER_ERROR + + +@app.route("/data/get_phrase/", methods=["GET"]) +def get_phrase(numQueries): + init_nimbus_db() + try: + # if no phrases are unvalidated, will return an empty list + return {"data": db.get_unvalidated_qa_data(numQueries)}, SUCCESS + except NimbusDatabaseError as e: + return str(e), SERVER_ERROR + except Exception as e: + raise e + + +@app.route("/new_data/update_phrase", methods=["POST"]) +def update_query_phrase(): + init_nimbus_db() + data = request.get_json() + try: + updated = db.update_entity(QuestionAnswerPair, data, []) + except (BadDictionaryKeyError, BadDictionaryValueError) as e: + return str(e), BAD_REQUEST + except NimbusDatabaseError as e: + return str(e), SERVER_ERROR + except Exception as e: + raise e + + return ( + ("Phrase updated!", SUCCESS) + if updated + else ("Failed to update phrase", SERVER_ERROR) + ) + + +@app.route("/new_data/delete_phrase", methods=["POST"]) +def delete_query_phrase(): + init_nimbus_db() + data = request.get_json() + if "id" not in data or type(data["id"]) != int: + return "Please provide 'id' as an integer" + identifier = data["id"] + try: + deleted = db.delete_entity(QuestionAnswerPair, identifier) + except (BadDictionaryKeyError, BadDictionaryValueError) as e: + return str(e), BAD_REQUEST + except NimbusDatabaseError as e: + return str(e), SERVER_ERROR + except Exception as e: + raise e + + return ( + ("Phrase deleted!", SUCCESS) + if deleted + else ("Failed to delete phrase", SERVER_ERROR) + ) + + +@app.route("/entity_structure", methods=["GET"]) +def get_entity_structure(): + def get_class_info(entity): + keys = list(filter(lambda key: not key[0] == "_", entity.__dict__.keys())) + return {"attributes": keys, "synonyms": entity.synonyms} + + entities = { + "COURSE": 
get_class_info(Courses), + "CLUB": get_class_info(Clubs), + "PROF": get_class_info(Professors), + "LOCATION": get_class_info(Locations), + } + return jsonify(entities) + + +@app.route("/new_data/feedback", methods=["POST"]) +def save_feedback(): + validator = FeedbackValidator() + data = json.loads(request.get_json()) + try: + issues = validator.validate(data) + except: + return ( + "Please format the query data: {question: String, answer: String, type: String, timestamp: int}", + BAD_REQUEST, + ) + if issues: + try: + data = validator.fix(data, issues) + except FeedbackValidatorError as err: + print("error:", err) + return str(err), BAD_REQUEST + + init_nimbus_db() + + try: + entity = db.insert_entity(QueryFeedback, data) + except (BadDictionaryKeyError, BadDictionaryValueError) as e: + return str(e), BAD_REQUEST + except NimbusDatabaseError as e: + return str(e), SERVER_ERROR + except Exception as e: + raise e + + if entity: + return "Feedback has been saved", SUCCESS + else: + return "An error was encountered while saving to database", SERVER_ERROR + + +@app.route("/new_data/courses", methods=["POST"]) +def save_courses(): + """ + Persists list of courses + """ + + data = json.loads(request.get_json()) + init_nimbus_db() + + for course in data["courses"]: + try: + db.update_entity(Courses, course, ["dept", "course_num"]) + except BadDictionaryKeyError as e: + return str(e), BAD_REQUEST + except BadDictionaryValueError as e: + return str(e), BAD_REQUEST + except NimbusDatabaseError as e: + return str(e), BAD_REQUEST + except Exception as e: + # TODO: consider security tradeoff of displaying internal server errors + # versus development time (being able to see errors quickly) + # HINT: security always wins + raise e + + return "SUCCESS" + + +@app.route("/new_data/sections", methods=["POST"]) +def save_sections(): + """ + Persists list of sections + """ + data = json.loads(request.get_json()) + init_nimbus_db() + + for section in data["sections"]: + try: + 
db.update_entity(Sections, section, ["section_name"]) + except BadDictionaryKeyError as e: + return str(e), BAD_REQUEST + except BadDictionaryValueError as e: + return str(e), BAD_REQUEST + except NimbusDatabaseError as e: + return str(e), BAD_REQUEST + except Exception as e: + # TODO: consider security tradeoff of displaying internal server errors + # versus development time (being able to see errors quickly) + # HINT: security always wins + raise e + + return "SUCCESS" + + +@app.route("/new_data/clubs", methods=["POST"]) +def save_clubs(): + """ + Persists list of clubs + """ + + data = json.loads(request.get_json()) + init_nimbus_db() + + for club in data["clubs"]: + try: + db.update_entity(Clubs, club, ["club_name"]) + except BadDictionaryKeyError as e: + return str(e), BAD_REQUEST + except BadDictionaryValueError as e: + return str(e), BAD_REQUEST + except NimbusDatabaseError as e: + return str(e), BAD_REQUEST + except Exception as e: + # TODO: consider security tradeoff of displaying internal server errors + # versus development time (being able to see errors quickly) + # HINT: security always wins + raise e + + return "SUCCESS" + + +@app.route("/new_data/locations", methods=["POST"]) +def save_locations(): + """ + Persists list of locations + """ + + data = json.loads(request.get_json()) + init_nimbus_db() + + for location in data["locations"]: + try: + db.update_entity(Locations, location, ["longitude", "latitude"]) + except BadDictionaryKeyError as e: + return str(e), BAD_REQUEST + except BadDictionaryValueError as e: + return str(e), BAD_REQUEST + except NimbusDatabaseError as e: + return str(e), BAD_REQUEST + except Exception as e: + # TODO: consider security tradeoff of displaying internal server errors + # versus development time (being able to see errors quickly) + # HINT: security always wins + raise e + + return "SUCCESS" + + +@app.route("/new_data/professors", methods=["POST"]) +def save_professors(): + """ + Persists a list of professors + """ + 
data = json.loads(request.get_json()) + init_nimbus_db() + + for prof in data["professors"]: + try: + print("PROF:", prof) + db.update_entity(Professors, prof, ["email"]) + except BadDictionaryKeyError as e: + return str(e), BAD_REQUEST + except BadDictionaryValueError as e: + return str(e), BAD_REQUEST + except NimbusDatabaseError as e: + return str(e), BAD_REQUEST + except Exception as e: + # TODO: consider security tradeoff of displaying internal server errors + # versus development time (being able to see errors quickly) + # HINT: security always wins + raise e + + return "SUCCESS" + + +@app.route("/new_data/calendars", methods=["POST"]) +def save_calendars(): + """ + Persists list of calendars + """ + + data = json.loads(request.get_json()) + init_nimbus_db() + + for calendar in data["calendars"]: + try: + db.update_entity(Calendars, calendar, ["date", "raw_events_text"]) + except BadDictionaryKeyError as e: + return str(e), BAD_REQUEST + except BadDictionaryValueError as e: + return str(e), BAD_REQUEST + except NimbusDatabaseError as e: + return str(e), BAD_REQUEST + except Exception as e: + # TODO: consider security tradeoff of displaying internal server errors + # versus development time (being able to see errors quickly) + # HINT: security always wins + raise e + + return "SUCCESS" + + +@app.route("/schema/entity_tokens", methods=["GET"]) +def get_entity_tokens(): + init_nimbus_db() + try: + identifiers = db.session.query(EntityToken).all() + except: + return "Could not fetch at this time", BAD_REQUEST + data = list(map(lambda token: token.get_data(), identifiers)) + return jsonify(data) + + +@app.route("/schema/entity_tokens", methods=["POST"]) +def add_entity_token(): + init_nimbus_db() + data = request.get_json() + try: + new_token = EntityToken(data) + except Exception as ex: + return ex.args[0], BAD_REQUEST + token_added = db.add_entity(new_token) + if not token_added: + return "Could not add token", BAD_REQUEST + return "Added Token", SUCCESS + + 
+def create_filename(form): + """ + Creates a string filename that adheres to the Nimbus foramtting standard. + """ + order = [ + "isWakeWord", + "noiseLevel", + "tone", + "location", + "gender", + "lastName", + "firstName", + "timestamp", + "username", + ] + values = list(map(lambda key: str(form[key]).lower().replace(" ", "-"), order)) + return "_".join(values) + ".wav" + + +def process_office_hours(current_prof: dict, db: NimbusMySQLAlchemy): + """ + Takes the path to a CSV, reads the data row-by-row, + and stores the data to the database + + Ex: def process_office_hours( + current_prof: dict, + db: NimbusMySQLAlchemy + ) + + """ + # Set the entity type as the OfficeHours entity class + entity_type = db.OfficeHours + + # Check if the current entity is already within the database + if ( + db.get_property_from_entity( + prop="Name", entity=entity_type, identifier=current_prof["Name"] + ) + != None + ): + + update_office_hours = True + + else: + update_office_hours = False + + # String for adding each day of office hours + office_hours = "" + + # Split name for first and last name + split_name = current_prof["Name"].split(",") + + # Extract each property for the entity + last_name = split_name[0].replace('"', "") + first_name = split_name[1].replace('"', "") + + # Check that each extracted property is not empty then add it to + # the office hours string + if current_prof["Monday"] != "": + + # Check that the current property does not contain digits which + # implies that it is alternative information about availability + if any(char.isdigit() for char in current_prof["Monday"]) == False: + office_hours = current_prof["Monday"] + + # Otherwise it is a time + else: + office_hours += "Monday " + current_prof["Monday"] + ", " + + if current_prof["Tuesday"] != "": + office_hours += "Tuesday " + current_prof["Tuesday"] + ", " + + if current_prof["Wednesday"] != "": + office_hours += "Wednesday " + current_prof["Wednesday"] + ", " + + if current_prof["Thursday"] != "": + 
office_hours += "Thursday " + current_prof["Thursday"] + ", " + + if current_prof["Friday"] != "" and current_prof["Friday"] != "\n": + office_hours += "Friday " + current_prof["Friday"] + ", " + + # Generate the data structure for the database entry + sql_data = { + "name": last_name + ", " + first_name, + "last_name": last_name, + "first_name": first_name, + "office": current_prof["Office"], + "phone": current_prof["Phone"], + "email": current_prof["Email"], + "monday": current_prof["Monday"], + "tuesday": current_prof["Tuesday"], + "wednesday": current_prof["Wednesday"], + "thursday": current_prof["Thursday"], + "friday": current_prof["Friday"], + "office_hours": office_hours, + } + + # Update the entity properties if the entity already exists + if update_office_hours == True: + db.update_entity( + entity_type=entity_type, data_dict=sql_data, filter_fields=["Email"] + ) + + # Otherwise, add the entity to the database + else: + db.insert_entity(entity_type=entity_type, data_dict=sql_data) + + +def resample_audio(): + """ + Resample the audio file to adhere to the Nimbus audio sampling standard. + """ + pass + + +def save_audiofile(filename, content): + """ + Saves audio to the club Google Drive folder. + + Parameters + ---------- + - `filename:str` the name of the file, formatted by `create_filename()` + - `content: file` audio file to store + + Returns + ------- + The Google Drive file id that can be used to retrieve the file + """ + # Initialize our google drive authentication object using saved credentials, + # or through the command line + gauth = GoogleAuth() + gauth.CommandLineAuth() + # This is our pydrive object + drive = GoogleDrive(gauth) + # parent is our automatically uploaded file folder. 
The ID should be read in from + # folder_id.txt since we probably shouldn't have that ID floating around on GitHub""" + folder_id = get_folder_id() + file = drive.CreateFile( + { + "parents": [{"kind": "drive#fileLink", "id": folder_id}], + "title": filename, + "mimeType": "audio/wav", + } + ) + # Set the content of the file to the POST request's wav_file parameter. + file.content = content + file.Upload() # Upload file. + return file["id"] + + +def get_folder_id(): + with open("folder_id.txt") as folder_id_file: + return folder_id_file.readline() + + +def convert_to_mfcc(): + """Get this function from https://github.com/calpoly-csai/CSAI_Voice_Assistant""" + pass + + +if __name__ == "__main__": + app.run(host="0.0.0.0", debug=gunicorn_config.DEBUG_MODE, port=gunicorn_config.PORT) diff --git a/format.sh b/format.sh new file mode 100755 index 0000000..502f12e --- /dev/null +++ b/format.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +black . diff --git a/gunicorn_config.py b/gunicorn_config.py new file mode 100644 index 0000000..5b59588 --- /dev/null +++ b/gunicorn_config.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 + +""" +This python script will initialize variables +to configure Gunicorn workers that support the Flask API server. +""" + +from os import environ +import multiprocessing + +PORT = int(environ.get("PORT", 8080)) +DEBUG_MODE = int(environ.get("DEBUG_MODE", 1)) + +# Gunicorn config +bind = ":" + str(PORT) +# workers = multiprocessing.cpu_count() * 2 + 1 +# The above is considered bad practice - cpu_count() can be unreliable. 
+# We have 1 core, so 1 * 2 + 1 = 3 +workers = 3 +timeout = 90 diff --git a/happy.wav b/happy.wav new file mode 100644 index 0000000..a5298fb Binary files /dev/null and b/happy.wav differ diff --git a/heroku_deploy.sh b/heroku_deploy.sh new file mode 100755 index 0000000..e833883 --- /dev/null +++ b/heroku_deploy.sh @@ -0,0 +1,5 @@ +heroku container:login + +heroku container:push -a calpoly-csai-nimbus web --arg DATABASE_HOSTNAME,DATABASE_PASSWORD,DATABASE_USERNAME,DATABASE_NAME,PYDRIVE_CLIENT_ID,PYDRIVE_CLIENT_SECRET,GOOGLE_DRIVE_CREDENTIALS,GOOGLE_DRIVE_FOLDER_ID,GOOGLE_CLOUD_NLP_CREDENTIALS,GOOGLE_CLOUD_NLP_MODEL_NAME + +heroku container:release -a calpoly-csai-nimbus web diff --git a/lint.sh b/lint.sh new file mode 100755 index 0000000..80dfc7b --- /dev/null +++ b/lint.sh @@ -0,0 +1,38 @@ +#!/bin/bash + + +#flake8 . --count \ +# --select=E9,F63,F7,F82 \ +# --show-source \ +# --statistics \ +# --exclude .git,__pycache__,docs/source/conf.py,old,build,dist,venv \ +# --max-complexity 10 +# +# +## stop the build if there are Python syntax errors or undefined names +#flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics +## exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide +#flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + + + +if [[ $1 == "--simple" ]] +then + # default is 79, but members of the team agreed upon a slight increase + # The GitHub editor is 127 chars wide + # ignore E772: do not use bare 'except' + flake8 --count \ + --ignore E722 \ + --show-source --statistics \ + --exclude .git,__pycache__,venv,build,dist,docs \ + --max-complexity 10 \ + --max-line-length=127 +else + # default is 79, but members of the team agreed upon a slight increase + # The GitHub editor is 127 chars wide + flake8 --count \ + --show-source --statistics \ + --exclude .git,__pycache__,venv,build,dist,docs \ + --max-complexity 10 \ + --max-line-length=127 +fi diff --git a/modules/formatters.py b/modules/formatters.py new file mode 100644 index 0000000..e92c9f2 --- /dev/null +++ b/modules/formatters.py @@ -0,0 +1,26 @@ +class Formatter: + """Formatter abstract class. Describes contract for all child formatters""" + + def __init__(self): + super().__init__() + + def format(self, data): + """Creates a copy of data, formats, and returns the formatted data""" + return data + + +class WakeWordFormatter(Formatter): + """Formats metadata for Wake Word audio""" + + def __init__(self): + super().__init__() + + def format(self, data): + # TODO: document this function better + # TODO: write test cases for this function + form = data.copy() + form["isWakeWord"] = "ww" if (form["isWakeWord"]) else "nww" + form["firstName"] = form["firstName"].title() + form["lastName"] = form["lastName"].title() + form["timestamp"] = int(form["timestamp"]) + return form diff --git a/modules/validators.py b/modules/validators.py new file mode 100644 index 0000000..24498fb --- /dev/null +++ b/modules/validators.py @@ -0,0 +1,348 @@ +import time +import enum +from abc import ABC +import datetime +import re + +from werkzeug.exceptions import BadRequestKeyError + + +class Validator(ABC): + def __init__(self): + super().__init__() + + def validate(self, data: 
dict) -> dict: + """ + Takes in a dictionary of data and returns a dictionary of issues + Parameters + ---------- + - `data : dict` form data to validate + + Returns + ------- + dict + issues with the data + """ + return data + + def fix(self, data: dict, issues) -> dict: + """ + Takes measures to fill in missing data in form, cloning, mutating and then returning the data. + If not possible, raises error. + + Parameters + ---------- + - `data : dict` form data to fix + - `issues: dict|list` lists of issues to fix for each form section + + Returns + ------- + dict + A fixed copy of the data + + Raises + ------ + Exception + when the issue with the data are not fixable. + """ + return data + + +class WakeWordValidatorIssue(enum.Enum): + DOES_NOT_EXIST = 1 + INVALID = 2 + + +class WakeWordValidatorError(Exception): + """Raised when.... bad data... + + Attributes: + message: an explanation of... why bad data... + """ + + def __init__(self, message: str): + self.message = message + + +class WakeWordValidator(Validator): + """Ensures that the form metadata recieved from the REST API for the Wake Word audio is valid.""" + + def __init__(self, validators=None): + super().__init__() + self.validators = validators or { + "isWakeWord": lambda val: type(val) == str + and (val == "true" or val == "false"), + "noiseLevel": lambda level: type(level) == str + and level in "qml" + and len(level) == 1, + "tone": lambda tone: type(tone) == str, + "location": lambda location: type(location) == str, + "gender": lambda gender: type(gender) == str + and gender in "mf" + and len(gender) == 1, + "lastName": lambda lastName: type(lastName) == str, + "firstName": lambda firstName: type(firstName) == str, + "timestamp": lambda timestamp: str.isdigit(timestamp), + "username": lambda username: type(username) == str, + "emphasis": lambda emphasis: type(emphasis) == str, + "script": lambda script: type(script) == str, + } + + def validate(self, data): + """ + Checks if the Wake Word audio 
metadata is complete and in its correct form. + Returns array of issues. + """ + issues = {} + for key in self.validators: + validator = self.validators[key] + try: + value = data[key] + if not validator(value): + issues[key] = WakeWordValidatorIssue.INVALID + except (KeyError, BadRequestKeyError) as e: + print("Couldn't find", e.args, "when validating data") + issues[key] = WakeWordValidatorIssue.DOES_NOT_EXIST + return issues + + def fix(self, data, issues): + """ + Attempts to fix Wake Word audio metadata. + If the data issue is irreplaceable, raises WakeWordValidatorError. + """ + form = data.copy() + for key in issues: + issue = issues[key] + if issue == WakeWordValidatorIssue.DOES_NOT_EXIST: + if key == "username": + form[key] = "guest" + print("fixed username", form[key]) + elif key == "timestamp": + form[key] = int(time.time()) + print("fixed timestamp", form[key]) + elif key == "script" and form["isWakeWord"] == "ww": + form[key] = "nimbus" + print("Added 'script' value of 'nimbus'") + else: + raise WakeWordValidatorError( + f"Required audio metadata '{key}' was not provided" + ) + elif issue == WakeWordValidatorIssue.INVALID: + # TODO: anticipate invalid entries and correct them. 
class PhrasesValidatorError(Exception):
    """Raised when a query phrase object has an issue that cannot be fixed."""

    def __init__(self, message: str):
        # Only the message belongs in ``args``; passing ``self`` as well would
        # embed the exception inside its own argument tuple.
        super().__init__(message)
        self.message = message


class PhrasesValidatorIssue(enum.Enum):
    INVALID = 0
    DELIMITER_MISMATCH = 1
    TOKEN_VAR_MISMATCH = 2


class PhrasesValidator(Validator):
    """Validates new query phrases passed from the web app"""

    def __init__(self):
        super().__init__()
        # User-facing message templates, keyed by issue.
        self.error_messages = {
            PhrasesValidatorIssue.INVALID: "An unknown error occurred",
            PhrasesValidatorIssue.DELIMITER_MISMATCH: "The {field_name} field has mismatched bracket token delimiters (square braces).",
            PhrasesValidatorIssue.TOKEN_VAR_MISMATCH: "The {field_name} field has a differing number of format tokens and variables. Please pass the same number or tokens as variables",
        }

    def validate(self, data: dict) -> dict:
        """
        Ensures that:
        1. All tokens have an opening and closing delimiter.
        2. The number of tokens equals the number of provided variables.

        Parameters
        ----------
        `data : dict` A question answer pairing
            {question: {format: str, variables: list}, answer: {format: str, variables: list}}

        Returns
        -------
        dict
            {"question": [PhrasesValidatorIssue, ...], "answer": [PhrasesValidatorIssue, ...]}
        """
        issues = {"question": [], "answer": []}
        for field, form in data.items():
            opening = form["format"].count("[")
            closing = form["format"].count("]")
            # All tokens have an opening and closing delimiter
            if closing != opening:
                issues[field].append(PhrasesValidatorIssue.DELIMITER_MISMATCH)
            # Number of tokens must equal number of variables
            if len(form["variables"]) != opening:
                issues[field].append(PhrasesValidatorIssue.TOKEN_VAR_MISMATCH)
        return issues

    def fix(self, data: dict, issues: dict) -> dict:
        """
        Fixes phrases data.
        - Critical issues (raise):
            1. Question delimiters don't match up.
            2. Question tokens don't match the number of provided variables.
        - Non critical issues:
            1. Anything wrong with the answer. In this case only the question
               is kept and the answer is blanked.

        Parameters
        ----------
        - `data : dict` A question answer pairing -
          {question: {format: str, variables: list}, answer: {format: str, variables: list}}
        - `issues: dict` the mapping returned by `validate`

        Returns
        -------
        dict
            A fixed copy of the data. The input mapping is left untouched.

        Raises
        ------
        PhrasesValidatorError
            when the issue with the phrase data is not fixable.
        """
        question_issues = issues["question"]
        answer_issues = issues["answer"]
        if question_issues:
            # Only the first question issue is reported.
            err_msg = self.error_messages[question_issues[0]].format(
                field_name="question"
            )
            print(f"error message {err_msg}")
            raise PhrasesValidatorError(err_msg)

        form = dict(data)
        if answer_issues:
            # Build a new nested dict: assigning into form["answer"] directly
            # would also modify the caller's data, because the outer copy is
            # shallow.
            form["answer"] = {**data["answer"], "format": "", "variables": []}
        return form


class FeedbackValidatorError(Exception):
    """Raised when a feedback object has an issue that cannot be fixed."""

    def __init__(self, message: str):
        # Only the message belongs in ``args`` (see PhrasesValidatorError).
        super().__init__(message)
        self.message = message


class FeedbackValidatorIssue(enum.Enum):
    MISSING_QUESTION = 0
    INVALID_TIMESTAMP = 1
    INVALID_TYPE = 2
    MISSING_ANSWER = 3
    CONVERT_UNIX_TO_DATETIME = 4


class FeedbackValidator(Validator):
    """Validates feedback objects passed from the web app"""

    # Feedback categories accepted as-is; anything else is coerced to "other".
    _VALID_TYPES = ("fact", "related", "stats", "other")

    def __init__(self):
        super().__init__()
        self.error_messages = {
            FeedbackValidatorIssue.MISSING_QUESTION: "Please provide a question in the passed data",
            FeedbackValidatorIssue.INVALID_TIMESTAMP: "Timestamp automatically set to current time",
            FeedbackValidatorIssue.CONVERT_UNIX_TO_DATETIME: "",
            FeedbackValidatorIssue.INVALID_TYPE: "Type not provided. Automatically set to OTHER",
            FeedbackValidatorIssue.MISSING_ANSWER: "Please provide an answer in the passed data",
        }

    @staticmethod
    def _has_content(value) -> bool:
        """Return True when `value` is a string with at least one word character."""
        return isinstance(value, str) and re.search(r"\w", value) is not None

    def validate(self, data: dict) -> list:
        """
        Ensures that:
        1. Timestamp is valid
        2. A correct answer type is assigned
        3. A question exists
        4. An answer exists

        Parameters
        ----------
        `data : dict` A feedback object
            {type: str, timestamp: int (unix seconds), question: str, answer: str}

        Returns
        -------
        list
            FeedbackValidatorIssue members, in the order
            timestamp, type, question, answer.
        """
        issues = []

        # Timestamp: a plain int is treated as a unix timestamp to convert;
        # anything else (missing, str, float, bool, ...) is replaced later.
        timestamp = data.get("timestamp")
        if isinstance(timestamp, int) and not isinstance(timestamp, bool):
            issues.append(FeedbackValidatorIssue.CONVERT_UNIX_TO_DATETIME)
        else:
            issues.append(FeedbackValidatorIssue.INVALID_TIMESTAMP)

        # A correct type is assigned
        feedback_type = data.get("type")
        if not isinstance(feedback_type, str) or feedback_type not in self._VALID_TYPES:
            issues.append(FeedbackValidatorIssue.INVALID_TYPE)

        # A question exists (empty / punctuation-only strings do not count)
        if not self._has_content(data.get("question")):
            issues.append(FeedbackValidatorIssue.MISSING_QUESTION)

        # An answer exists
        if not self._has_content(data.get("answer")):
            issues.append(FeedbackValidatorIssue.MISSING_ANSWER)

        return issues

    def fix(self, data: dict, issues: list) -> dict:
        """
        Fixes feedback data.
        - Fixable issues:
            1. An invalid timestamp (replaced by the current server time)
            2. A unix timestamp (converted to a datetime)
            3. An invalid type (set to "other")
        - Critical issues (raise):
            1. No question or answer is provided

        Parameters
        ----------
        - `data : dict` A feedback object
          {type: str, timestamp: int, question: str, answer: str}
        - `issues: list` the FeedbackValidatorIssues returned by `validate`

        Returns
        -------
        dict
            A fixed copy of the data

        Raises
        ------
        FeedbackValidatorError
            when the issue with the feedback data is not fixable.
        """
        form = dict(data)

        for issue in issues:
            # fixes invalid timestamp (set to current datetime)
            if issue == FeedbackValidatorIssue.INVALID_TIMESTAMP:
                print("Inferred query timestamp on server")
                form["timestamp"] = datetime.datetime.now()

            # converts a valid unix timestamp to a Python datetime object
            elif issue == FeedbackValidatorIssue.CONVERT_UNIX_TO_DATETIME:
                form["timestamp"] = datetime.datetime.fromtimestamp(form["timestamp"])

            # fixes invalid type (set to OTHER)
            elif issue == FeedbackValidatorIssue.INVALID_TYPE:
                print("Changed query type from invalid form to 'other'")
                form["type"] = "other"

            # missing answer or missing question cannot be repaired
            elif issue in (
                FeedbackValidatorIssue.MISSING_ANSWER,
                FeedbackValidatorIssue.MISSING_QUESTION,
            ):
                raise FeedbackValidatorError(self.error_messages[issue])
        return form
class Nimbus:
    """Answers questions by extracting variables, classifying, then looking up a QA handler."""

    def __init__(self, db: NimbusMySQLAlchemy):
        self.db = db
        # Map every answerable question format to its QA handler.
        self.qa_dict = create_qa_mapping(
            generate_qa_pairs(db.get_all_answerable_pairs(), db)
        )
        self.variable_extractor = VariableExtractor()
        self.classifier = QuestionClassifier(db)
        try:
            self.classifier.load_latest_classifier()
        except ValueError:
            # No stored model could be loaded: train one, then load it.
            self.classifier.train_model()
            self.classifier.load_latest_classifier()

    def answer_question(self, question):
        """Return the answer text for `question`, or an apology string."""
        nlp_props = self.predict_question(question)
        print(nlp_props)
        try:
            handler = self.qa_dict[nlp_props["question class"]]
        except KeyError:
            # The training set is broader than the answerable question set,
            # so the predicted class may have no handler.
            return "I'm sorry, I don't understand. Please try another question."

        reply = handler.answer(nlp_props)
        if reply is not None:
            return reply
        # The handler returned None for this question.
        return (
            "I'm sorry, I understand your question but was unable to find an answer. "
            "Please try another question."
        )

    def predict_question(self, question):
        """
        Run variable extraction followed by question classification.

        Args: question (string) - user input question to answer
        Return: (dict) - the variable extractor's output with the predicted
                         question stored under "question class"
        """
        props = self.variable_extractor.extract_variables(question)
        normalized = props["normalized question"]
        props["question class"] = self.classifier.classify_question(normalized)
        return props


if __name__ == "__main__":
    db = NimbusMySQLAlchemy()
    qc = QuestionClassifier(db)
    qc.train_model()
class QuestionClassifier:
    """1-nearest-neighbour classifier mapping a user question to a known question format."""

    # Questions whose nearest training question is farther than this are
    # reported as unrelated.
    MAX_MATCH_DISTANCE = 150

    def __init__(self, db):
        self.db = db
        self.classifier = None
        self.nlp = spacy.load("en_core_web_sm")
        self.WH_WORDS = {"WDT", "WP", "WP$", "WRB"}
        # Feature name -> 0; the key order defines the feature-vector layout.
        self.overall_features = {}

    def train_model(self):
        """Train on every answerable pair from the database and save the model."""
        self.classifier = self.build_question_classifier(
            question_pairs=self.db.get_all_answerable_pairs()
        )
        save_model(self.classifier, "nlp-model")

    def load_latest_classifier(self):
        """Load the newest stored model and the feature layout saved with it."""
        self.classifier = load_latest_model()
        with open(PROJECT_DIR + "/models/features/overall_features.json", "r") as fp:
            self.overall_features = json.load(fp)

    # Question pairs are a parameter so this module does not depend on
    # database_wrapper (which would introduce a circular dependency).
    def build_question_classifier(self, question_pairs: Tuple[str, str]):
        """
        Build the overall feature set from the individual questions' features,
        train a KNN model on it, and write the feature layout to disk.

        Args: question_pairs - iterable of pairs; element 0 of each pair is the question
        Return: the fitted sklearn KNeighborsClassifier
        """
        questions = [pair[0] for pair in question_pairs]
        question_features = [
            self.get_question_features(self.nlp(str(q))) for q in questions
        ]

        for features in question_features:
            for key in features:
                self.overall_features[key] = 0
        self.overall_features["not related"] = 0

        vectors = np.array(
            [
                np.array([features.get(k, 0) for k in self.overall_features])
                for features in question_features
            ]
        )
        y_train = np.array(questions)
        new_classifier = sklearn.neighbors.KNeighborsClassifier(n_neighbors=1)
        new_classifier.fit(vectors, y_train)

        with open(PROJECT_DIR + "/models/features/overall_features.json", "w") as fp:
            json.dump(self.overall_features, fp)

        return new_classifier

    def is_wh_word(self, token):
        """Return True when the token's tag is one of the WH tags."""
        return token.tag_ in self.WH_WORDS

    def filter_wh_tags(self, spacy_doc):
        """Return the text of every WH-tagged token, in document order."""
        return [t.text for t in spacy_doc if self.is_wh_word(t)]

    def validate_wh(self, s1, s2):
        """Return True when both inputs contain the same WH words in the same order."""
        # only parses as a spacy doc if necessary
        doc1 = s1 if isinstance(s1, spacy.tokens.doc.Doc) else self.nlp(s1)
        doc2 = s2 if isinstance(s2, spacy.tokens.doc.Doc) else self.nlp(s2)
        return self.filter_wh_tags(doc1) == self.filter_wh_tags(doc2)

    def get_question_features(self, spacy_doc):
        """
        Return {feature: weight} for a parsed question.

        Weights: 90 for a token directly after "[" (a [VARIABLE] tag),
        30 for any other kept token's lemma, and 60 for the lemma of the
        first sentence's root. A document without sentences yields only
        the token features.
        """
        features = dict()

        for token in spacy_doc:
            # Filters stop words, punctuation, and symbols
            if token.is_stop or not (token.is_digit or token.is_alpha):
                continue

            # token.i is the token's index and token.nbor(-1) its left
            # neighbour; only the left bracket is tested for brevity.
            if token.i != 0 and token.nbor(-1).text == "[":
                features[token.text] = 90
            else:
                features[token.lemma_] = 30

        # The root lemma of the first sentence gets weight 60. An empty
        # document has no sentence, so there is nothing to add.
        first_sentence = next(iter(spacy_doc.sents), None)
        if first_sentence is not None:
            features[first_sentence.root.lemma_] = 60

        return features

    def classify_question(self, question):
        """
        Return the stored question format closest to `question`, or a
        "not related" message when the nearest one is too far away.

        Raises: ValueError when no classifier has been trained or loaded.
        """
        if self.classifier is None:
            raise ValueError("Classifier is not initialized")

        # The spacy doc handles pos tagging, stop word removal, tokenization,
        # lemmatization, etc
        doc = self.nlp(question)
        test_features = self.get_question_features(doc)

        # One row, laid out in the same feature order used for training.
        test_vector = np.array(
            [test_features.get(k, 0) for k in self.overall_features]
        ).reshape(1, -1)

        # kneighbors returns (distances, indices); only the distances may be
        # compared against the threshold.
        distances, _ = self.classifier.kneighbors(test_vector, n_neighbors=1)
        if np.min(distances) > self.MAX_MATCH_DISTANCE:
            return "I don't think that's a Statistics related question! Try asking something about the STAT curriculum."

        # Cast to a plain str: the classifier returns a numpy.str_.
        return str(self.classifier.predict(test_vector)[0])
"GOOGLE_CLOUD_NLP_MODEL_NAME" + ) + raise Exception(msg) + + credential_path = os.getcwd() + "/auth.json" + # TODO: consider does this even do anything useful? + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credential_path + + @staticmethod + def inline_text_payload(sent): + """ + Converts the input sentence into GCP"s callable format + Args: sent (string) - input sentence + Return: (dict) - GCP NER input format + """ + + return {"text_snippet": {"content": sent, "mime_type": "text/plain"}} + + def get_prediction(self, sent): + """ + Obtains the prediction from the input sentence and returns the + normalized sentence + Args: sent (string) - input sentence + Return: request (PredictObject) - predictiton output + """ + + params = {} + + # Setup API + options = ClientOptions(api_endpoint="automl.googleapis.com") + + # Create prediction object + predictor = automl_v1.PredictionServiceClient(client_options=options) + + # Format input sentence + payload = self.inline_text_payload(sent) + + # Make prediction API call + request = predictor.predict(self.model_name, payload, params) + + # Return the output of the API call + return request + + def extract_variables(self, sent): + """ + Takes the prediction and replaces the entity with its corresponding tag + Args: sent (string) - input sentence + Return: (dict) - "entity" - extracted entity + "tag" - tag of the extracted entity + "normalized entity" - stripped entity + "input question" - input question from the user + "normalized question" - variable-replaced question + """ + + entity = "" + tag = "" + normalized_entity = "" + normalized_question = "" + + # Make the prediction + request = self.get_prediction(sent) + + # Input had detected entities + if list(request.payload): + + # Obtain the entity in the sentence + entity = request.payload[0].text_extraction.text_segment.content + + # Obtain the predicted tag + tag = request.payload[0].display_name + + # Removes excessive words from the entity + normalized_entity = 
# Column order of the office hours CSV; also the keys of each professor dict.
_OFFICE_HOURS_FIELDS = (
    "Name",
    "Office",
    "Phone",
    "Email",
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
)


def convert_to_dict(data_in: list):
    """
    Turn CSV rows into a dictionary of professors keyed by name.

    Each row is read positionally in the order Name, Office, Phone, Email,
    Monday..Friday. Empty rows (blank CSV lines) are skipped, missing
    trailing columns default to "", and extra columns are ignored.

    Args:
        data_in (list of str lists): one list per CSV row

    Returns:
        return_dict (dict of dicts): {name: {"Name": ..., "Office": ..., ...}}

    Ex:
        >> convert_to_dict([["Smith, John", "01-234", "4567", "jsmith@",
                             "10:00 - 12:00 PM", "", "", "", ""]])
        {"Smith, John": {"Name": "Smith, John", "Office": "01-234",
                         "Phone": "4567", "Email": "jsmith@",
                         "Monday": "10:00 - 12:00 PM", "Tuesday": "",
                         "Wednesday": "", "Thursday": "", "Friday": ""}}
    """
    width = len(_OFFICE_HOURS_FIELDS)
    return_dict = {}

    for row in data_in:
        if not row:
            # csv.reader yields [] for blank lines; there is no professor here.
            continue
        cells = list(row[:width])
        cells.extend([""] * (width - len(cells)))
        # The professor's name (first column) is the key.
        return_dict[cells[0]] = dict(zip(_OFFICE_HOURS_FIELDS, cells))

    return return_dict


def process_csv(curr_file: str):
    """
    Read the CSV and return its data rows (the header row is dropped).

    Args:
        curr_file (str): path to the CSV file

    Return:
        curr_data (list of str lists): one list per row; [] for an empty file
    """
    # newline="" lets the csv module handle line endings itself.
    with open(curr_file, "r", newline="") as csv_file:
        csvreader = csv.reader(csv_file)
        # Skip the header; the default keeps an empty file from raising.
        next(csvreader, None)
        return list(csvreader)


def post_request(oh_dict: dict, url: str = None):
    """
    Send the dictionary of professors as a JSON POST request.

    Args:
        oh_dict (dict): output of convert_to_dict
        url (str): target URL; defaults to the second command line argument

    Return:
        the requests response object

    Raises:
        requests.HTTPError when the server answers with an error status
    """
    if url is None:
        url = sys.argv[2]

    headers = {"Content-Type": "application/json"}

    response = requests.post(url, headers=headers, data=json.dumps(oh_dict))
    # Surface failed uploads instead of finishing silently.
    response.raise_for_status()
    return response


if __name__ == "__main__":
    if (len(sys.argv) != 3) or (sys.argv[1][-3:] != "csv"):
        print(
            "Usage: python office_hours_post_request.py "
            '"/path/to/office_hours.csv" '
            '"http://post_request_url.com/new_data/office_hours"'
        )
        sys.exit(1)

    csv_data = process_csv(sys.argv[1])
    oh_dict = convert_to_dict(csv_data)
    post_request(oh_dict)
Fizzbuzz says the answer is sqrt(1764) diff --git a/requirements.txt b/requirements.txt index 7e2c247..0718795 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,60 @@ +blis==0.4.1 +cachetools==4.0.0 +catalogue==1.0.0 +certifi==2019.11.28 +chardet==3.0.4 +Click==7.0 +cymem==2.0.3 +Flask==1.1.1 +Flask-Cors==3.0.8 +fuzzywuzzy==0.18.0 +google-api-core==1.16.0 +google-api-python-client==1.7.11 +google-auth==1.11.2 +google-auth-httplib2==0.0.3 +google-cloud==0.34.0 +google-cloud-automl==0.10.0 +googleapis-common-protos==1.51.0 +grpcio==1.27.2 +gunicorn==20.0.4 +httplib2==0.17.0 +idna==2.6 +importlib-metadata==1.5.0 +itsdangerous==1.1.0 +Jinja2==2.11.1 +joblib==0.14.0 +MarkupSafe==1.1.1 +marshmallow==3.5.0 +mock==4.0.1 +murmurhash==1.0.2 mysql-connector-python==8.0.18 -protobuf==3.10.0 -six==1.13.0 +nltk==3.4.5 +numpy==1.18.3 +oauth2client==4.1.3 +pandas==1.0.1 +plac==1.1.3 +preshed==3.0.2 +protobuf==3.11.3 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +PyDrive==1.3.1 +pytest==5.3.5 +python-dateutil==2.8.1 +python-Levenshtein==0.12.0 +pytz==2019.3 +PyYAML==5.3 +requests==2.23.0 +rsa==4.0 +scikit-learn==0.22.2 +scipy==1.4.1 +six==1.11.0 +spacy==2.2.4 +SQLAlchemy==1.3.13 +srsly==1.0.2 +thinc==7.4.0 +tqdm==4.43.0 +uritemplate==3.0.1 +urllib3==1.25.8 +wasabi==0.6.0 +Werkzeug==1.0.0 +zipp==3.0.0 diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..8dc8097 --- /dev/null +++ b/run.sh @@ -0,0 +1,2 @@ +#!/bin/bash +python3 flask_api.py diff --git a/run_tests.sh b/run_tests.sh new file mode 100755 index 0000000..7d6cc0b --- /dev/null +++ b/run_tests.sh @@ -0,0 +1,2 @@ +#!/bin/bash +python -m pytest \ No newline at end of file diff --git a/scripts/cert-update.sh b/scripts/cert-update.sh new file mode 100755 index 0000000..9071ba0 --- /dev/null +++ b/scripts/cert-update.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +eval `ssh-agent` +ssh-keyscan -H github.com >> ~/.ssh/known_hosts +ssh-add /nimbus/id_rsa + +certbot renew +cd /api-certificates +git pull +tar cvf 
/api-certificates/letsencrypt_backup.tar /etc/letsencrypt +git add . +# `date` will print the date like this: Sun May 10 15:22:23 PDT 2020 +git commit -m "automated upload from google cloud on `date`" +git push \ No newline at end of file diff --git a/scripts/gce_deploy.sh b/scripts/gce_deploy.sh new file mode 100644 index 0000000..07bfed7 --- /dev/null +++ b/scripts/gce_deploy.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# +# This script manually deploys to Google Compute Engine +# +# + +export GITHUB_SHA=`git rev-parse HEAD` + +docker build -t gcr.io/$GCE_PROJECT/$GCE_INSTANCE-image:$GITHUB_SHA \ + --build-arg GITHUB_SHA="$GITHUB_SHA" \ + --build-arg GITHUB_REF="$GITHUB_REF" \ + --build-arg DATABASE_HOSTNAME \ + --build-arg DATABASE_PASSWORD \ + --build-arg DATABASE_USERNAME \ + --build-arg DATABASE_NAME \ + --build-arg PYDRIVE_CLIENT_ID \ + --build-arg PYDRIVE_CLIENT_SECRET \ + --build-arg GOOGLE_DRIVE_CREDENTIALS \ + --build-arg GOOGLE_DRIVE_FOLDER_ID \ + --build-arg GOOGLE_CLOUD_NLP_CREDENTIALS \ + --build-arg GOOGLE_CLOUD_NLP_MODEL_NAME \ + --build-arg GIT_SSH_CERT \ + --build-arg PORT="$SSL_PORT" . 
+ +docker push gcr.io/$GCE_PROJECT/$GCE_INSTANCE-image:$GITHUB_SHA + +# sleep for 60 seconds to allow gce to restart after deploy +gcloud compute instances update-container $GCE_INSTANCE \ + --zone $GCE_INSTANCE_ZONE \ + --container-image=gcr.io/$GCE_PROJECT/$GCE_INSTANCE-image:$GITHUB_SHA \ + --project=$GCE_PROJECT && sleep 60 && gcloud compute ssh $GCE_INSTANCE --zone=$GCE_INSTANCE_ZONE --project=$GCE_PROJECT --command='docker image prune -a -f' diff --git a/scripts/letsencrypt-backup-tar.cron b/scripts/letsencrypt-backup-tar.cron new file mode 100644 index 0000000..74b6844 --- /dev/null +++ b/scripts/letsencrypt-backup-tar.cron @@ -0,0 +1 @@ +0 15 * * * /nimbus/scripts/cert-update.sh > stdout diff --git a/scripts/setup_letsencrypt.sh b/scripts/setup_letsencrypt.sh new file mode 100755 index 0000000..668c774 --- /dev/null +++ b/scripts/setup_letsencrypt.sh @@ -0,0 +1,18 @@ +#! /bin/bash +eval `ssh-agent` +# clones current info from the private repo using the ssh key setup on deploy +cd / +# this will setup github's fingerprint so we can ssh in, and setup our keys +mkdir ~/.ssh +ssh-keyscan -H github.com >> ~/.ssh/known_hosts +ssh-add /nimbus/id_rsa +git clone git@github.com:calpoly-csai/api-certificates.git +git config --global user.email "cpcsai.nimbus@gmail.com" +git config --global user.name "GCE_GIT_BOT" +cd /api-certificates +tar xvf letsencrypt_backup.tar -C / +crontab /nimbus/scripts/letsencrypt-backup-tar.cron +# updating the certs is free, and shouldn't actually happen if the above has all +# been successful. This avoids us not updating if deploys interrupt the update +# from happening. +/bin/bash /nimbus/scripts/cert-update.sh diff --git a/settings.yaml_SAMPLE b/settings.yaml_SAMPLE new file mode 100644 index 0000000..517d0e8 --- /dev/null +++ b/settings.yaml_SAMPLE @@ -0,0 +1,20 @@ +# This file allows authentication with Google Drive for the nimbus API +# This enables uploading files to our Google Drive folder. 
#!/usr/bin/env python3
"""Write the config/credential files Nimbus needs from environment variables."""
import os
import sys
from os import environ, chmod, listdir
import json
from utilities import yaml_utils  # noqa
import argparse

# =============================================================================
# GET THE COMMAND LINE ARGUMENTS
# =============================================================================
parser = argparse.ArgumentParser(description="Setup Special Files From Environment.")
parser.add_argument(
    "--overwrite-all",
    action="store_true",
    help="optionally overwrite all the existing files, if exist (default: False).",
)
args = parser.parse_args()
OVERWRITE_ALL = args.overwrite_all


# =============================================================================
# GLOBAL VARIABLES
# =============================================================================
# NimbusDatabase stuff
SAMPLE_CONFIG_FILE = "config.json_SAMPLE"
CONFIG_FILE = "config.json"

# PYDRIVE stuff
SAMPLE_PYDRIVE_FILE = "settings.yaml_SAMPLE"
PYDRIVE_FILE = "settings.yaml"
PYDRIVE_FOLDER_ID_FILE = "folder_id.txt"
GOOGLE_DRIVE_FOLDER_ID_FILE = "folder_id.txt"
GOOGLE_DRIVE_FOLDER_ID_KEY = "GOOGLE_DRIVE_FOLDER_ID"
PYDRIVE_CLIENT_ID_KEY = "PYDRIVE_CLIENT_ID"
PYDRIVE_CLIENT_SECRET_KEY = "PYDRIVE_CLIENT_SECRET"
GOOGLE_DRIVE_CREDENTIALS_FILE = "credentials.json"
GOOGLE_DRIVE_CREDENTIALS_KEY = "GOOGLE_DRIVE_CREDENTIALS"
SSH_CERT_FILE = "id_rsa"

# GOOGLE CLOUD stuff
GOOGLE_CLOUD_NLP_CREDENTIALS_FILE = "auth.json"
GOOGLE_CLOUD_NLP_CREDENTIALS_KEY = "GOOGLE_CLOUD_NLP_CREDENTIALS"

SPECIAL_FILES = [
    CONFIG_FILE,
    PYDRIVE_FILE,
    PYDRIVE_FOLDER_ID_FILE,
    GOOGLE_DRIVE_FOLDER_ID_FILE,
    GOOGLE_DRIVE_CREDENTIALS_FILE,
]

# Environment variables that must be present and non-empty.
REQUIRED_ENV_VARS = (
    "DATABASE_HOSTNAME",
    "DATABASE_PASSWORD",
    "DATABASE_USERNAME",
    "DATABASE_NAME",
    "PYDRIVE_CLIENT_ID",
    "PYDRIVE_CLIENT_SECRET",
    "GOOGLE_DRIVE_CREDENTIALS",
    "GOOGLE_DRIVE_FOLDER_ID",
    "GOOGLE_CLOUD_NLP_CREDENTIALS",
    "GOOGLE_CLOUD_NLP_MODEL_NAME",
)


# =============================================================================
# SKIP SCRIPT IF FILES EXIST, ELSE CONTINUE
#   * optionally provide in `--overwrite-all` flag
# =============================================================================
if not OVERWRITE_ALL:
    # List the working directory once rather than once per file.
    existing_files = set(listdir())
    for fname in SPECIAL_FILES:
        if fname in existing_files:
            print(f"\n\nFOUND a file {fname}\n\n")
            print(
                "\nif you dont care about it, call this script again with `--overwrite-all`\n\n"
            )
            sys.exit(1)


# =============================================================================
# ENVIRONMENT CHECKS
#   explicit checks instead of `assert`, which is removed under `python -O`
# =============================================================================
BAD_CONFIG_MSG = "uh oh, config vars not set, ask a Nimbus maintainer/admin for help."
BAD_CONFIG_MSG_2 = "uh oh, config var is empty string, check docker"

for var_name in REQUIRED_ENV_VARS:
    if environ.get(var_name) is None:
        sys.exit(f"{BAD_CONFIG_MSG} (missing: {var_name})")
for var_name in REQUIRED_ENV_VARS:
    if environ[var_name] == "":
        sys.exit(f"{BAD_CONFIG_MSG_2} (empty: {var_name})")


# =============================================================================
# MAKE THE `config.json` dictionary
# =============================================================================
# This dictionary should look exactly like the `SAMPLE_CONFIG_FILE`
# it contains everything we could possibly need
config = {
    "PYDRIVE_CLIENT_ID": environ["PYDRIVE_CLIENT_ID"],
    "PYDRIVE_CLIENT_SECRET": environ["PYDRIVE_CLIENT_SECRET"],
    "GOOGLE_DRIVE_CREDENTIALS": environ["GOOGLE_DRIVE_CREDENTIALS"],
    "GOOGLE_DRIVE_FOLDER_ID": environ["GOOGLE_DRIVE_FOLDER_ID"],
    "GOOGLE_CLOUD_NLP_CREDENTIALS": environ["GOOGLE_CLOUD_NLP_CREDENTIALS"],
    "GOOGLE_CLOUD_NLP_MODEL_NAME": environ["GOOGLE_CLOUD_NLP_MODEL_NAME"],
    "mysql": {
        "host": environ["DATABASE_HOSTNAME"],
        # 3306 is the default port for mysql
        "port": "3306",
        "user": environ["DATABASE_USERNAME"],
        "password": environ["DATABASE_PASSWORD"],
        "database": environ["DATABASE_NAME"],
        "sql_dir": "sql",
        "create_file": "csai_nov_8_2019_create_script.min.sql",
    },
}

# Parse the JSON-valued variables before any file is opened for writing, so
# malformed input aborts the script without leaving truncated files behind.
credentials_json = json.loads(config[GOOGLE_DRIVE_CREDENTIALS_KEY])
auth_json = json.loads(config[GOOGLE_CLOUD_NLP_CREDENTIALS_KEY])


# =============================================================================
# save the CONFIG_FILE
# =============================================================================
with open(CONFIG_FILE, "w") as json_file:
    json.dump(config, json_file)


# =============================================================================
# MAKE THE pydrive_yaml dictionary
#
# TODO: consider ENV variable for pydrive's `save_credentials_file`
#       in case the name changes due to conflict with other google credentials
# =============================================================================
pydrive_yaml = {
    "client_config_backend": "settings",
    "client_config": {
        "client_id": config[PYDRIVE_CLIENT_ID_KEY],
        "client_secret": config[PYDRIVE_CLIENT_SECRET_KEY],
    },
    "save_credentials": True,
    "save_credentials_backend": "file",
    "save_credentials_file": "credentials.json",
    "get_refresh_token": True,
    "oauth_scope": [
        "https://www.googleapis.com/auth/drive.file",
        "https://www.googleapis.com/auth/drive.install",
        "https://www.googleapis.com/auth/drive",
        "https://www.googleapis.com/auth/drive.metadata",
    ],
}


# =============================================================================
# save the PYDRIVE_FILE
# =============================================================================
yaml_utils.dump_yaml(pydrive_yaml, PYDRIVE_FILE)


# =============================================================================
# save the GOOGLE_DRIVE_FOLDER_ID_FILE
# =============================================================================
with open(GOOGLE_DRIVE_FOLDER_ID_FILE, "w") as f:
    f.write(config[GOOGLE_DRIVE_FOLDER_ID_KEY])


# =============================================================================
# save the ssh certificates to the correct directory
# =============================================================================
ssh_cert = environ.get("GIT_SSH_CERT")
if ssh_cert is not None:
    # Create the key file owner-only from the start, so the private key is
    # never readable by others between the write and the chmod.
    key_fd = os.open(SSH_CERT_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(key_fd, "w") as f:
        f.write(ssh_cert)
    # A pre-existing file keeps its old mode on open; enforce it explicitly.
    chmod(SSH_CERT_FILE, 0o0600)


# =============================================================================
# save the GOOGLE_DRIVE_CREDENTIALS_FILE
# =============================================================================
with open(GOOGLE_DRIVE_CREDENTIALS_FILE, "w") as credentials_json_file:
    json.dump(credentials_json, credentials_json_file)


# =============================================================================
# save the GOOGLE_CLOUD_NLP_CREDENTIALS_FILE
# =============================================================================
with open(GOOGLE_CLOUD_NLP_CREDENTIALS_FILE, "w") as auth_json_file:
    json.dump(auth_json, auth_json_file)
# http://bytefish.de/blog/first_steps_with_sqlalchemy/
# https://docs.sqlalchemy.org/en/13/
# https://www.sqlalchemy.org/library.html
# ^ if 1.3 is not current release

from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from datetime import datetime, timedelta  # noqa
from sqlalchemy import Table, Column, Integer, String, DateTime, ForeignKey  # noqa
from sqlalchemy.orm import relationship, backref  # noqa
from sqlalchemy.orm import sessionmaker
from pprint import pprint as pp
from sqlalchemy import inspect
from urllib.parse import quote
import json

Base = declarative_base()


class Tag(Base):
    """Demo model: one row per tag, with a unique, non-null name."""

    __tablename__ = "tags"
    id = Column(Integer, primary_key=True)
    name = Column(String(255), unique=True, nullable=False)

    def __repr__(self):
        # The previous implementation formatted into an empty template and
        # therefore always returned "", which made every print(tag) below blank.
        return "<Tag(name={!r})>".format(self.name)


# connection
# https://docs.sqlalchemy.org/en/13/dialects/mysql.html#module-sqlalchemy.dialects.mysql.mysqlconnector
# engine = create_engine('mysql+mysqlconnector://USERNAME:PASSWORD@HOST_NAME:3306/DATABASE_NAME')  # noqa
config_file = "config.json"
with open(config_file) as json_data_file:
    config = json.load(json_data_file)

mysql_config = config.get("mysql")
if not mysql_config:
    print("bad config file")
    # SystemExit instead of exit(): `exit` is only injected by the `site` module.
    raise SystemExit(1)

RDBMS = "mysql"
PIP_PACKAGE = "mysqlconnector"
SQLALCHEMY_DATABASE_URI = "{}+{}://{}:{}@{}:{}/{}".format(
    RDBMS,
    PIP_PACKAGE,
    # Percent-encode the credentials so characters such as "@", ":" or "/"
    # cannot be mistaken for URL delimiters. Plain alphanumeric values are
    # left unchanged by quote().
    quote(str(mysql_config["user"]), safe=""),
    quote(str(mysql_config["password"]), safe=""),
    mysql_config["host"],
    mysql_config["port"],
    mysql_config["database"],
)
# NOTE: the old `if engine is None` check was removed as unreachable.
# create_engine() returns an Engine or raises; connection problems surface
# on first use (create_all below), not here.
engine = create_engine(SQLALCHEMY_DATABASE_URI)

# create metadata
Base.metadata.create_all(engine)

# create session
Session = sessionmaker(bind=engine)
session = Session()

# insert data
tag_cool = Tag(name="cool")
tag_car = Tag(name="car")
tag_animal = Tag(name="animal")

print("TAGS!!")
print(tag_cool)
print(tag_car)
print(tag_animal)
print()

# notice the `_new` key in the
"""
Source: https://gist.github.com/mfekadu/ceaa65dd158bd45dcfadbbda17b83b03
"""
from invoke import task
import os
import shlex
import webbrowser

try:
    from StringIO import StringIO  ## for Python 2
except ImportError:
    from io import StringIO  ## for Python 3


# Environment variables that must be set locally; each one is forwarded to
# `docker build` as a `--build-arg NAME` (value taken from the environment).
_DOCKER_BUILD_ARGS = (
    "DATABASE_HOSTNAME",
    "DATABASE_PASSWORD",
    "DATABASE_USERNAME",
    "DATABASE_NAME",
    "PYDRIVE_CLIENT_ID",
    "PYDRIVE_CLIENT_SECRET",
    "GOOGLE_DRIVE_CREDENTIALS",
    "GOOGLE_DRIVE_FOLDER_ID",
    "GOOGLE_CLOUD_NLP_CREDENTIALS",
    "GOOGLE_CLOUD_NLP_MODEL_NAME",
)


# =============================================================================
# HELPERS
# =============================================================================


def go_to_website(URL, verbose=True):
    """
    given a URL, opens the browser

    Args:
        URL: the address to open in the default browser
        verbose: when True, print the URL before opening it
    """
    if verbose:
        print("Opening...", URL)
    webbrowser.open(URL)


def _resolve_setting(value, env_key, prompt):
    """
    Return `value` if given, else the `env_key` environment variable,
    else whatever the user types at `prompt`.

    Whenever the value did not come from the environment, print the
    `export` line that would make it stick for next time.
    """
    if value is None:
        try:
            return os.environ[env_key]
        except KeyError:
            value = input(prompt)
    print("hey run this to make life easier...")
    print(f"export {env_key}={value}")
    return value


# =============================================================================
# TASKS
# =============================================================================


@task(aliases=("list", "lsit", "ist", "-list", "lis", "li", "slit", "slist"))
def _dash_dash_list(c):
    """
    because i forget --list often and fixz my ttypos
    """
    try:  # because pyinvoke issue #704
        c.run("invoke --list", hide="err")
    except Exception:
        print("uh oh, https://github.com/pyinvoke/invoke/issues/704")
        print("but here, try this...\n")
        # raw string: "\c" is not a valid Python escape; the shell command is
        # unchanged.  \c avoid self-reference
        cmd = r'cat tasks.py | grep def | grep "(\c"'
        print(f"$ {cmd}\n")
        c.run(cmd)


@task(aliases=("gh", "repo", "remote", "origin"))
def github(c, username="calpoly-csai", repo="api"):
    """
    opens the GitHub website for this project in default browser
    """
    # optionally just hard code this
    # TODO: look into how to read the .git/ folder to redirect based on that.
    SITE = f"https://github.com/{username}/{repo}"
    go_to_website(SITE)


@task(aliases=("gsit", "gst", "sgit", "gis", "gsi", "giat", "gisr", "gsot", "gost"))
def gist(
    c, edit=False, username="mfekadu", gist_hash="ceaa65dd158bd45dcfadbbda17b83b03"
):
    """
    opens the gist.GitHub.com website for this task.py source code
    """
    SITE = f"https://gist.github.com/{username}/{gist_hash}"
    SITE = f"{SITE}/edit" if edit else SITE
    go_to_website(SITE)


@task(aliases=("ghd", "desktop"))
def github_desktop(c):
    """
    opens the GitHub Desktop app . macOS only.
    """
    c.run("open -a 'GitHub Desktop'")


@task(aliases=("invoke", "wtf", "huh", "what", "umm", "uhh", "idk"))
def go_to_invoke_docs(c):
    """
    opens the docs for the PyInvoke project in default browser
    """
    SITE = "https://www.pyinvoke.org"
    go_to_website(SITE)


@task(help={"name": "Name of the person to say hi to."})
def hi(c, name, help=False):
    """
    Say hi to someone.
    """
    # `help` is unused but kept so the task's signature stays the same.
    print("Hi {}!".format(name))


@task(aliases=("format", "black", "lint"))
def black_auto_format(c, verbose=True):
    """
    Make the code look nice.

    Walks up from the current directory until a directory containing ".git"
    (usually the repository root) or the filesystem root is reached, then
    runs `black .` there.  Note: this changes the process working directory.
    """
    print("Formatting!")
    cwd = os.getcwd()

    # move up to the directory that contains ".git"
    # which often is the root of a repository
    print("current directory: ", cwd)
    # A directory that is its own parent is the filesystem root; unlike a
    # literal "/" comparison this also terminates on drive roots.
    while ".git" not in os.listdir(cwd) and os.path.dirname(cwd) != cwd:
        os.chdir("..")
        cwd = os.getcwd()
        print("current directory: ", cwd)

    cmd = "black ."
    print("running command: {}".format(cmd))
    c.run(cmd)


@task(aliases=("sc", "scala", "hi-scala", "hiscala", "helloscala"))
def hello_scala(c, verbose=True, name="hello_scala"):
    """
    create a hello_world scala file
    """
    filename = f"{name}.sc"
    print(f"Creating {filename}")
    file_content = """import scala.io._
object HelloApp {
  def main(args: Array[String]): Unit = {
    val coder = "Python"
    val num = 21
    println(s"Hello Scala from ${coder}!");
    println(s"${num + num} is a cool num");
  }
}
"""
    with open(filename, "w") as f:
        f.write(file_content)

    # quote the filename: `name` is caller-supplied and ends up in a shell line
    quoted = shlex.quote(filename)

    cmd = f"cat {quoted}"
    print(f"$ {cmd}\n")
    c.run(cmd)

    cmd = f"scala {quoted}"
    print(f"$ {cmd}\n")
    c.run(cmd)


@task(aliases=("copy", "pbcopy"))
def copy_tasks_py_to_clipboard(c):
    """
    copies the contents of tasks.py to the clipboard via pbcopy
    """
    cmd = "cat tasks.py | pbcopy"
    print(f"$ {cmd}\n")
    c.run(cmd)


@task(aliases=("ssh",))
def copy_ssh(c):
    """
    lists the files in ~/.ssh and copies the chosen one to the clipboard
    """
    # https://askubuntu.com/a/811236
    cmd = "ls -p ~/.ssh/ | grep -v /"
    print(f"$ {cmd}\n")
    c.run(cmd)

    choice = input("\n\nWhich one? (enter the name): ")
    print("\n\n")

    # The answer is typed by the user and spliced into a shell command, so it
    # is quoted; `~/.ssh/` stays outside the quotes so the tilde still expands.
    cmd = f"cat ~/.ssh/{shlex.quote(choice)} | pbcopy"
    print(f"$ {cmd}\n")
    c.run(cmd)


@task
def docker(c, username=None, app_name="nimbus"):
    """
    Locally, docker build && docker run
    """
    username = _resolve_setting(
        username, "TASKS_DOCKER_USERNAME", "docker username? "
    )

    print("make first sure to run...")
    print("source .export_env_vars")
    print("\n\n")

    missing = [name for name in _DOCKER_BUILD_ARGS if name not in os.environ]
    if missing:
        print("make first sure to run...")
        print("source .export_env_vars")
        print("\n\n")
        print("missing environment variables: " + ", ".join(missing))
        # non-zero status: a failed precondition must not look like success
        raise SystemExit(1)

    # automatically pass in local environment variables into the docker thing
    build_args = "".join(f" --build-arg {name}" for name in _DOCKER_BUILD_ARGS)
    cmd = f'docker build{build_args} -t "{username}/{app_name}" .'

    print(f"$ {cmd}\n")
    c.run(cmd, pty=True)  # run the docker build

    # http://www.pyinvoke.org/faq.html#running-local-shell-commands-run
    # --rm will make sure to remove the container on exit of shell
    # otherwise docker containers will eat up your storage space
    cmd = f"docker run -it --rm -p 8080:8080 {username}/{app_name}"
    print(f"$ {cmd}\n")
    c.run(cmd, pty=True)  # run the docker run


@task(aliases=("ds", "dash", "dsh"))
def docker_shell(c, image_name=None):
    """
    Run docker within an interactive shell
    https://stackoverflow.com/a/44769468
    """
    image_name = _resolve_setting(
        image_name, "TASKS_DOCKER_IMAGE_NAME", "docker image_name? "
    )

    # --rm will make sure to remove the container on exit of shell
    # otherwise docker containers will eat up your storage space
    cmd = f"docker run -it --rm {image_name} sh"
    print(f"$ {cmd}\n")
    c.run(cmd, pty=True)  # run the docker interactive shell
# --- tests/MockEntity.py ---
class MockEntity(Base):
    """Table-style stand-in entity (`is_view = False`) for the wrapper tests."""

    __tablename__ = "Test"
    entity_id = Column(Integer, primary_key=True)
    value_one = Column(String(64))
    value_two = Column(String(64))
    value_three = Column(String(64))
    # Marks this entity as a regular table rather than a view.
    is_view = False

    def __repr__(self):
        # The previous template was an empty string, so repr() was always "".
        return "<MockEntity(value_one={!r}, value_two={!r}, value_three={!r})>".format(
            self.value_one, self.value_two, self.value_three
        )


# --- tests/MockViewEntity.py ---
class MockViewEntity(Base):
    """View-style stand-in entity (`is_view = True`) for the wrapper tests."""

    __tablename__ = "Test"
    entity_id = Column(Integer, primary_key=True)
    value_one = Column(String(64))
    value_two = Column(String(64))
    value_three = Column(String(64))
    # Marks this entity as a view; the wrapper tests expect insert/update on
    # such an entity to raise InvalidOperationOnView.
    is_view = True

    def __repr__(self):
        # The previous template was an empty string, so repr() was always "".
        return (
            "<MockViewEntity(value_one={!r}, value_two={!r}, value_three={!r})>".format(
                self.value_one, self.value_two, self.value_three
            )
        )
def concat_with_space(lst: List[str]) -> str:
    """
    Returns a single string in which every given string is followed by a space.

    Because each item (including the last) gets its own space, a non-empty
    input always yields a result that ends with one trailing space, and an
    empty input yields the empty string.

    Args:
        lst: the strings to concatenate, in order

    Returns:
        the concatenation of ``item + " "`` for every item in ``lst``

    Raises:
        TypeError: if an item is not a string

    Example:
        >>> concat_with_space(['a', 'b', 'c'])
        'a b c '
        >>> concat_with_space([])
        ''
    """
    # join() builds the result in one pass instead of repeated `+=` copies
    return "".join(x + " " for x in lst)
"port": "testPort", + "user": "testUser", + "password": "testPassword", + "database": "testDatabase", + }, +} + +RDBMS = "mysql" +PIP_PACKAGE = "mysqlconnector" +SQLALCHEMY_DATABASE_URI = "{}+{}://{}:{}@{}:{}/{}".format( + RDBMS, + PIP_PACKAGE, + TEST_CONFIG_DICT["mysql"]["user"], + TEST_CONFIG_DICT["mysql"]["password"], + TEST_CONFIG_DICT["mysql"]["host"], + TEST_CONFIG_DICT["mysql"]["port"], + TEST_CONFIG_DICT["mysql"]["database"], +) + + +@patch.object(NimbusMySQLAlchemy, "_create_engine") +def test_validate_input_keys(mock_create_engine): + test_db = NimbusMySQLAlchemy() + test_db.validate_input_keys(MOCK_ENTITY_DATA_DICT, MOCK_ENTITY_DATA_DICT.keys()) + + +@patch.object(NimbusMySQLAlchemy, "_create_engine") +def test_validate_input_keys_no_input(mock_create_engine): + test_db = NimbusMySQLAlchemy + with pytest.raises(BadDictionaryKeyError): + test_db.validate_input_keys({}, []) + + +@patch.object(NimbusMySQLAlchemy, "_create_engine") +def test_validate_input_keys_extra_keys(mock_create_engine): + test_db = NimbusMySQLAlchemy + extra_key_dict = dict(MOCK_ENTITY_DATA_DICT) + extra_key_dict["value_extra"] = "test4" + with pytest.raises(BadDictionaryKeyError): + test_db.validate_input_keys(extra_key_dict, MOCK_ENTITY_DATA_DICT.keys()) + + +@patch.object(NimbusMySQLAlchemy, "_create_engine") +def test_validate_input_keys_missing_keys(mock_create_engine): + test_db = NimbusMySQLAlchemy + missing_key_dict = {"value_one": "test1"} + with pytest.raises(BadDictionaryKeyError): + test_db.validate_input_keys(missing_key_dict, MOCK_ENTITY_DATA_DICT.keys()) + + +@patch.object(NimbusMySQLAlchemy, "_create_engine") +def test_create_all_tables(mock_create_engine): + test_db = NimbusMySQLAlchemy() + + for entity_type in ENTITY_TYPES: + mock_entity = Mock() + mock_entity.__tablename__ = entity_type + mock_entity.__table__ = Mock() + mock_entity.__table__.create.return_value = None + setattr(test_db, entity_type, mock_entity) + + test_db._create_all_tables() + + # Verify that 
each Entity had .create() called on it once. + for entity_type in ENTITY_TYPES: + try: + getattr(test_db, entity_type).__table__.create.assert_called_once() + except AssertionError as e: + print("{} table was not created".format(entity_type), file=sys.stderr) + raise e + + +@patch.object(NimbusMySQLAlchemy, "_create_engine") +def test_create_all_tables_already_exists(mock_create_engine): + mock_inspector = Mock() + mock_inspector.get_table_names.return_value = ENTITY_TYPES + + test_db = NimbusMySQLAlchemy() + test_db.inspector = mock_inspector + + for entity_type in ENTITY_TYPES: + mock_entity = Mock() + mock_entity.__tablename__ = entity_type + mock_entity.__table__ = Mock() + setattr(test_db, entity_type, mock_entity) + + test_db._create_all_tables() + + # Verify that each Entity did not have .create() called on it + for entity_type in ENTITY_TYPES: + assert not getattr(test_db, entity_type).__table__.create.called + + +@patch.object( + NimbusMySQLAlchemy, + "validate_and_format_entity_data", + return_value=MOCK_ENTITY_DATA_DICT, +) +@patch.object(NimbusMySQLAlchemy, "_create_engine") +def test_insert_entity(mock_create_engine, mock_validate): + # Setup mocks and test_db instance + test_db = NimbusMySQLAlchemy() + test_db.session = Mock() + + # Insert entity and assert that add/commit were called + test_db.insert_entity(MockEntity, MOCK_ENTITY_DATA_DICT) + test_db.session.add.assert_called_once() + test_db.session.commit.assert_called_once() + + # Assert that the entity inserted was populated with the right fields + entity = test_db.session.add.call_args.args[0] + for field in list(MOCK_ENTITY_DATA_DICT.keys()): + assert getattr(entity, field) is MOCK_ENTITY_DATA_DICT[field] + + +@patch.object( + NimbusMySQLAlchemy, + "validate_and_format_entity_data", + return_value=MOCK_ENTITY_DATA_DICT, +) +@patch.object(NimbusMySQLAlchemy, "_create_engine") +def test_update_entity_no_match(mock_create_engine, mock_validate): + # Setup mocks and test_db instance + mock_session 
= Mock() + mock_query = Mock() + + mock_session.query.return_value = mock_query + mock_query.filter.return_value = mock_query + mock_query.first.return_value = None + + test_db = NimbusMySQLAlchemy() + test_db.session = mock_session + + # Insert entity and assert that add/commit were called + test_db.update_entity(MockEntity, MOCK_ENTITY_DATA_DICT, ["value_one"]) + test_db.session.add.assert_called_once() + test_db.session.commit.assert_called_once() + + # Assert that the entity inserted was populated with the right fields + entity = test_db.session.add.call_args.args[0] + for field in list(MOCK_ENTITY_DATA_DICT.keys()): + assert getattr(entity, field) is MOCK_ENTITY_DATA_DICT[field] + + +@patch.object( + NimbusMySQLAlchemy, + "validate_and_format_entity_data", + return_value=MOCK_ENTITY_DATA_DICT, +) +@patch.object(NimbusMySQLAlchemy, "_create_engine") +def test_update_entity_match(mock_create_engine, mock_validate): + # Setup mocks and test_db instance + mock_session = Mock() + mock_query = Mock() + + mock_session.query.return_value = mock_query + mock_query.filter.return_value = mock_query + mock_query.first.return_value = MockEntity + + test_db = NimbusMySQLAlchemy() + test_db.session = mock_session + + # Insert entity and assert that add/commit were called + test_db.update_entity(MockEntity, MOCK_ENTITY_DATA_DICT, ["value_one"]) + test_db.session.add.assert_called_once() + test_db.session.commit.assert_called_once() + + # Assert that the entity inserted was populated with the right fields + entity = test_db.session.add.call_args.args[0] + for field in list(MOCK_ENTITY_DATA_DICT.keys()): + assert getattr(entity, field) is MOCK_ENTITY_DATA_DICT[field] + + +@patch.object(NimbusMySQLAlchemy, "_create_engine") +def test_update_entity_no_filter_fields_error(mock_create_engine): + test_db = NimbusMySQLAlchemy() + with pytest.raises(RuntimeError, match="filter"): + test_db.update_entity(MockEntity, MOCK_ENTITY_DATA_DICT, []) + + +@patch.object(NimbusMySQLAlchemy, 
"_create_engine") +def test_invalid_entity_type(mock_create_engine): + test_db = NimbusMySQLAlchemy() + with pytest.raises(KeyError): + test_db.insert_entity(MockEntity, MOCK_ENTITY_DATA_DICT) + with pytest.raises(KeyError): + test_db.update_entity(MockEntity, MOCK_ENTITY_DATA_DICT, ["test"]) + + +@patch.object(NimbusMySQLAlchemy, "_create_engine") +def test_format_audio_sample_meta_data_dict(mock_create_engine): + test_db = NimbusMySQLAlchemy() + test_db.format_audio_sample_meta_data_dict( + dict(TEST_AUDIO_SAMPLE_META_DATA_DATA_DICT) + ) + + +@patch.object(NimbusMySQLAlchemy, "_create_engine") +def test_format_audio_sample_meta_data_dict_bad_input(mock_create_engine): + test_db = NimbusMySQLAlchemy() + invalid_is_wake_word = dict(TEST_AUDIO_SAMPLE_META_DATA_DATA_DICT) + invalid_is_wake_word["isWakeWord"] = "test" + invalid_noise_level = dict(TEST_AUDIO_SAMPLE_META_DATA_DATA_DICT) + invalid_noise_level["noiseLevel"] = "test" + + with pytest.raises(BadDictionaryValueError): + test_db.format_audio_sample_meta_data_dict(invalid_is_wake_word) + with pytest.raises(BadDictionaryValueError): + test_db.format_audio_sample_meta_data_dict(invalid_noise_level) + + +@patch("database_wrapper.create_engine") +def test_create_engine(mock_create_engine): + mock_engine = Mock() + mock_create_engine.return_value = mock_engine + + with open("testConfig.json", "w+") as test_config: + json.dump(TEST_CONFIG_DICT, test_config) + + test_db = NimbusMySQLAlchemy(TEST_CONFIG_FILENAME) + mock_create_engine.assert_called_once_with(SQLALCHEMY_DATABASE_URI) + assert test_db.engine is mock_engine + + os.remove(TEST_CONFIG_FILENAME) + + +@patch("database_wrapper.create_engine", return_value=None) +def test_create_engine_bad_config(mock_create_engine): + with open(TEST_CONFIG_FILENAME, "w+") as test_config: + json.dump(TEST_CONFIG_DICT, test_config) + + with pytest.raises(BadConfigFileError, match="failed to connect"): + test_db = NimbusMySQLAlchemy(TEST_CONFIG_FILENAME) + + 
os.remove(TEST_CONFIG_FILENAME) + + +def test_create_engine_missing_field(): + with open(TEST_CONFIG_FILENAME, "w+") as test_config: + json.dump({}, test_config) + + with pytest.raises(BadConfigFileError, match="missing mysql field"): + test_db = NimbusMySQLAlchemy(TEST_CONFIG_FILENAME) + + os.remove(TEST_CONFIG_FILENAME) + + +@patch.object(NimbusMySQLAlchemy, "_create_engine") +def test_insert_entity_view_error(mock_create_engine): + test_db = NimbusMySQLAlchemy() + + with pytest.raises(InvalidOperationOnView): + test_db.insert_entity(MockViewEntity, {}) + + +@patch.object(NimbusMySQLAlchemy, "_create_engine") +def test_update_entity_view_error(mock_create_engine): + test_db = NimbusMySQLAlchemy() + + with pytest.raises(InvalidOperationOnView): + test_db.update_entity(MockViewEntity, {}, []) diff --git a/tests/test_flask_api.py b/tests/test_flask_api.py new file mode 100644 index 0000000..3eec538 --- /dev/null +++ b/tests/test_flask_api.py @@ -0,0 +1,120 @@ +import json +import pytest + +import flask_api +from database_wrapper import ( + NimbusMySQLAlchemy, + BadDictionaryKeyError, + BadDictionaryValueError, + NimbusDatabaseError, + UnsupportedDatabaseError, + BadConfigFileError, +) +from io import BytesIO +from mock import patch, Mock +from modules.validators import WakeWordValidatorError +from .MockEntity import MockEntity + + +BAD_REQUEST = 400 +SUCCESS = 200 +SERVER_ERROR = 500 +TOKEN = "test_token" +TEST_ERROR = "test error string" + + +@pytest.fixture +def client(): + flask_api.app.config["TESTING"] = True + + with flask_api.app.test_client() as client: + yield client + + +def test_hello(client): + resp = client.get("/") + assert resp.json == {"name": "hello {}".format(flask_api.app)} + + test_data_dict = {"hello": "world"} + resp = client.post("/", json=test_data_dict) + assert resp.json == {"you sent": test_data_dict} + + +@patch("flask_api.nimbus") +@patch("flask_api.db") +def test_ask_request_not_json(mock_db, mock_nimbus, client): + resp = 
@patch("flask_api.nimbus")
@patch("flask_api.db")
def test_ask_no_question(mock_db, mock_nimbus, client):
    """A JSON body without a question is answered with 400 and an explanation."""
    empty_body = {}
    response = client.post("/ask", json=empty_body)
    assert response.status_code == BAD_REQUEST
    assert response.data == b"request body should include the question"


@patch("flask_api.generate_session_token", return_value=TOKEN)
@patch("flask_api.nimbus")
@patch("flask_api.db")
def test_ask_question(mock_db, mock_nimbus, mock_generate_session_token, client):
    """/ask returns the answer plus a fresh or echoed session token."""
    expected_answer = "test_answer"
    caller_token = "dummy_token"
    mock_nimbus.answer_question.return_value = expected_answer

    # Without a session in the request, the freshly generated token comes back
    first = client.post("/ask", json={"question": "test_question"})
    assert first.status_code == SUCCESS
    assert first.json == {"answer": expected_answer, "session": TOKEN}

    # With a session in the request, that same token is echoed back
    body_with_session = {"question": "test_question", "session": caller_token}
    second = client.post("/ask", json=body_with_session)
    assert second.status_code == SUCCESS
    assert second.json == {"answer": expected_answer, "session": caller_token}


@patch("flask_api.save_audiofile")
@patch("flask_api.create_filename", return_value="test_filename")
@patch("flask_api.WakeWordValidator")
@patch("flask_api.WakeWordFormatter")
@patch("flask_api.db")
def test_new_data_wakeword(
    mock_db,
    mock_formatter,
    mock_validator,
    mock_create_filename,
    mock_save_audiofile,
    client,
):
    """Posting wake-word data stores one entity and reports the new filename."""
    formatter = Mock()
    formatter.format.return_value = {"filename": "dummy"}
    mock_formatter.return_value = formatter

    upload = (BytesIO(b"dummyText"), "dummyfile.txt")
    response = client.post(
        "/new_data/wakeword", data={"test": "foo", "wav_file": upload}
    )

    # The db client must have been asked to save exactly once, and the
    # response must name the generated file
    mock_db.insert_entity.assert_called_once()
    assert response.data == b"Successfully stored audiofile as 'test_filename'"


@patch("flask_api.WakeWordValidator")
def test_new_data_wakeword_validator_issues(mock_validator, client):
    """A WakeWordValidatorError from fix() becomes a 400 carrying its message."""
    failing_validator = Mock()
    failing_validator.fix.side_effect = WakeWordValidatorError(TEST_ERROR)
    mock_validator.return_value = failing_validator

    # The endpoint is expected to turn the validator failure into an error reply
    upload = (BytesIO(b"dummyText"), "dummyfile.txt")
    response = client.post(
        "/new_data/wakeword", data={"dummy1": "dummy2", "wav_file": upload}
    )
    assert response.status_code == BAD_REQUEST
    assert response.data == TEST_ERROR.encode()
# --- utilities/cases.py ---
def to_snake_case(name):
    """
    Convert a camelCase identifier to snake_case.

    Every uppercase letter becomes "_" plus its lowercase form, and every run
    of ASCII digits gets a single "_" in front of its first digit.

    Args:
        name: the identifier to convert

    Returns:
        the converted string ("" for empty input)

    Example:
        >>> to_snake_case("myNumAndStringApplesAreCool42")
        'my_num_and_string_apples_are_cool_42'
        >>> to_snake_case("a1b22c3")
        'a_1b_22c_3'
    """
    pieces = []
    in_digit_run = False
    for char in name:
        is_digit = char in "0123456789"
        if char.isupper():
            # lower() instead of ord()+32, which is only right for ASCII
            pieces.append("_" + char.lower())
        elif is_digit and not in_digit_run:
            pieces.append("_" + char)
        else:
            pieces.append(char)
        # Track the run per character. The old flag was never reset, so only
        # the first digit run in a string ever received its underscore.
        in_digit_run = is_digit
    return "".join(pieces)


object1 = "myNumAndStringApplesAreCool42"
print(to_snake_case(object1))


# --- utilities/yaml_utils.py ---
def load_yaml(filename: str) -> dict:
    """
    Returns the dictionary representation of a given yaml filename

    Args:
        filename: a "filename.yml" string

    Returns:
        dictionary that represents the yaml file

    Raises:
        yaml.YAMLError : if something bad happened
    """
    # imported here so that importing this module does not itself need PyYAML
    import yaml

    # YAMLError propagates unchanged; the old try/except only re-raised it
    with open(filename, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)


def dump_yaml(data: dict, filename: str) -> None:
    """
    Saves the given data into the given yaml filename

    Args:
        data: yaml data
        filename: a "filename.yml" string

    Raises:
        yaml.YAMLError : if something bad happened
    """
    # imported here so that importing this module does not itself need PyYAML
    import yaml

    with open(filename, "w", encoding="utf-8") as f:
        yaml.safe_dump(data, f)