From a128451004201ce0b81269bb693c2bc43cd65aa6 Mon Sep 17 00:00:00 2001 From: Ryan Dahl Date: Wed, 1 Dec 2010 16:35:46 -0800 Subject: [PATCH] Import Google's closure_linter Run with 'make lint' --- LICENSE | 3 + Makefile | 8 +- tools/closure_linter/PKG-INFO | 10 + tools/closure_linter/README | 9 + .../closure_linter.egg-info/PKG-INFO | 10 + .../closure_linter.egg-info/SOURCES.txt | 41 + .../dependency_links.txt | 1 + .../closure_linter.egg-info/entry_points.txt | 4 + .../closure_linter.egg-info/requires.txt | 1 + .../closure_linter.egg-info/top_level.txt | 1 + .../closure_linter/closure_linter/__init__.py | 1 + .../closure_linter/closure_linter/checker.py | 82 + .../closure_linter/checkerbase.py | 237 ++ .../closure_linter/common/__init__.py | 1 + .../closure_linter/common/error.py | 65 + .../closure_linter/common/erroraccumulator.py | 46 + .../closure_linter/common/errorhandler.py | 61 + .../closure_linter/common/errorprinter.py | 203 ++ .../closure_linter/common/filetestcase.py | 105 + .../closure_linter/common/htmlutil.py | 170 ++ .../closure_linter/common/lintrunner.py | 39 + .../closure_linter/common/matcher.py | 60 + .../closure_linter/common/position.py | 126 + .../closure_linter/common/simplefileflags.py | 190 ++ .../closure_linter/common/tokenizer.py | 184 ++ .../closure_linter/common/tokens.py | 125 + .../closure_linter/ecmalintrules.py | 752 +++++ .../closure_linter/ecmametadatapass.py | 521 ++++ .../closure_linter/error_fixer.py | 336 +++ .../closure_linter/errorrules.py | 42 + tools/closure_linter/closure_linter/errors.py | 131 + .../closure_linter/fixjsstyle.py | 47 + .../closure_linter/fixjsstyle_test.py | 61 + .../closure_linter/full_test.py | 99 + .../closure_linter/closure_linter/gjslint.py | 142 + .../closure_linter/indentation.py | 543 ++++ .../closure_linter/javascriptlintrules.py | 395 +++ .../closure_linter/javascriptstatetracker.py | 238 ++ .../javascriptstatetracker_test.py | 53 + .../closure_linter/javascripttokenizer.py | 365 +++ .../closure_linter/javascripttokens.py | 147 + .../closure_linter/statetracker.py | 964 +++++++ .../closure_linter/tokenutil.py | 285 ++ tools/closure_linter/gflags.py | 2489 +++++++++++++++++ tools/closure_linter/setup.cfg | 5 + tools/closure_linter/setup.py | 38 + 46 files changed, 9435 insertions(+), 1 deletion(-) create mode 100644 tools/closure_linter/PKG-INFO create mode 100644 tools/closure_linter/README create mode 100644 tools/closure_linter/closure_linter.egg-info/PKG-INFO create mode 100644 tools/closure_linter/closure_linter.egg-info/SOURCES.txt create mode 100644 tools/closure_linter/closure_linter.egg-info/dependency_links.txt create mode 100644 tools/closure_linter/closure_linter.egg-info/entry_points.txt create mode 100644 tools/closure_linter/closure_linter.egg-info/requires.txt create mode 100644 tools/closure_linter/closure_linter.egg-info/top_level.txt create mode 100755 tools/closure_linter/closure_linter/__init__.py create mode 100755 tools/closure_linter/closure_linter/checker.py create mode 100755 tools/closure_linter/closure_linter/checkerbase.py create mode 100755 tools/closure_linter/closure_linter/common/__init__.py create mode 100755 tools/closure_linter/closure_linter/common/error.py create mode 100755 tools/closure_linter/closure_linter/common/erroraccumulator.py create mode 100755 tools/closure_linter/closure_linter/common/errorhandler.py create mode 100755 tools/closure_linter/closure_linter/common/errorprinter.py create mode 100755 tools/closure_linter/closure_linter/common/filetestcase.py create mode 
100755 tools/closure_linter/closure_linter/common/htmlutil.py create mode 100755 tools/closure_linter/closure_linter/common/lintrunner.py create mode 100755 tools/closure_linter/closure_linter/common/matcher.py create mode 100755 tools/closure_linter/closure_linter/common/position.py create mode 100755 tools/closure_linter/closure_linter/common/simplefileflags.py create mode 100755 tools/closure_linter/closure_linter/common/tokenizer.py create mode 100755 tools/closure_linter/closure_linter/common/tokens.py create mode 100755 tools/closure_linter/closure_linter/ecmalintrules.py create mode 100755 tools/closure_linter/closure_linter/ecmametadatapass.py create mode 100755 tools/closure_linter/closure_linter/error_fixer.py create mode 100755 tools/closure_linter/closure_linter/errorrules.py create mode 100755 tools/closure_linter/closure_linter/errors.py create mode 100755 tools/closure_linter/closure_linter/fixjsstyle.py create mode 100755 tools/closure_linter/closure_linter/fixjsstyle_test.py create mode 100755 tools/closure_linter/closure_linter/full_test.py create mode 100755 tools/closure_linter/closure_linter/gjslint.py create mode 100755 tools/closure_linter/closure_linter/indentation.py create mode 100755 tools/closure_linter/closure_linter/javascriptlintrules.py create mode 100755 tools/closure_linter/closure_linter/javascriptstatetracker.py create mode 100755 tools/closure_linter/closure_linter/javascriptstatetracker_test.py create mode 100755 tools/closure_linter/closure_linter/javascripttokenizer.py create mode 100755 tools/closure_linter/closure_linter/javascripttokens.py create mode 100755 tools/closure_linter/closure_linter/statetracker.py create mode 100755 tools/closure_linter/closure_linter/tokenutil.py create mode 100644 tools/closure_linter/gflags.py create mode 100644 tools/closure_linter/setup.cfg create mode 100755 tools/closure_linter/setup.py diff --git a/LICENSE b/LICENSE index 4a270f7c963..e6dbf35156b 100644 --- a/LICENSE +++ b/LICENSE @@ -34,6 +34,9 @@ are: - src/platform_darwin_proctitle.cc, has code taken from the Chromium project copyright Google Inc. and released with the BSD license. + - tools/closure_linter is copyrighted by The Closure Linter Authors and + Google Inc and is released under the Apache license. 
+ Node's license follows: diff --git a/Makefile b/Makefile index b0040e02a0d..9ac09602caf 100644 --- a/Makefile +++ b/Makefile @@ -130,5 +130,11 @@ bench-idle: sleep 1 ./node benchmark/idle_clients.js & +lint: + @for i in lib/*.js; do \ + PYTHONPATH=tools/closure_linter/ python tools/closure_linter/closure_linter/gjslint.py \ + --unix_mode --strict --nojsdoc $$i || exit 1; \ + done -.PHONY: bench clean docopen docclean doc dist distclean check uninstall install all program staticlib dynamiclib test test-all website-upload + +.PHONY: lint bench clean docopen docclean doc dist distclean check uninstall install all program staticlib dynamiclib test test-all website-upload diff --git a/tools/closure_linter/PKG-INFO b/tools/closure_linter/PKG-INFO new file mode 100644 index 00000000000..b6e71c8f110 --- /dev/null +++ b/tools/closure_linter/PKG-INFO @@ -0,0 +1,10 @@ +Metadata-Version: 1.0 +Name: closure_linter +Version: 2.2.6 +Summary: Closure Linter +Home-page: http://code.google.com/p/closure-linter +Author: The Closure Linter Authors +Author-email: opensource@google.com +License: Apache +Description: UNKNOWN +Platform: UNKNOWN diff --git a/tools/closure_linter/README b/tools/closure_linter/README new file mode 100644 index 00000000000..4a21b2defc3 --- /dev/null +++ b/tools/closure_linter/README @@ -0,0 +1,9 @@ +This repository contains the Closure Linter - a style checker for JavaScript. + +To install the application, run + python ./setup.py install + +After installing, you get two helper applications installed into /usr/local/bin: + + gjslint.py - runs the linter and checks for errors + fixjsstyle.py - tries to fix errors automatically diff --git a/tools/closure_linter/closure_linter.egg-info/PKG-INFO b/tools/closure_linter/closure_linter.egg-info/PKG-INFO new file mode 100644 index 00000000000..918e2433f9e --- /dev/null +++ b/tools/closure_linter/closure_linter.egg-info/PKG-INFO @@ -0,0 +1,10 @@ +Metadata-Version: 1.0 +Name: closure-linter +Version: 2.2.6 +Summary: Closure Linter +Home-page: http://code.google.com/p/closure-linter +Author: The Closure Linter Authors +Author-email: opensource@google.com +License: Apache +Description: UNKNOWN +Platform: UNKNOWN diff --git a/tools/closure_linter/closure_linter.egg-info/SOURCES.txt b/tools/closure_linter/closure_linter.egg-info/SOURCES.txt new file mode 100644 index 00000000000..b64d829f7e9 --- /dev/null +++ b/tools/closure_linter/closure_linter.egg-info/SOURCES.txt @@ -0,0 +1,41 @@ +README +setup.py +closure_linter/__init__.py +closure_linter/checker.py +closure_linter/checkerbase.py +closure_linter/ecmalintrules.py +closure_linter/ecmametadatapass.py +closure_linter/error_fixer.py +closure_linter/errorrules.py +closure_linter/errors.py +closure_linter/fixjsstyle.py +closure_linter/fixjsstyle_test.py +closure_linter/full_test.py +closure_linter/gjslint.py +closure_linter/indentation.py +closure_linter/javascriptlintrules.py +closure_linter/javascriptstatetracker.py +closure_linter/javascriptstatetracker_test.py +closure_linter/javascripttokenizer.py +closure_linter/javascripttokens.py +closure_linter/statetracker.py +closure_linter/tokenutil.py +closure_linter.egg-info/PKG-INFO +closure_linter.egg-info/SOURCES.txt +closure_linter.egg-info/dependency_links.txt +closure_linter.egg-info/entry_points.txt +closure_linter.egg-info/requires.txt +closure_linter.egg-info/top_level.txt +closure_linter/common/__init__.py +closure_linter/common/error.py +closure_linter/common/erroraccumulator.py +closure_linter/common/errorhandler.py 
+closure_linter/common/errorprinter.py +closure_linter/common/filetestcase.py +closure_linter/common/htmlutil.py +closure_linter/common/lintrunner.py +closure_linter/common/matcher.py +closure_linter/common/position.py +closure_linter/common/simplefileflags.py +closure_linter/common/tokenizer.py +closure_linter/common/tokens.py \ No newline at end of file diff --git a/tools/closure_linter/closure_linter.egg-info/dependency_links.txt b/tools/closure_linter/closure_linter.egg-info/dependency_links.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/tools/closure_linter/closure_linter.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/tools/closure_linter/closure_linter.egg-info/entry_points.txt b/tools/closure_linter/closure_linter.egg-info/entry_points.txt new file mode 100644 index 00000000000..459b9d08b80 --- /dev/null +++ b/tools/closure_linter/closure_linter.egg-info/entry_points.txt @@ -0,0 +1,4 @@ +[console_scripts] +fixjsstyle = closure_linter.fixjsstyle:main +gjslint = closure_linter.gjslint:main + diff --git a/tools/closure_linter/closure_linter.egg-info/requires.txt b/tools/closure_linter/closure_linter.egg-info/requires.txt new file mode 100644 index 00000000000..71b67f110ca --- /dev/null +++ b/tools/closure_linter/closure_linter.egg-info/requires.txt @@ -0,0 +1 @@ +python-gflags \ No newline at end of file diff --git a/tools/closure_linter/closure_linter.egg-info/top_level.txt b/tools/closure_linter/closure_linter.egg-info/top_level.txt new file mode 100644 index 00000000000..7ece71f1c91 --- /dev/null +++ b/tools/closure_linter/closure_linter.egg-info/top_level.txt @@ -0,0 +1 @@ +closure_linter diff --git a/tools/closure_linter/closure_linter/__init__.py b/tools/closure_linter/closure_linter/__init__.py new file mode 100755 index 00000000000..4265cc3e6c1 --- /dev/null +++ b/tools/closure_linter/closure_linter/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python diff --git a/tools/closure_linter/closure_linter/checker.py b/tools/closure_linter/closure_linter/checker.py new file mode 100755 index 00000000000..4cdac931ffa --- /dev/null +++ b/tools/closure_linter/closure_linter/checker.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Core methods for checking JS files for common style guide violations.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +import gflags as flags + +from closure_linter import checkerbase +from closure_linter import ecmametadatapass +from closure_linter import errors +from closure_linter import javascriptlintrules +from closure_linter import javascriptstatetracker +from closure_linter.common import errorprinter +from closure_linter.common import lintrunner + +flags.DEFINE_list('limited_doc_files', ['dummy.js', 'externs.js'], + 'List of files with relaxed documentation checks. 
Will not ' 'report errors for missing documentation, some missing ' 'descriptions, or methods whose @return tags don\'t have a ' 'matching return statement.') + + +class JavaScriptStyleChecker(checkerbase.CheckerBase): + """Checker that applies JavaScriptLintRules.""" + + def __init__(self, error_handler): + """Initialize a JavaScriptStyleChecker object. + + Args: + error_handler: Error handler to pass all errors to + """ + checkerbase.CheckerBase.__init__( + self, + error_handler=error_handler, + lint_rules=javascriptlintrules.JavaScriptLintRules(), + state_tracker=javascriptstatetracker.JavaScriptStateTracker( + closurized_namespaces=flags.FLAGS.closurized_namespaces), + metadata_pass=ecmametadatapass.EcmaMetaDataPass(), + limited_doc_files=flags.FLAGS.limited_doc_files) + + +class GJsLintRunner(lintrunner.LintRunner): + """Wrapper class to run GJsLint.""" + + def Run(self, filenames, error_handler=None): + """Run GJsLint on the given filenames. + + Args: + filenames: The filenames to check + error_handler: An optional ErrorHandler object; an ErrorPrinter is used if + none is specified. + + Returns: + The error handler, which may have been used to collect error info. + """ + if not error_handler: + error_handler = errorprinter.ErrorPrinter(errors.NEW_ERRORS) + + checker = JavaScriptStyleChecker(error_handler) + + # Check the list of files. + for filename in filenames: + checker.Check(filename) + + return error_handler diff --git a/tools/closure_linter/closure_linter/checkerbase.py b/tools/closure_linter/closure_linter/checkerbase.py new file mode 100755 index 00000000000..123cb728603 --- /dev/null +++ b/tools/closure_linter/closure_linter/checkerbase.py @@ -0,0 +1,237 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Base classes for writing checkers that operate on tokens.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)', + 'jacobr@google.com (Jacob Richman)') + +import traceback + +import gflags as flags +from closure_linter import ecmametadatapass +from closure_linter import errorrules +from closure_linter import errors +from closure_linter import javascripttokenizer +from closure_linter.common import error +from closure_linter.common import htmlutil + +FLAGS = flags.FLAGS +flags.DEFINE_boolean('debug_tokens', False, + 'Whether to print all tokens for debugging.') + +flags.DEFINE_boolean('error_trace', False, + 'Whether to show error exceptions.') + +class LintRulesBase(object): + """Base class for all classes defining the lint rules for a language.""" + + def __init__(self): + self.__checker = None + + def Initialize(self, checker, limited_doc_checks, is_html): + """Initializes to prepare to check a file. + + Args: + checker: Class to report errors to. + limited_doc_checks: Whether doc checking is relaxed for this file. + is_html: Whether the file is an HTML file with extracted contents.
+ """ + self.__checker = checker + self._limited_doc_checks = limited_doc_checks + self._is_html = is_html + + def _HandleError(self, code, message, token, position=None, + fix_data=None): + """Call the HandleError function for the checker we are associated with.""" + if errorrules.ShouldReportError(code): + self.__checker.HandleError(code, message, token, position, fix_data) + + def CheckToken(self, token, parser_state): + """Checks a token, given the current parser_state, for warnings and errors. + + Args: + token: The current token under consideration. + parser_state: Object that indicates the parser state in the page. + + Raises: + TypeError: If not overridden. + """ + raise TypeError('Abstract method CheckToken not implemented') + + def Finalize(self, parser_state, tokenizer_mode): + """Perform all checks that need to occur after all lines are processed. + + Args: + parser_state: State of the parser after parsing all tokens + tokenizer_mode: Mode of the tokenizer after parsing the entire page + + Raises: + TypeError: If not overridden. + """ + raise TypeError('Abstract method Finalize not implemented') + + +class CheckerBase(object): + """This class handles checking a LintRules object against a file.""" + + def __init__(self, error_handler, lint_rules, state_tracker, + limited_doc_files=None, metadata_pass=None): + """Initialize a checker object. + + Args: + error_handler: Object that handles errors. + lint_rules: LintRules object defining lint errors given a token + and state_tracker object. + state_tracker: Object that tracks the current state in the token stream. + limited_doc_files: List of filenames that are not required to have + documentation comments. + metadata_pass: Object that builds metadata about the token stream. + """ + self.__error_handler = error_handler + self.__lint_rules = lint_rules + self.__state_tracker = state_tracker + self.__metadata_pass = metadata_pass + self.__limited_doc_files = limited_doc_files + self.__tokenizer = javascripttokenizer.JavaScriptTokenizer() + self.__has_errors = False + + def HandleError(self, code, message, token, position=None, + fix_data=None): + """Prints out the given error message including a line number. + + Args: + code: The error code. + message: The error to print. + token: The token where the error occurred, or None if it was a file-wide + issue. + position: The position of the error, defaults to None. + fix_data: Metadata used for fixing the error. + """ + self.__has_errors = True + self.__error_handler.HandleError( + error.Error(code, message, token, position, fix_data)) + + def HasErrors(self): + """Returns true if the style checker has found any errors. + + Returns: + True if the style checker has found any errors. + """ + return self.__has_errors + + def Check(self, filename): + """Checks the file, printing warnings and errors as they are found. + + Args: + filename: The name of the file to check. + """ + try: + f = open(filename) + except IOError: + self.__error_handler.HandleFile(filename, None) + self.HandleError(errors.FILE_NOT_FOUND, 'File not found', None) + self.__error_handler.FinishFile() + return + + try: + if filename.endswith('.html') or filename.endswith('.htm'): + self.CheckLines(filename, htmlutil.GetScriptLines(f), True) + else: + self.CheckLines(filename, f, False) + finally: + f.close() + + def CheckLines(self, filename, lines_iter, is_html): + """Checks a file, given as an iterable of lines, for warnings and errors. + + Args: + filename: The name of the file to check. 
+ lines_iter: An iterator that yields one line of the file at a time. + is_html: Whether the file being checked is an HTML file with extracted + contents. + + Returns: + A boolean indicating whether the full file could be checked or if checking + failed prematurely. + """ + limited_doc_checks = False + if self.__limited_doc_files: + for limited_doc_filename in self.__limited_doc_files: + if filename.endswith(limited_doc_filename): + limited_doc_checks = True + break + + state_tracker = self.__state_tracker + lint_rules = self.__lint_rules + state_tracker.Reset() + lint_rules.Initialize(self, limited_doc_checks, is_html) + + token = self.__tokenizer.TokenizeFile(lines_iter) + + parse_error = None + if self.__metadata_pass: + try: + self.__metadata_pass.Reset() + self.__metadata_pass.Process(token) + except ecmametadatapass.ParseError, caught_parse_error: + if FLAGS.error_trace: + traceback.print_exc() + parse_error = caught_parse_error + except Exception: + print 'Internal error in %s' % filename + traceback.print_exc() + return False + + self.__error_handler.HandleFile(filename, token) + + while token: + if FLAGS.debug_tokens: + print token + + if parse_error and parse_error.token == token: + # Report any parse errors from above once we find the token. + message = ('Error parsing file at token "%s". Unable to ' + 'check the rest of file.' % token.string) + self.HandleError(errors.FILE_DOES_NOT_PARSE, message, token) + self.__error_handler.FinishFile() + return False + + if FLAGS.error_trace: + state_tracker.HandleToken(token, state_tracker.GetLastNonSpaceToken()) + else: + try: + state_tracker.HandleToken(token, state_tracker.GetLastNonSpaceToken()) + except: + self.HandleError(errors.FILE_DOES_NOT_PARSE, + ('Error parsing file at token "%s". Unable to ' + 'check the rest of file.' % token.string), + token) + self.__error_handler.FinishFile() + return False + + # Check the token for style guide violations. + lint_rules.CheckToken(token, state_tracker) + + state_tracker.HandleAfterToken(token) + + # Move to the next token. + token = token.next + + lint_rules.Finalize(state_tracker, self.__tokenizer.mode) + self.__error_handler.FinishFile() + return True diff --git a/tools/closure_linter/closure_linter/common/__init__.py b/tools/closure_linter/closure_linter/common/__init__.py new file mode 100755 index 00000000000..4265cc3e6c1 --- /dev/null +++ b/tools/closure_linter/closure_linter/common/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python diff --git a/tools/closure_linter/closure_linter/common/error.py b/tools/closure_linter/closure_linter/common/error.py new file mode 100755 index 00000000000..0e3b4760107 --- /dev/null +++ b/tools/closure_linter/closure_linter/common/error.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
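(Aside for reviewers, not part of the patch: the checker.py/checkerbase.py plumbing above can be driven directly from Python rather than through gjslint.py. A minimal sketch, assuming the imported tree and python-gflags are on PYTHONPATH and that importing closure_linter.checker pulls in the modules defining the flags it reads; the target filename is illustrative.)

    import sys

    import gflags as flags
    from closure_linter import checker, errors
    from closure_linter.common import errorprinter

    # gflags must parse argv once before flag values are read.
    argv = flags.FLAGS(['gjslint', 'lib/sys.js'])

    # Same default handler GJsLintRunner.Run builds when none is passed in.
    handler = errorprinter.ErrorPrinter(errors.NEW_ERRORS)
    handler.SetFormat(errorprinter.UNIX_FORMAT)

    style_checker = checker.JavaScriptStyleChecker(handler)
    for filename in argv[1:]:
        style_checker.Check(filename)

    handler.PrintSummary()
    sys.exit(1 if handler.HasErrors() else 0)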
+ +"""Error object commonly used in linters.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + + +class Error(object): + """Object representing a style error.""" + + def __init__(self, code, message, token, position, fix_data): + """Initialize the error object. + + Args: + code: The numeric error code. + message: The error message string. + token: The tokens.Token where the error occurred. + position: The position of the error within the token. + fix_data: Data to be used in autofixing. Codes with fix_data are: + GOOG_REQUIRES_NOT_ALPHABETIZED - List of string value tokens that are + class names in goog.requires calls. + """ + self.code = code + self.message = message + self.token = token + self.position = position + if token: + self.start_index = token.start_index + else: + self.start_index = 0 + self.fix_data = fix_data + if self.position: + self.start_index += self.position.start + + def Compare(a, b): + """Compare two error objects, by source code order. + + Args: + a: First error object. + b: Second error object. + + Returns: + A Negative/0/Positive number when a is before/the same as/after b. + """ + line_diff = a.token.line_number - b.token.line_number + if line_diff: + return line_diff + + return a.start_index - b.start_index + Compare = staticmethod(Compare) diff --git a/tools/closure_linter/closure_linter/common/erroraccumulator.py b/tools/closure_linter/closure_linter/common/erroraccumulator.py new file mode 100755 index 00000000000..7bb0c979597 --- /dev/null +++ b/tools/closure_linter/closure_linter/common/erroraccumulator.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Linter error handler class that accumulates an array of errors.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + + +from closure_linter.common import errorhandler + + +class ErrorAccumulator(errorhandler.ErrorHandler): + """Error handler object that accumulates errors in a list.""" + + def __init__(self): + self._errors = [] + + def HandleError(self, error): + """Append the error to the list. + + Args: + error: The error object + """ + self._errors.append((error.token.line_number, error.code)) + + def GetErrors(self): + """Returns the accumulated errors. + + Returns: + A sequence of errors. + """ + return self._errors diff --git a/tools/closure_linter/closure_linter/common/errorhandler.py b/tools/closure_linter/closure_linter/common/errorhandler.py new file mode 100755 index 00000000000..764d54d84cb --- /dev/null +++ b/tools/closure_linter/closure_linter/common/errorhandler.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Interface for a linter error handler. + +Error handlers aggregate a set of errors from multiple files and can optionally +perform some action based on the reported errors, for example, logging the error +or automatically fixing it. +""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + + +class ErrorHandler(object): + """Error handler interface.""" + + def __init__(self): + if self.__class__ == ErrorHandler: + raise NotImplementedError('class ErrorHandler is abstract') + + def HandleFile(self, filename, first_token): + """Notifies this ErrorHandler that subsequent errors are in filename. + + Args: + filename: The file being linted. + first_token: The first token of the file. + """ + + def HandleError(self, error): + """Append the error to the list. + + Args: + error: The error object + """ + + def FinishFile(self): + """Finishes handling the current file. + + Should be called after all errors in a file have been handled. + """ + + def GetErrors(self): + """Returns the accumulated errors. + + Returns: + A sequence of errors. + """ diff --git a/tools/closure_linter/closure_linter/common/errorprinter.py b/tools/closure_linter/closure_linter/common/errorprinter.py new file mode 100755 index 00000000000..c9754068f1f --- /dev/null +++ b/tools/closure_linter/closure_linter/common/errorprinter.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Linter error handler class that prints errors to stdout.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +from closure_linter.common import error +from closure_linter.common import errorhandler + +Error = error.Error + + +# The error message is of the format: +# Line <number>, E:<code>: message +DEFAULT_FORMAT = 1 + +# The error message is of the format: +# filename:[line number]:message +UNIX_FORMAT = 2 + + +class ErrorPrinter(errorhandler.ErrorHandler): + """ErrorHandler that prints errors to stdout.""" + + def __init__(self, new_errors=None): + """Initializes this error printer. + + Args: + new_errors: A sequence of error codes representing recently introduced + errors, defaults to None.
+ """ + # Number of errors + self._error_count = 0 + + # Number of new errors + self._new_error_count = 0 + + # Number of files checked + self._total_file_count = 0 + + # Number of files with errors + self._error_file_count = 0 + + # Dict of file name to number of errors + self._file_table = {} + + # List of errors for each file + self._file_errors = None + + # Current file + self._filename = None + + self._format = DEFAULT_FORMAT + + if new_errors: + self._new_errors = frozenset(new_errors) + else: + self._new_errors = frozenset(set()) + + def SetFormat(self, format): + """Sets the print format of errors. + + Args: + format: One of {DEFAULT_FORMAT, UNIX_FORMAT}. + """ + self._format = format + + def HandleFile(self, filename, first_token): + """Notifies this ErrorPrinter that subsequent errors are in filename. + + Sets the current file name, and sets a flag stating the header for this file + has not been printed yet. + + Should be called by a linter before a file is style checked. + + Args: + filename: The name of the file about to be checked. + first_token: The first token in the file, or None if there was an error + opening the file + """ + if self._filename and self._file_table[self._filename]: + print + + self._filename = filename + self._file_table[filename] = 0 + self._total_file_count += 1 + self._file_errors = [] + + def HandleError(self, error): + """Prints a formatted error message about the specified error. + + The error message is of the format: + Error #, line #: message + + Args: + error: The error object + """ + self._file_errors.append(error) + self._file_table[self._filename] += 1 + self._error_count += 1 + + if self._new_errors and error.code in self._new_errors: + self._new_error_count += 1 + + def _PrintError(self, error): + """Prints a formatted error message about the specified error. + + Args: + error: The error object + """ + new_error = self._new_errors and error.code in self._new_errors + if self._format == DEFAULT_FORMAT: + line = '' + if error.token: + line = 'Line %d, ' % error.token.line_number + + code = 'E:%04d' % error.code + if new_error: + print '%s%s: (New error) %s' % (line, code, error.message) + else: + print '%s%s: %s' % (line, code, error.message) + else: + # UNIX format + filename = self._filename + line = '' + if error.token: + line = '%d' % error.token.line_number + + error_code = '%04d' % error.code + if new_error: + error_code = 'New Error ' + error_code + print '%s:%s:(%s) %s' % (filename, line, error_code, error.message) + + def FinishFile(self): + """Finishes handling the current file.""" + if self._file_errors: + self._error_file_count += 1 + + if self._format != UNIX_FORMAT: + print '----- FILE : %s -----' % (self._filename) + + self._file_errors.sort(Error.Compare) + + for error in self._file_errors: + self._PrintError(error) + + def HasErrors(self): + """Whether this error printer encountered any errors. + + Returns: + True if the error printer encountered any errors. + """ + return self._error_count + + def HasNewErrors(self): + """Whether this error printer encountered any new errors. + + Returns: + True if the error printer encountered any new errors. + """ + return self._new_error_count + + def HasOldErrors(self): + """Whether this error printer encountered any old errors. + + Returns: + True if the error printer encountered any old errors. 
+ """ + return self._error_count - self._new_error_count + + def PrintSummary(self): + """Print a summary of the number of errors and files.""" + if self.HasErrors() or self.HasNewErrors(): + print ('Found %d errors, including %d new errors, in %d files ' + '(%d files OK).' % ( + self._error_count, + self._new_error_count, + self._error_file_count, + self._total_file_count - self._error_file_count)) + else: + print '%d files checked, no errors found.' % self._total_file_count + + def PrintFileSummary(self): + """Print a detailed summary of the number of errors in each file.""" + keys = self._file_table.keys() + keys.sort() + for filename in keys: + print '%s: %d' % (filename, self._file_table[filename]) diff --git a/tools/closure_linter/closure_linter/common/filetestcase.py b/tools/closure_linter/closure_linter/common/filetestcase.py new file mode 100755 index 00000000000..ae4b883fe2b --- /dev/null +++ b/tools/closure_linter/closure_linter/common/filetestcase.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Test case that runs a checker on a file, matching errors against annotations. + +Runs the given checker on the given file, accumulating all errors. The list +of errors is then matched against those annotated in the file. Based heavily +on devtools/javascript/gpylint/full_test.py. +""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +import re + +import unittest as googletest +from closure_linter.common import erroraccumulator + + +class AnnotatedFileTestCase(googletest.TestCase): + """Test case to run a linter against a single file.""" + + # Matches an all caps letters + underscores error identifer + _MESSAGE = {'msg': '[A-Z][A-Z_]+'} + # Matches a //, followed by an optional line number with a +/-, followed by a + # list of message IDs. Used to extract expected messages from testdata files. + # TODO(robbyw): Generalize to use different commenting patterns. + _EXPECTED_RE = re.compile(r'\s*//\s*(?:(?P[+-]?[0-9]+):)?' + r'\s*(?P%(msg)s(?:,\s*%(msg)s)*)' % _MESSAGE) + + def __init__(self, filename, runner, converter): + """Create a single file lint test case. + + Args: + filename: Filename to test. + runner: Object implementing the LintRunner interface that lints a file. + converter: Function taking an error string and returning an error code. 
+ """ + + googletest.TestCase.__init__(self, 'runTest') + self._filename = filename + self._messages = [] + self._runner = runner + self._converter = converter + + def shortDescription(self): + """Provides a description for the test.""" + return 'Run linter on %s' % self._filename + + def runTest(self): + """Runs the test.""" + try: + filename = self._filename + stream = open(filename) + except IOError, ex: + raise IOError('Could not find testdata resource for %s: %s' % + (self._filename, ex)) + + expected = self._GetExpectedMessages(stream) + got = self._ProcessFileAndGetMessages(filename) + self.assertEqual(expected, got) + + def _GetExpectedMessages(self, stream): + """Parse a file and get a sorted list of expected messages.""" + messages = [] + for i, line in enumerate(stream): + match = self._EXPECTED_RE.search(line) + if match: + line = match.group('line') + msg_ids = match.group('msgs') + if line is None: + line = i + 1 + elif line.startswith('+') or line.startswith('-'): + line = i + 1 + int(line) + else: + line = int(line) + for msg_id in msg_ids.split(','): + # Ignore a spurious message from the license preamble. + if msg_id != 'WITHOUT': + messages.append((line, self._converter(msg_id.strip()))) + stream.seek(0) + messages.sort() + return messages + + def _ProcessFileAndGetMessages(self, filename): + """Trap gpylint's output parse it to get messages added.""" + errors = erroraccumulator.ErrorAccumulator() + self._runner.Run([filename], errors) + + errors = errors.GetErrors() + errors.sort() + return errors diff --git a/tools/closure_linter/closure_linter/common/htmlutil.py b/tools/closure_linter/closure_linter/common/htmlutil.py new file mode 100755 index 00000000000..26d44c59083 --- /dev/null +++ b/tools/closure_linter/closure_linter/common/htmlutil.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for dealing with HTML.""" + +__author__ = ('robbyw@google.com (Robert Walker)') + +import cStringIO +import formatter +import htmllib +import HTMLParser +import re + + +class ScriptExtractor(htmllib.HTMLParser): + """Subclass of HTMLParser that extracts script contents from an HTML file. + + Also inserts appropriate blank lines so that line numbers in the extracted + code match the line numbers in the original HTML. + """ + + def __init__(self): + """Initialize a ScriptExtractor.""" + htmllib.HTMLParser.__init__(self, formatter.NullFormatter()) + self._in_script = False + self._text = '' + + def start_script(self, attrs): + """Internal handler for the start of a script tag. + + Args: + attrs: The attributes of the script tag, as a list of tuples. + """ + for attribute in attrs: + if attribute[0].lower() == 'src': + # Skip script tags with a src specified. 
+ return + self._in_script = True + + def end_script(self): + """Internal handler for the end of a script tag.""" + self._in_script = False + + def handle_data(self, data): + """Internal handler for character data. + + Args: + data: The character data from the HTML file. + """ + if self._in_script: + # If the last line contains whitespace only, i.e. is just there to + # properly align a tag, strip the whitespace. + if data.rstrip(' \t') != data.rstrip(' \t\n\r\f'): + data = data.rstrip(' \t') + self._text += data + else: + self._AppendNewlines(data) + + def handle_comment(self, data): + """Internal handler for HTML comments. + + Args: + data: The text of the comment. + """ + self._AppendNewlines(data) + + def _AppendNewlines(self, data): + """Count the number of newlines in the given string and append them. + + This ensures line numbers are correct for reported errors. + + Args: + data: The data to count newlines in. + """ + # We append 'x' to both sides of the string to ensure that splitlines + # gives us an accurate count. + for i in xrange(len(('x' + data + 'x').splitlines()) - 1): + self._text += '\n' + + def GetScriptLines(self): + """Return the extracted script lines. + + Returns: + The extracted script lines as a list of strings. + """ + return self._text.splitlines() + + +def GetScriptLines(f): + """Extract script tag contents from the given HTML file. + + Args: + f: The HTML file. + + Returns: + Lines in the HTML file that are from script tags. + """ + extractor = ScriptExtractor() + + # The HTML parser chokes on text like Array.<!string>, so we patch + # that bug by replacing the < with &lt; - escaping all text inside script + # tags would be better but it's a bit of a catch 22. + contents = f.read() + contents = re.sub(r'<([^\s\w/])', + lambda x: '&lt;%s' % x.group(1), + contents) + + extractor.feed(contents) + extractor.close() + return extractor.GetScriptLines() + + +def StripTags(str): + """Returns the string with HTML tags stripped. + + Args: + str: An html string. + + Returns: + The html string with all tags stripped. If there was a parse error, returns + the text successfully parsed so far. + """ + # Brute force approach to stripping as much HTML as possible. If there is a + # parsing error, don't strip text before parse error position, and continue + # trying from there. + final_text = '' + finished = False + while not finished: + try: + strip = _HtmlStripper() + strip.feed(str) + strip.close() + str = strip.get_output() + final_text += str + finished = True + except HTMLParser.HTMLParseError, e: + final_text += str[:e.offset] + str = str[e.offset + 1:] + + return final_text + + +class _HtmlStripper(HTMLParser.HTMLParser): + """Simple class to strip tags from HTML. + + Does so by doing nothing when encountering tags, and appending character data + to a buffer when that is encountered. + """ + def __init__(self): + self.reset() + self.__output = cStringIO.StringIO() + + def handle_data(self, d): + self.__output.write(d) + + def get_output(self): + return self.__output.getvalue() diff --git a/tools/closure_linter/closure_linter/common/lintrunner.py b/tools/closure_linter/closure_linter/common/lintrunner.py new file mode 100755 index 00000000000..07842c7bfeb --- /dev/null +++ b/tools/closure_linter/closure_linter/common/lintrunner.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
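(Aside for reviewers, not part of the patch: a quick sketch of how the htmlutil.py helpers above behave, runnable against this tree as-is under Python 2.)

    import cStringIO

    from closure_linter.common import htmlutil

    html = ('<html><body>\n'
            '<script>\n'
            'var x = 1;\n'
            '</script>\n'
            '</body></html>\n')

    # Non-script lines come back blank so line numbers still line up.
    for number, line in enumerate(htmlutil.GetScriptLines(cStringIO.StringIO(html))):
        print '%d: %r' % (number + 1, line)

    print htmlutil.StripTags('<b>bold</b> text')  # -> 'bold text'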
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Interface for a lint running wrapper.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + + +class LintRunner(object): + """Interface for a lint running wrapper.""" + + def __init__(self): + if self.__class__ == LintRunner: + raise NotImplementedError('class LintRunner is abstract') + + def Run(self, filenames, error_handler): + """Run a linter on the given filenames. + + Args: + filenames: The filenames to check + error_handler: An ErrorHandler object + + Returns: + The error handler, which may have been used to collect error info. + """ diff --git a/tools/closure_linter/closure_linter/common/matcher.py b/tools/closure_linter/closure_linter/common/matcher.py new file mode 100755 index 00000000000..9b4402c6718 --- /dev/null +++ b/tools/closure_linter/closure_linter/common/matcher.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Regular expression based JavaScript matcher classes.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +from closure_linter.common import position +from closure_linter.common import tokens + +# Shorthand +Token = tokens.Token +Position = position.Position + + +class Matcher(object): + """A token matcher. + + Specifies a pattern to match, the type of token it represents, what mode the + token changes to, and what mode the token applies to. + + Modes allow more advanced grammars to be incorporated, and are also necessary + to tokenize line by line. We can have different patterns apply to different + modes - i.e. looking for documentation while in comment mode. + + Attributes: + regex: The regular expression representing this matcher. + type: The type of token indicated by a successful match. + result_mode: The mode to move to after a successful match. + """ + + def __init__(self, regex, token_type, result_mode=None, line_start=False): + """Create a new matcher template. + + Args: + regex: The regular expression to match. + token_type: The type of token a successful match indicates. + result_mode: What mode to change to after a successful match. Defaults to + None, which means to not change the current mode. + line_start: Whether this matcher should only match string at the start + of a line. 
+ """ + self.regex = regex + self.type = token_type + self.result_mode = result_mode + self.line_start = line_start diff --git a/tools/closure_linter/closure_linter/common/position.py b/tools/closure_linter/closure_linter/common/position.py new file mode 100755 index 00000000000..cebf17ef362 --- /dev/null +++ b/tools/closure_linter/closure_linter/common/position.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Classes to represent positions within strings.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + + +class Position(object): + """Object representing a segment of a string. + + Attributes: + start: The index in to the string where the segment starts. + length: The length of the string segment. + """ + + def __init__(self, start, length): + """Initialize the position object. + + Args: + start: The start index. + length: The number of characters to include. + """ + self.start = start + self.length = length + + def Get(self, string): + """Returns this range of the given string. + + Args: + string: The string to slice. + + Returns: + The string within the range specified by this object. + """ + return string[self.start:self.start + self.length] + + def Set(self, target, source): + """Sets this range within the target string to the source string. + + Args: + target: The target string. + source: The source string. + + Returns: + The resulting string + """ + return target[:self.start] + source + target[self.start + self.length:] + + def AtEnd(string): + """Create a Position representing the end of the given string. + + Args: + string: The string to represent the end of. + + Returns: + The created Position object. + """ + return Position(len(string), 0) + AtEnd = staticmethod(AtEnd) + + def IsAtEnd(self, string): + """Returns whether this position is at the end of the given string. + + Args: + string: The string to test for the end of. + + Returns: + Whether this position is at the end of the given string. + """ + return self.start == len(string) and self.length == 0 + + def AtBeginning(): + """Create a Position representing the beginning of any string. + + Returns: + The created Position object. + """ + return Position(0, 0) + AtBeginning = staticmethod(AtBeginning) + + def IsAtBeginning(self): + """Returns whether this position is at the beginning of any string. + + Returns: + Whether this position is at the beginning of any string. + """ + return self.start == 0 and self.length == 0 + + def All(string): + """Create a Position representing the entire string. + + Args: + string: The string to represent the entirety of. + + Returns: + The created Position object. + """ + return Position(0, len(string)) + All = staticmethod(All) + + def Index(index): + """Returns a Position object for the specified index. + + Args: + index: The index to select, inclusively. + + Returns: + The created Position object. 
+ """ + return Position(index, 1) + Index = staticmethod(Index) diff --git a/tools/closure_linter/closure_linter/common/simplefileflags.py b/tools/closure_linter/closure_linter/common/simplefileflags.py new file mode 100755 index 00000000000..3402bef3a1d --- /dev/null +++ b/tools/closure_linter/closure_linter/common/simplefileflags.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Determines the list of files to be checked from command line arguments.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +import glob +import os +import re + +import gflags as flags + + +FLAGS = flags.FLAGS + +flags.DEFINE_multistring( + 'recurse', + None, + 'Recurse in to the subdirectories of the given path', + short_name='r') +flags.DEFINE_list( + 'exclude_directories', + ('_demos'), + 'Exclude the specified directories (only applicable along with -r or ' + '--presubmit)', + short_name='e') +flags.DEFINE_list( + 'exclude_files', + ('deps.js'), + 'Exclude the specified files', + short_name='x') + + +def MatchesSuffixes(filename, suffixes): + """Returns whether the given filename matches one of the given suffixes. + + Args: + filename: Filename to check. + suffixes: Sequence of suffixes to check. + + Returns: + Whether the given filename matches one of the given suffixes. + """ + suffix = filename[filename.rfind('.'):] + return suffix in suffixes + + +def _GetUserSpecifiedFiles(argv, suffixes): + """Returns files to be linted, specified directly on the command line. + + Can handle the '*' wildcard in filenames, but no other wildcards. + + Args: + argv: Sequence of command line arguments. The second and following arguments + are assumed to be files that should be linted. + suffixes: Expected suffixes for the file type being checked. + + Returns: + A sequence of files to be linted. + """ + files = argv[1:] or [] + all_files = [] + lint_files = [] + + # Perform any necessary globs. + for f in files: + if f.find('*') != -1: + for result in glob.glob(f): + all_files.append(result) + else: + all_files.append(f) + + for f in all_files: + if MatchesSuffixes(f, suffixes): + lint_files.append(f) + return lint_files + + +def _GetRecursiveFiles(suffixes): + """Returns files to be checked specified by the --recurse flag. + + Args: + suffixes: Expected suffixes for the file type being checked. + + Returns: + A list of files to be checked. + """ + lint_files = [] + # Perform any request recursion + if FLAGS.recurse: + for start in FLAGS.recurse: + for root, subdirs, files in os.walk(start): + for f in files: + if MatchesSuffixes(f, suffixes): + lint_files.append(os.path.join(root, f)) + return lint_files + + +def GetAllSpecifiedFiles(argv, suffixes): + """Returns all files specified by the user on the commandline. + + Args: + argv: Sequence of command line arguments. The second and following arguments + are assumed to be files that should be linted. 
+ suffixes: Expected suffixes for the file type + + Returns: + A list of all files specified directly or indirectly (via flags) on the + command line by the user. + """ + files = _GetUserSpecifiedFiles(argv, suffixes) + + if FLAGS.recurse: + files += _GetRecursiveFiles(suffixes) + + return FilterFiles(files) + + +def FilterFiles(files): + """Filters the list of files to be linted by removing any excluded files. + + Filters out files excluded using --exclude_files and --exclude_directories. + + Args: + files: Sequence of files that need filtering. + + Returns: + Filtered list of files to be linted. + """ + num_files = len(files) + + ignore_dirs_regexs = [] + for ignore in FLAGS.exclude_directories: + ignore_dirs_regexs.append(re.compile(r'(^|[\\/])%s[\\/]' % ignore)) + + result_files = [] + for f in files: + add_file = True + for exclude in FLAGS.exclude_files: + if f.endswith('/' + exclude) or f == exclude: + add_file = False + break + for ignore in ignore_dirs_regexs: + if ignore.search(f): + # Break out of ignore loop so we don't add to + # filtered files. + add_file = False + break + if add_file: + # Convert everything to absolute paths so we can easily remove duplicates + # using a set. + result_files.append(os.path.abspath(f)) + + skipped = num_files - len(result_files) + if skipped: + print 'Skipping %d file(s).' % skipped + + return set(result_files) + + +def GetFileList(argv, file_type, suffixes): + """Parse the flags and return the list of files to check. + + Args: + argv: Sequence of command line arguments. + file_type: Description of the file type being checked (unused here). + suffixes: Sequence of acceptable suffixes for the file type. + + Returns: + The list of files to check. + """ + return sorted(GetAllSpecifiedFiles(argv, suffixes)) + + +def IsEmptyArgumentList(argv): + """Returns whether argv names no files and --recurse was not given.""" + return not (len(argv[1:]) or FLAGS.recurse) diff --git a/tools/closure_linter/closure_linter/common/tokenizer.py b/tools/closure_linter/closure_linter/common/tokenizer.py new file mode 100755 index 00000000000..0234720d73b --- /dev/null +++ b/tools/closure_linter/closure_linter/common/tokenizer.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Regular expression based lexer.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +from closure_linter.common import tokens + +# Shorthand +Type = tokens.TokenType + + +class Tokenizer(object): + """General purpose tokenizer. + + Attributes: + mode: The latest mode of the tokenizer. This allows patterns to distinguish + if they are mid-comment, mid-parameter list, etc. + matchers: Dictionary of modes to sequences of matchers that define the + patterns to check at any given time. + default_types: Dictionary of modes to types, defining what type to give + non-matched text when in the given mode. Defaults to Type.NORMAL. + """ + + def __init__(self, starting_mode, matchers, default_types): + """Initialize the tokenizer. + + Args: + starting_mode: Mode to start in.
+ matchers: Dictionary of modes to sequences of matchers that defines the + patterns to check at any given time. + default_types: Dictionary of modes to types, defining what type to give + non-matched text when in the given mode. Defaults to Type.NORMAL. + """ + self.__starting_mode = starting_mode + self.matchers = matchers + self.default_types = default_types + + def TokenizeFile(self, file): + """Tokenizes the given file. + + Args: + file: An iterable that yields one line of the file at a time. + + Returns: + The first token in the file + """ + # The current mode. + self.mode = self.__starting_mode + # The first token in the stream. + self.__first_token = None + # The last token added to the token stream. + self.__last_token = None + # The current line number. + self.__line_number = 0 + + for line in file: + self.__line_number += 1 + self.__TokenizeLine(line) + + return self.__first_token + + def _CreateToken(self, string, token_type, line, line_number, values=None): + """Creates a new Token object (or subclass). + + Args: + string: The string of input the token represents. + token_type: The type of token. + line: The text of the line this token is in. + line_number: The line number of the token. + values: A dict of named values within the token. For instance, a + function declaration may have a value called 'name' which captures the + name of the function. + + Returns: + The newly created Token object. + """ + return tokens.Token(string, token_type, line, line_number, values) + + def __TokenizeLine(self, line): + """Tokenizes the given line. + + Args: + line: The contents of the line. + """ + string = line.rstrip('\n\r\f') + line_number = self.__line_number + self.__start_index = 0 + + if not string: + self.__AddToken(self._CreateToken('', Type.BLANK_LINE, line, line_number)) + return + + normal_token = '' + index = 0 + while index < len(string): + for matcher in self.matchers[self.mode]: + if matcher.line_start and index > 0: + continue + + match = matcher.regex.match(string, index) + + if match: + if normal_token: + self.__AddToken( + self.__CreateNormalToken(self.mode, normal_token, line, + line_number)) + normal_token = '' + + # Add the match. + self.__AddToken(self._CreateToken(match.group(), matcher.type, line, + line_number, match.groupdict())) + + # Change the mode to the correct one for after this match. + self.mode = matcher.result_mode or self.mode + + # Shorten the string to be matched. + index = match.end() + + break + + else: + # If the for loop finishes naturally (i.e. no matches) we just add the + # first character to the string of consecutive non match characters. + # These will constitute a NORMAL token. + if string: + normal_token += string[index:index + 1] + index += 1 + + if normal_token: + self.__AddToken( + self.__CreateNormalToken(self.mode, normal_token, line, line_number)) + + def __CreateNormalToken(self, mode, string, line, line_number): + """Creates a normal token. + + Args: + mode: The current mode. + string: The string to tokenize. + line: The line of text. + line_number: The line number within the file. + + Returns: + A Token object, of the default type for the current mode. + """ + type = Type.NORMAL + if mode in self.default_types: + type = self.default_types[mode] + return self._CreateToken(string, type, line, line_number) + + def __AddToken(self, token): + """Add the given token to the token stream. + + Args: + token: The token to add. + """ + # Store the first token, or point the previous token to this one. 
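A minimal end-to-end sketch of the Tokenizer above. The DigitMatcher stand-in is hypothetical: it only supplies the four attributes (regex, type, result_mode, line_start) that __TokenizeLine reads, so the real Matcher class is not needed here.

import re

from closure_linter.common import tokenizer

class DigitMatcher(object):
  # Duck-typed matcher: recognizes runs of digits and stays in the same mode.
  regex = re.compile(r'\d+')
  type = 'number'
  result_mode = None
  line_start = False

t = tokenizer.Tokenizer('code', {'code': [DigitMatcher()]}, {})
token = t.TokenizeFile(['abc 123 def\n'])
while token:
  print token.type, repr(token.string), token.start_index
  token = token.next
# Prints: normal 'abc ' 0, number '123' 4, normal ' def' 7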
+ if not self.__first_token: + self.__first_token = token + else: + self.__last_token.next = token + + # Establish the doubly linked list + token.previous = self.__last_token + self.__last_token = token + + # Compute the character indices + token.start_index = self.__start_index + self.__start_index += token.length diff --git a/tools/closure_linter/closure_linter/common/tokens.py b/tools/closure_linter/closure_linter/common/tokens.py new file mode 100755 index 00000000000..5eaffa8cba4 --- /dev/null +++ b/tools/closure_linter/closure_linter/common/tokens.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Classes to represent tokens and positions within them.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + + +class TokenType(object): + """Token types common to all languages.""" + NORMAL = 'normal' + WHITESPACE = 'whitespace' + BLANK_LINE = 'blank line' + + +class Token(object): + """Token class for intelligent text splitting. + + The token class represents a string of characters and an identifying type. + + Attributes: + type: The type of token. + string: The characters the token comprises. + length: The length of the token. + line: The text of the line the token is found in. + line_number: The number of the line the token is found in. + values: Dictionary of values returned from the tokens regex match. + previous: The token before this one. + next: The token after this one. + start_index: The character index in the line where this token starts. + attached_object: Object containing more information about this token. + metadata: Object containing metadata about this token. Must be added by + a separate metadata pass. + """ + + def __init__(self, string, token_type, line, line_number, values=None): + """Creates a new Token object. + + Args: + string: The string of input the token contains. + token_type: The type of token. + line: The text of the line this token is in. + line_number: The line number of the token. + values: A dict of named values within the token. For instance, a + function declaration may have a value called 'name' which captures the + name of the function. + """ + self.type = token_type + self.string = string + self.length = len(string) + self.line = line + self.line_number = line_number + self.values = values + + # These parts can only be computed when the file is fully tokenized + self.previous = None + self.next = None + self.start_index = None + + # This part is set in statetracker.py + # TODO(robbyw): Wrap this in to metadata + self.attached_object = None + + # This part is set in *metadatapass.py + self.metadata = None + + def IsFirstInLine(self): + """Tests if this token is the first token in its line. + + Returns: + Whether the token is the first token in its line. 
+ """ + return not self.previous or self.previous.line_number != self.line_number + + def IsLastInLine(self): + """Tests if this token is the last token in its line. + + Returns: + Whether the token is the last token in its line. + """ + return not self.next or self.next.line_number != self.line_number + + def IsType(self, token_type): + """Tests if this token is of the given type. + + Args: + token_type: The type to test for. + + Returns: + True if the type of this token matches the type passed in. + """ + return self.type == token_type + + def IsAnyType(self, *token_types): + """Tests if this token is any of the given types. + + Args: + token_types: The types to check. Also accepts a single array. + + Returns: + True if the type of this token is any of the types passed in. + """ + if not isinstance(token_types[0], basestring): + return self.type in token_types[0] + else: + return self.type in token_types + + def __repr__(self): + return '' % (self.type, self.string, + self.values, self.line_number, + self.metadata) diff --git a/tools/closure_linter/closure_linter/ecmalintrules.py b/tools/closure_linter/closure_linter/ecmalintrules.py new file mode 100755 index 00000000000..a971b44d77e --- /dev/null +++ b/tools/closure_linter/closure_linter/ecmalintrules.py @@ -0,0 +1,752 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Core methods for checking EcmaScript files for common style guide violations. +""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)', + 'jacobr@google.com (Jacob Richman)') + +import re + +from closure_linter import checkerbase +from closure_linter import ecmametadatapass +from closure_linter import errors +from closure_linter import indentation +from closure_linter import javascripttokens +from closure_linter import javascripttokenizer +from closure_linter import statetracker +from closure_linter import tokenutil +from closure_linter.common import error +from closure_linter.common import htmlutil +from closure_linter.common import lintrunner +from closure_linter.common import position +from closure_linter.common import tokens +import gflags as flags + +FLAGS = flags.FLAGS +flags.DEFINE_boolean('strict', False, + 'Whether to validate against the stricter Closure style.') +flags.DEFINE_list('custom_jsdoc_tags', '', 'Extra jsdoc tags to allow') + +# TODO(robbyw): Check for extra parens on return statements +# TODO(robbyw): Check for 0px in strings +# TODO(robbyw): Ensure inline jsDoc is in {} +# TODO(robbyw): Check for valid JS types in parameter docs + +# Shorthand +Context = ecmametadatapass.EcmaContext +Error = error.Error +Modes = javascripttokenizer.JavaScriptModes +Position = position.Position +Type = javascripttokens.JavaScriptTokenType + +class EcmaScriptLintRules(checkerbase.LintRulesBase): + """EmcaScript lint style checking rules. 
+ + Can be used to find common style errors in JavaScript, ActionScript and other + Ecma like scripting languages. Style checkers for Ecma scripting languages + should inherit from this style checker. + Please do not add any state to EcmaScriptLintRules or to any subclasses. + + All state should be added to the StateTracker subclass used for a particular + language. + """ + + # Static constants. + MAX_LINE_LENGTH = 80 + + MISSING_PARAMETER_SPACE = re.compile(r',\S') + + EXTRA_SPACE = re.compile('(\(\s|\s\))') + + ENDS_WITH_SPACE = re.compile('\s$') + + ILLEGAL_TAB = re.compile(r'\t') + + # Regex used to split up complex types to check for invalid use of ? and |. + TYPE_SPLIT = re.compile(r'[,<>()]') + + # Regex for form of author lines after the @author tag. + AUTHOR_SPEC = re.compile(r'(\s*)[^\s]+@[^(\s]+(\s*)\(.+\)') + + # Acceptable tokens to remove for line too long testing. + LONG_LINE_IGNORE = frozenset(['*', '//', '@see'] + + ['@%s' % tag for tag in statetracker.DocFlag.HAS_TYPE]) + + def __init__(self): + """Initialize this lint rule object.""" + checkerbase.LintRulesBase.__init__(self) + + def Initialize(self, checker, limited_doc_checks, is_html): + """Initialize this lint rule object before parsing a new file.""" + checkerbase.LintRulesBase.Initialize(self, checker, limited_doc_checks, + is_html) + self._indentation = indentation.IndentationRules() + + def HandleMissingParameterDoc(self, token, param_name): + """Handle errors associated with a parameter missing a @param tag.""" + raise TypeError('Abstract method HandleMissingParameterDoc not implemented') + + def _CheckLineLength(self, last_token, state): + """Checks whether the line is too long. + + Args: + last_token: The last token in the line. + """ + # Start from the last token so that we have the flag object attached to + # and DOC_FLAG tokens. + line_number = last_token.line_number + token = last_token + + # Build a representation of the string where spaces indicate potential + # line-break locations. + line = [] + while token and token.line_number == line_number: + if state.IsTypeToken(token): + line.insert(0, 'x' * len(token.string)) + elif token.type in (Type.IDENTIFIER, Type.NORMAL): + # Dots are acceptable places to wrap. + line.insert(0, token.string.replace('.', ' ')) + else: + line.insert(0, token.string) + token = token.previous + + line = ''.join(line) + line = line.rstrip('\n\r\f') + try: + length = len(unicode(line, 'utf-8')) + except: + # Unknown encoding. The line length may be wrong, as was originally the + # case for utf-8 (see bug 1735846). For now just accept the default + # length, but as we find problems we can either add test for other + # possible encodings or return without an error to protect against + # false positives at the cost of more false negatives. + length = len(line) + + if length > self.MAX_LINE_LENGTH: + + # If the line matches one of the exceptions, then it's ok. + for long_line_regexp in self.GetLongLineExceptions(): + if long_line_regexp.match(last_token.line): + return + + # If the line consists of only one "word", or multiple words but all + # except one are ignoreable, then it's ok. + parts = set(line.split()) + + # We allow two "words" (type and name) when the line contains @param + max = 1 + if '@param' in parts: + max = 2 + + # Custom tags like @requires may have url like descriptions, so ignore + # the tag, similar to how we handle @see. 
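What the class-level whitespace regexes above actually flag, as a quick standalone sketch using the same patterns:

import re

MISSING_PARAMETER_SPACE = re.compile(r',\S')
EXTRA_SPACE = re.compile('(\(\s|\s\))')

print bool(MISSING_PARAMETER_SPACE.search('f(a,b)'))   # True: no space after ','
print bool(MISSING_PARAMETER_SPACE.search('f(a, b)'))  # False
print bool(EXTRA_SPACE.search('f( a)'))                # True: space after '('
print bool(EXTRA_SPACE.search('f(a )'))                # True: space before ')'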
+ custom_tags = set(['@%s' % f for f in FLAGS.custom_jsdoc_tags]) + if (len(parts.difference(self.LONG_LINE_IGNORE | custom_tags)) > max): + self._HandleError(errors.LINE_TOO_LONG, + 'Line too long (%d characters).' % len(line), last_token) + + def _CheckJsDocType(self, token): + """Checks the given type for style errors. + + Args: + token: The DOC_FLAG token for the flag whose type to check. + """ + flag = token.attached_object + type = flag.type + if type and type is not None and not type.isspace(): + pieces = self.TYPE_SPLIT.split(type) + if len(pieces) == 1 and type.count('|') == 1 and ( + type.endswith('|null') or type.startswith('null|')): + self._HandleError(errors.JSDOC_PREFER_QUESTION_TO_PIPE_NULL, + 'Prefer "?Type" to "Type|null": "%s"' % type, token) + + for p in pieces: + if p.count('|') and p.count('?'): + # TODO(robbyw): We should do actual parsing of JsDoc types. As is, + # this won't report an error for {number|Array.?}, etc. + self._HandleError(errors.JSDOC_ILLEGAL_QUESTION_WITH_PIPE, + 'JsDoc types cannot contain both "?" and "|": "%s"' % p, token) + + if FLAGS.strict and (flag.type_start_token.type != Type.DOC_START_BRACE or + flag.type_end_token.type != Type.DOC_END_BRACE): + self._HandleError(errors.MISSING_BRACES_AROUND_TYPE, + 'Type must always be surrounded by curly braces.', token) + + def _CheckForMissingSpaceBeforeToken(self, token): + """Checks for a missing space at the beginning of a token. + + Reports a MISSING_SPACE error if the token does not begin with a space or + the previous token doesn't end with a space and the previous token is on the + same line as the token. + + Args: + token: The token being checked + """ + # TODO(user): Check if too many spaces? + if (len(token.string) == len(token.string.lstrip()) and + token.previous and token.line_number == token.previous.line_number and + len(token.previous.string) - len(token.previous.string.rstrip()) == 0): + self._HandleError( + errors.MISSING_SPACE, + 'Missing space before "%s"' % token.string, + token, + Position.AtBeginning()) + + def _ExpectSpaceBeforeOperator(self, token): + """Returns whether a space should appear before the given operator token. + + Args: + token: The operator token. + + Returns: + Whether there should be a space before the token. + """ + if token.string == ',' or token.metadata.IsUnaryPostOperator(): + return False + + # Colons should appear in labels, object literals, the case of a switch + # statement, and ternary operator. Only want a space in the case of the + # ternary operator. + if (token.string == ':' and + token.metadata.context.type in (Context.LITERAL_ELEMENT, + Context.CASE_BLOCK, + Context.STATEMENT)): + return False + + if token.metadata.IsUnaryOperator() and token.IsFirstInLine(): + return False + + return True + + def CheckToken(self, token, state): + """Checks a token, given the current parser_state, for warnings and errors. + + Args: + token: The current token under consideration + state: parser_state object that indicates the current state in the page + """ + # Store some convenience variables + first_in_line = token.IsFirstInLine() + last_in_line = token.IsLastInLine() + last_non_space_token = state.GetLastNonSpaceToken() + + type = token.type + + # Process the line change. + if not self._is_html and FLAGS.strict: + # TODO(robbyw): Support checking indentation in HTML files. 
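How _CheckJsDocType's TYPE_SPLIT regex breaks a type annotation into pieces before the '?'/'|' checks run, sketched standalone:

import re

TYPE_SPLIT = re.compile(r'[,<>()]')

print TYPE_SPLIT.split('Array.<string|null>')
# -> ['Array.', 'string|null', '']: each piece is checked separately.
print TYPE_SPLIT.split('number|null')
# -> ['number|null']: a single piece ending in '|null', which is the case
#    where the checker suggests '?number' instead.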
+ indentation_errors = self._indentation.CheckToken(token, state) + for indentation_error in indentation_errors: + self._HandleError(*indentation_error) + + if last_in_line: + self._CheckLineLength(token, state) + + if type == Type.PARAMETERS: + # Find missing spaces in parameter lists. + if self.MISSING_PARAMETER_SPACE.search(token.string): + self._HandleError(errors.MISSING_SPACE, 'Missing space after ","', + token) + + # Find extra spaces at the beginning of parameter lists. Make sure + # we aren't at the beginning of a continuing multi-line list. + if not first_in_line: + space_count = len(token.string) - len(token.string.lstrip()) + if space_count: + self._HandleError(errors.EXTRA_SPACE, 'Extra space after "("', + token, Position(0, space_count)) + + elif (type == Type.START_BLOCK and + token.metadata.context.type == Context.BLOCK): + self._CheckForMissingSpaceBeforeToken(token) + + elif type == Type.END_BLOCK: + # This check is for object literal end block tokens, but there is no need + # to test that condition since a comma at the end of any other kind of + # block is undoubtedly a parse error. + last_code = token.metadata.last_code + if last_code.IsOperator(','): + self._HandleError(errors.COMMA_AT_END_OF_LITERAL, + 'Illegal comma at end of object literal', last_code, + Position.All(last_code.string)) + + if state.InFunction() and state.IsFunctionClose(): + is_immediately_called = (token.next and + token.next.type == Type.START_PAREN) + if state.InTopLevelFunction(): + # When the function was top-level and not immediately called, check + # that it's terminated by a semi-colon. + if state.InAssignedFunction(): + if not is_immediately_called and (last_in_line or + not token.next.type == Type.SEMICOLON): + self._HandleError(errors.MISSING_SEMICOLON_AFTER_FUNCTION, + 'Missing semicolon after function assigned to a variable', + token, Position.AtEnd(token.string)) + else: + if not last_in_line and token.next.type == Type.SEMICOLON: + self._HandleError(errors.ILLEGAL_SEMICOLON_AFTER_FUNCTION, + 'Illegal semicolon after function declaration', + token.next, Position.All(token.next.string)) + + if (state.InInterfaceMethod() and last_code.type != Type.START_BLOCK): + self._HandleError(errors.INTERFACE_METHOD_CANNOT_HAVE_CODE, + 'Interface methods cannot contain code', last_code) + + elif (state.IsBlockClose() and + token.next and token.next.type == Type.SEMICOLON): + self._HandleError(errors.REDUNDANT_SEMICOLON, + 'No semicolon is required to end a code block', + token.next, Position.All(token.next.string)) + + elif type == Type.SEMICOLON: + if token.previous and token.previous.type == Type.WHITESPACE: + self._HandleError(errors.EXTRA_SPACE, 'Extra space before ";"', + token.previous, Position.All(token.previous.string)) + + if token.next and token.next.line_number == token.line_number: + if token.metadata.context.type != Context.FOR_GROUP_BLOCK: + # TODO(robbyw): Error about no multi-statement lines. + pass + + elif token.next.type not in ( + Type.WHITESPACE, Type.SEMICOLON, Type.END_PAREN): + self._HandleError(errors.MISSING_SPACE, + 'Missing space after ";" in for statement', + token.next, + Position.AtBeginning()) + + last_code = token.metadata.last_code + if last_code and last_code.type == Type.SEMICOLON: + # Allow a single double semi colon in for loops for cases like: + # for (;;) { }. + # NOTE(user): This is not a perfect check, and will not throw an error + # for cases like: for (var i = 0;; i < n; i++) {}, but then your code + # probably won't work either. 
+ for_token = tokenutil.CustomSearch(last_code, + lambda token: token.type == Type.KEYWORD and token.string == 'for', + end_func=lambda token: token.type == Type.SEMICOLON, + distance=None, + reverse=True) + + if not for_token: + self._HandleError(errors.REDUNDANT_SEMICOLON, 'Redundant semicolon', + token, Position.All(token.string)) + + elif type == Type.START_PAREN: + if token.previous and token.previous.type == Type.KEYWORD: + self._HandleError(errors.MISSING_SPACE, 'Missing space before "("', + token, Position.AtBeginning()) + elif token.previous and token.previous.type == Type.WHITESPACE: + before_space = token.previous.previous + if (before_space and before_space.line_number == token.line_number and + before_space.type == Type.IDENTIFIER): + self._HandleError(errors.EXTRA_SPACE, 'Extra space before "("', + token.previous, Position.All(token.previous.string)) + + elif type == Type.START_BRACKET: + if (not first_in_line and token.previous.type == Type.WHITESPACE and + last_non_space_token and + last_non_space_token.type in Type.EXPRESSION_ENDER_TYPES): + self._HandleError(errors.EXTRA_SPACE, 'Extra space before "["', + token.previous, Position.All(token.previous.string)) + # If the [ token is the first token in a line we shouldn't complain + # about a missing space before [. This is because some Ecma script + # languages allow syntax like: + # [Annotation] + # class MyClass {...} + # So we don't want to blindly warn about missing spaces before [. + # In the the future, when rules for computing exactly how many spaces + # lines should be indented are added, then we can return errors for + # [ tokens that are improperly indented. + # For example: + # var someVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongVariableName = + # [a,b,c]; + # should trigger a proper indentation warning message as [ is not indented + # by four spaces. + elif (not first_in_line and token.previous and + not token.previous.type in ( + [Type.WHITESPACE, Type.START_PAREN, Type.START_BRACKET] + + Type.EXPRESSION_ENDER_TYPES)): + self._HandleError(errors.MISSING_SPACE, 'Missing space before "["', + token, Position.AtBeginning()) + + elif type in (Type.END_PAREN, Type.END_BRACKET): + # Ensure there is no space before closing parentheses, except when + # it's in a for statement with an omitted section, or when it's at the + # beginning of a line. + if (token.previous and token.previous.type == Type.WHITESPACE and + not token.previous.IsFirstInLine() and + not (last_non_space_token and last_non_space_token.line_number == + token.line_number and + last_non_space_token.type == Type.SEMICOLON)): + self._HandleError(errors.EXTRA_SPACE, 'Extra space before "%s"' % + token.string, token.previous, Position.All(token.previous.string)) + + if token.type == Type.END_BRACKET: + last_code = token.metadata.last_code + if last_code.IsOperator(','): + self._HandleError(errors.COMMA_AT_END_OF_LITERAL, + 'Illegal comma at end of array literal', last_code, + Position.All(last_code.string)) + + elif type == Type.WHITESPACE: + if self.ILLEGAL_TAB.search(token.string): + if token.IsFirstInLine(): + self._HandleError(errors.ILLEGAL_TAB, + 'Illegal tab in whitespace before "%s"' % token.next.string, + token, Position.All(token.string)) + else: + self._HandleError(errors.ILLEGAL_TAB, + 'Illegal tab in whitespace after "%s"' % token.previous.string, + token, Position.All(token.string)) + + # Check whitespace length if it's not the first token of the line and + # if it's not immediately before a comment. 
+ if last_in_line: + # Check for extra whitespace at the end of a line. + self._HandleError(errors.EXTRA_SPACE, 'Extra space at end of line', + token, Position.All(token.string)) + elif not first_in_line and not token.next.IsComment(): + if token.length > 1: + self._HandleError(errors.EXTRA_SPACE, 'Extra space after "%s"' % + token.previous.string, token, + Position(1, len(token.string) - 1)) + + elif type == Type.OPERATOR: + last_code = token.metadata.last_code + + if not self._ExpectSpaceBeforeOperator(token): + if (token.previous and token.previous.type == Type.WHITESPACE and + last_code and last_code.type in (Type.NORMAL, Type.IDENTIFIER)): + self._HandleError(errors.EXTRA_SPACE, + 'Extra space before "%s"' % token.string, token.previous, + Position.All(token.previous.string)) + + elif (token.previous and + not token.previous.IsComment() and + token.previous.type in Type.EXPRESSION_ENDER_TYPES): + self._HandleError(errors.MISSING_SPACE, + 'Missing space before "%s"' % token.string, token, + Position.AtBeginning()) + + # Check that binary operators are not used to start lines. + if ((not last_code or last_code.line_number != token.line_number) and + not token.metadata.IsUnaryOperator()): + self._HandleError(errors.LINE_STARTS_WITH_OPERATOR, + 'Binary operator should go on previous line "%s"' % token.string, + token) + + elif type == Type.DOC_FLAG: + flag = token.attached_object + + if flag.flag_type == 'bug': + # TODO(robbyw): Check for exactly 1 space on the left. + string = token.next.string.lstrip() + string = string.split(' ', 1)[0] + + if not string.isdigit(): + self._HandleError(errors.NO_BUG_NUMBER_AFTER_BUG_TAG, + '@bug should be followed by a bug number', token) + + elif flag.flag_type == 'suppress': + if flag.type is None: + # A syntactically invalid suppress tag will get tokenized as a normal + # flag, indicating an error. + self._HandleError(errors.INCORRECT_SUPPRESS_SYNTAX, + 'Invalid suppress syntax: should be @suppress {errortype}. ' + 'Spaces matter.', token) + elif flag.type not in state.GetDocFlag().SUPPRESS_TYPES: + self._HandleError(errors.INVALID_SUPPRESS_TYPE, + 'Invalid suppression type: %s' % flag.type, + token) + + elif FLAGS.strict and flag.flag_type == 'author': + # TODO(user): In non strict mode check the author tag for as much as + # it exists, though the full form checked below isn't required. + string = token.next.string + result = self.AUTHOR_SPEC.match(string) + if not result: + self._HandleError(errors.INVALID_AUTHOR_TAG_DESCRIPTION, + 'Author tag line should be of the form: ' + '@author foo@somewhere.com (Your Name)', + token.next) + else: + # Check spacing between email address and name. Do this before + # checking earlier spacing so positions are easier to calculate for + # autofixing. + num_spaces = len(result.group(2)) + if num_spaces < 1: + self._HandleError(errors.MISSING_SPACE, + 'Missing space after email address', + token.next, Position(result.start(2), 0)) + elif num_spaces > 1: + self._HandleError(errors.EXTRA_SPACE, + 'Extra space after email address', + token.next, + Position(result.start(2) + 1, num_spaces - 1)) + + # Check for extra spaces before email address. Can't be too few, if + # not at least one we wouldn't match @author tag. 
+ num_spaces = len(result.group(1)) + if num_spaces > 1: + self._HandleError(errors.EXTRA_SPACE, + 'Extra space before email address', + token.next, Position(1, num_spaces - 1)) + + elif (flag.flag_type in state.GetDocFlag().HAS_DESCRIPTION and + not self._limited_doc_checks): + if flag.flag_type == 'param': + if flag.name is None: + self._HandleError(errors.MISSING_JSDOC_PARAM_NAME, + 'Missing name in @param tag', token) + + if not flag.description or flag.description is None: + flag_name = token.type + if 'name' in token.values: + flag_name = '@' + token.values['name'] + self._HandleError(errors.MISSING_JSDOC_TAG_DESCRIPTION, + 'Missing description in %s tag' % flag_name, token) + else: + self._CheckForMissingSpaceBeforeToken(flag.description_start_token) + + # We want punctuation to be inside of any tags ending a description, + # so strip tags before checking description. See bug 1127192. Note + # that depending on how lines break, the real description end token + # may consist only of stripped html and the effective end token can + # be different. + end_token = flag.description_end_token + end_string = htmlutil.StripTags(end_token.string).strip() + while (end_string == '' and not + end_token.type in Type.FLAG_ENDING_TYPES): + end_token = end_token.previous + if end_token.type in Type.FLAG_DESCRIPTION_TYPES: + end_string = htmlutil.StripTags(end_token.string).rstrip() + + if not (end_string.endswith('.') or end_string.endswith('?') or + end_string.endswith('!')): + # Find the position for the missing punctuation, inside of any html + # tags. + desc_str = end_token.string.rstrip() + while desc_str.endswith('>'): + start_tag_index = desc_str.rfind('<') + if start_tag_index < 0: + break + desc_str = desc_str[:start_tag_index].rstrip() + end_position = Position(len(desc_str), 0) + + self._HandleError( + errors.JSDOC_TAG_DESCRIPTION_ENDS_WITH_INVALID_CHARACTER, + ('%s descriptions must end with valid punctuation such as a ' + 'period.' % token.string), + end_token, end_position) + + if flag.flag_type in state.GetDocFlag().HAS_TYPE: + if flag.type_start_token is not None: + self._CheckForMissingSpaceBeforeToken( + token.attached_object.type_start_token) + + if flag.type and flag.type != '' and not flag.type.isspace(): + self._CheckJsDocType(token) + + if type in (Type.DOC_FLAG, Type.DOC_INLINE_FLAG): + if (token.values['name'] not in state.GetDocFlag().LEGAL_DOC and + token.values['name'] not in FLAGS.custom_jsdoc_tags): + self._HandleError(errors.INVALID_JSDOC_TAG, + 'Invalid JsDoc tag: %s' % token.values['name'], token) + + if (FLAGS.strict and token.values['name'] == 'inheritDoc' and + type == Type.DOC_INLINE_FLAG): + self._HandleError(errors.UNNECESSARY_BRACES_AROUND_INHERIT_DOC, + 'Unnecessary braces around @inheritDoc', + token) + + elif type == Type.SIMPLE_LVALUE: + identifier = token.values['identifier'] + + if ((not state.InFunction() or state.InConstructor()) and + not state.InParentheses() and not state.InObjectLiteralDescendant()): + jsdoc = state.GetDocComment() + if not state.HasDocComment(identifier): + # Only test for documentation on identifiers with .s in them to + # avoid checking things like simple variables. We don't require + # documenting assignments to .prototype itself (bug 1880803). 
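The AUTHOR_SPEC regex used above, run against a conforming and a non-conforming @author line; groups 1 and 2 capture the space runs whose lengths the checker measures:

import re

AUTHOR_SPEC = re.compile(r'(\s*)[^\s]+@[^(\s]+(\s*)\(.+\)')

m = AUTHOR_SPEC.match(' robbyw@google.com (Robert Walker)')
print repr(m.group(1)), repr(m.group(2))  # ' ' ' ' -- one space each
print AUTHOR_SPEC.match('Robert Walker')  # None: no email address or (name)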
+ if (not state.InConstructor() and + identifier.find('.') != -1 and not + identifier.endswith('.prototype') and not + self._limited_doc_checks): + comment = state.GetLastComment() + if not (comment and comment.lower().count('jsdoc inherited')): + self._HandleError(errors.MISSING_MEMBER_DOCUMENTATION, + "No docs found for member '%s'" % identifier, + token); + elif jsdoc and (not state.InConstructor() or + identifier.startswith('this.')): + # We are at the top level and the function/member is documented. + if identifier.endswith('_') and not identifier.endswith('__'): + if jsdoc.HasFlag('override'): + self._HandleError(errors.INVALID_OVERRIDE_PRIVATE, + '%s should not override a private member.' % identifier, + jsdoc.GetFlag('override').flag_token) + # Can have a private class which inherits documentation from a + # public superclass. + if jsdoc.HasFlag('inheritDoc') and not jsdoc.HasFlag('constructor'): + self._HandleError(errors.INVALID_INHERIT_DOC_PRIVATE, + '%s should not inherit from a private member.' % identifier, + jsdoc.GetFlag('inheritDoc').flag_token) + if (not jsdoc.HasFlag('private') and + not ('underscore' in jsdoc.suppressions)): + self._HandleError(errors.MISSING_PRIVATE, + 'Member "%s" must have @private JsDoc.' % + identifier, token) + if jsdoc.HasFlag('private') and 'underscore' in jsdoc.suppressions: + self._HandleError(errors.UNNECESSARY_SUPPRESS, + '@suppress {underscore} is not necessary with @private', + jsdoc.suppressions['underscore']) + elif jsdoc.HasFlag('private'): + self._HandleError(errors.EXTRA_PRIVATE, + 'Member "%s" must not have @private JsDoc' % + identifier, token) + + if ((jsdoc.HasFlag('desc') or jsdoc.HasFlag('hidden')) + and not identifier.startswith('MSG_') + and identifier.find('.MSG_') == -1): + # TODO(user): Update error message to show the actual invalid + # tag, either @desc or @hidden. + self._HandleError(errors.INVALID_USE_OF_DESC_TAG, + 'Member "%s" should not have @desc JsDoc' % identifier, + token) + + # Check for illegaly assigning live objects as prototype property values. + index = identifier.find('.prototype.') + # Ignore anything with additional .s after the prototype. + if index != -1 and identifier.find('.', index + 11) == -1: + equal_operator = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES) + next_code = tokenutil.SearchExcept(equal_operator, Type.NON_CODE_TYPES) + if next_code and ( + next_code.type in (Type.START_BRACKET, Type.START_BLOCK) or + next_code.IsOperator('new')): + self._HandleError(errors.ILLEGAL_PROTOTYPE_MEMBER_VALUE, + 'Member %s cannot have a non-primitive value' % identifier, + token) + + elif type == Type.END_PARAMETERS: + # Find extra space at the end of parameter lists. We check the token + # prior to the current one when it is a closing paren. 
+ if (token.previous and token.previous.type == Type.PARAMETERS + and self.ENDS_WITH_SPACE.search(token.previous.string)): + self._HandleError(errors.EXTRA_SPACE, 'Extra space before ")"', + token.previous) + + jsdoc = state.GetDocComment() + if state.GetFunction().is_interface: + if token.previous and token.previous.type == Type.PARAMETERS: + self._HandleError(errors.INTERFACE_CONSTRUCTOR_CANNOT_HAVE_PARAMS, + 'Interface constructor cannot have parameters', + token.previous) + elif (state.InTopLevel() and jsdoc and not jsdoc.HasFlag('see') + and not jsdoc.InheritsDocumentation() + and not state.InObjectLiteralDescendant() and not + jsdoc.IsInvalidated()): + distance, edit = jsdoc.CompareParameters(state.GetParams()) + if distance: + params_iter = iter(state.GetParams()) + docs_iter = iter(jsdoc.ordered_params) + + for op in edit: + if op == 'I': + # Insertion. + # Parsing doc comments is the same for all languages + # but some languages care about parameters that don't have + # doc comments and some languages don't care. + # Languages that don't allow variables to by typed such as + # JavaScript care but languages such as ActionScript or Java + # that allow variables to be typed don't care. + self.HandleMissingParameterDoc(token, params_iter.next()) + + elif op == 'D': + # Deletion + self._HandleError(errors.EXTRA_PARAMETER_DOCUMENTATION, + 'Found docs for non-existing parameter: "%s"' % + docs_iter.next(), token) + elif op == 'S': + # Substitution + self._HandleError(errors.WRONG_PARAMETER_DOCUMENTATION, + 'Parameter mismatch: got "%s", expected "%s"' % + (params_iter.next(), docs_iter.next()), token) + + else: + # Equality - just advance the iterators + params_iter.next() + docs_iter.next() + + elif type == Type.STRING_TEXT: + # If this is the first token after the start of the string, but it's at + # the end of a line, we know we have a multi-line string. + if token.previous.type in (Type.SINGLE_QUOTE_STRING_START, + Type.DOUBLE_QUOTE_STRING_START) and last_in_line: + self._HandleError(errors.MULTI_LINE_STRING, + 'Multi-line strings are not allowed', token) + + + # This check is orthogonal to the ones above, and repeats some types, so + # it is a plain if and not an elif. + if token.type in Type.COMMENT_TYPES: + if self.ILLEGAL_TAB.search(token.string): + self._HandleError(errors.ILLEGAL_TAB, + 'Illegal tab in comment "%s"' % token.string, token) + + trimmed = token.string.rstrip() + if last_in_line and token.string != trimmed: + # Check for extra whitespace at the end of a line. + self._HandleError(errors.EXTRA_SPACE, 'Extra space at end of line', + token, Position(len(trimmed), len(token.string) - len(trimmed))) + + # This check is also orthogonal since it is based on metadata. + if token.metadata.is_implied_semicolon: + self._HandleError(errors.MISSING_SEMICOLON, + 'Missing semicolon at end of line', token) + + def Finalize(self, state, tokenizer_mode): + last_non_space_token = state.GetLastNonSpaceToken() + # Check last line for ending with newline. + if state.GetLastLine() and not (state.GetLastLine().isspace() or + state.GetLastLine().rstrip('\n\r\f') != state.GetLastLine()): + self._HandleError( + errors.FILE_MISSING_NEWLINE, + 'File does not end with new line. (%s)' % state.GetLastLine(), + last_non_space_token) + + # Check that the mode is not mid comment, argument list, etc. + if not tokenizer_mode == Modes.TEXT_MODE: + self._HandleError( + errors.FILE_IN_BLOCK, + 'File ended in mode "%s".' 
% tokenizer_mode, + last_non_space_token) + + try: + self._indentation.Finalize() + except Exception, e: + self._HandleError( + errors.FILE_DOES_NOT_PARSE, + str(e), + last_non_space_token) + + def GetLongLineExceptions(self): + """Gets a list of regexps for lines which can be longer than the limit.""" + return [] diff --git a/tools/closure_linter/closure_linter/ecmametadatapass.py b/tools/closure_linter/closure_linter/ecmametadatapass.py new file mode 100755 index 00000000000..2c797b3c399 --- /dev/null +++ b/tools/closure_linter/closure_linter/ecmametadatapass.py @@ -0,0 +1,521 @@ +#!/usr/bin/env python +# +# Copyright 2010 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Metadata pass for annotating tokens in EcmaScript files.""" + +__author__ = ('robbyw@google.com (Robert Walker)') + +from closure_linter import javascripttokens +from closure_linter import tokenutil + + +TokenType = javascripttokens.JavaScriptTokenType + + +class ParseError(Exception): + """Exception indicating a parse error at the given token. + + Attributes: + token: The token where the parse error occurred. + """ + + def __init__(self, token, message=None): + """Initialize a parse error at the given token with an optional message. + + Args: + token: The token where the parse error occurred. + message: A message describing the parse error. + """ + Exception.__init__(self, message) + self.token = token + + +class EcmaContext(object): + """Context object for EcmaScript languages. + + Attributes: + type: The context type. + start_token: The token where this context starts. + end_token: The token where this context ends. + parent: The parent context. + """ + + # The root context. + ROOT = 'root' + + # A block of code. + BLOCK = 'block' + + # A pseudo-block of code for a given case or default section. + CASE_BLOCK = 'case_block' + + # Block of statements in a for loop's parentheses. + FOR_GROUP_BLOCK = 'for_block' + + # An implied block of code for 1 line if, while, and for statements + IMPLIED_BLOCK = 'implied_block' + + # An index in to an array or object. + INDEX = 'index' + + # An array literal in []. + ARRAY_LITERAL = 'array_literal' + + # An object literal in {}. + OBJECT_LITERAL = 'object_literal' + + # An individual element in an array or object literal. + LITERAL_ELEMENT = 'literal_element' + + # The portion of a ternary statement between ? and : + TERNARY_TRUE = 'ternary_true' + + # The portion of a ternary statment after : + TERNARY_FALSE = 'ternary_false' + + # The entire switch statment. This will contain a GROUP with the variable + # and a BLOCK with the code. + + # Since that BLOCK is not a normal block, it can not contain statements except + # for case and default. + SWITCH = 'switch' + + # A normal comment. + COMMENT = 'comment' + + # A JsDoc comment. + DOC = 'doc' + + # An individual statement. + STATEMENT = 'statement' + + # Code within parentheses. + GROUP = 'group' + + # Parameter names in a function declaration. 
+ PARAMETERS = 'parameters' + + # A set of variable declarations appearing after the 'var' keyword. + VAR = 'var' + + # Context types that are blocks. + BLOCK_TYPES = frozenset([ + ROOT, BLOCK, CASE_BLOCK, FOR_GROUP_BLOCK, IMPLIED_BLOCK]) + + def __init__(self, type, start_token, parent): + """Initializes the context object. + + Args: + type: The context type. + start_token: The token where this context starts. + parent: The parent context. + """ + self.type = type + self.start_token = start_token + self.end_token = None + self.parent = parent + + def __repr__(self): + """Returns a string representation of the context object.""" + stack = [] + context = self + while context: + stack.append(context.type) + context = context.parent + return 'Context(%s)' % ' > '.join(stack) + + +class EcmaMetaData(object): + """Token metadata for EcmaScript languages. + + Attributes: + last_code: The last code token to appear before this one. + context: The context this token appears in. + operator_type: The operator type, will be one of the *_OPERATOR constants + defined below. + """ + + UNARY_OPERATOR = 'unary' + + UNARY_POST_OPERATOR = 'unary_post' + + BINARY_OPERATOR = 'binary' + + TERNARY_OPERATOR = 'ternary' + + def __init__(self): + """Initializes a token metadata object.""" + self.last_code = None + self.context = None + self.operator_type = None + self.is_implied_semicolon = False + self.is_implied_block = False + self.is_implied_block_close = False + + def __repr__(self): + """Returns a string representation of the context object.""" + parts = ['%r' % self.context] + if self.operator_type: + parts.append('optype: %r' % self.operator_type) + if self.is_implied_semicolon: + parts.append('implied;') + return 'MetaData(%s)' % ', '.join(parts) + + def IsUnaryOperator(self): + return self.operator_type in (EcmaMetaData.UNARY_OPERATOR, + EcmaMetaData.UNARY_POST_OPERATOR) + + def IsUnaryPostOperator(self): + return self.operator_type == EcmaMetaData.UNARY_POST_OPERATOR + + +class EcmaMetaDataPass(object): + """A pass that iterates over all tokens and builds metadata about them.""" + + def __init__(self): + """Initialize the meta data pass object.""" + self.Reset() + + def Reset(self): + """Resets the metadata pass to prepare for the next file.""" + self._token = None + self._context = None + self._AddContext(EcmaContext.ROOT) + self._last_code = None + + def _CreateContext(self, type): + """Overridable by subclasses to create the appropriate context type.""" + return EcmaContext(type, self._token, self._context) + + def _CreateMetaData(self): + """Overridable by subclasses to create the appropriate metadata type.""" + return EcmaMetaData() + + def _AddContext(self, type): + """Adds a context of the given type to the context stack. + + Args: + type: The type of context to create + """ + self._context = self._CreateContext(type) + + def _PopContext(self): + """Moves up one level in the context stack. + + Returns: + The former context. + + Raises: + ParseError: If the root context is popped. + """ + top_context = self._context + top_context.end_token = self._token + self._context = top_context.parent + if self._context: + return top_context + else: + raise ParseError(self._token) + + def _PopContextType(self, *stop_types): + """Pops the context stack until a context of the given type is popped. + + Args: + stop_types: The types of context to pop to - stops at the first match. + + Returns: + The context object of the given type that was popped. 
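A sketch of the EcmaContext parent chain and its __repr__, built by hand with no tokens; assumes the module from this patch is importable:

from closure_linter import ecmametadatapass

Ctx = ecmametadatapass.EcmaContext
root = Ctx(Ctx.ROOT, None, None)
block = Ctx(Ctx.BLOCK, None, root)
statement = Ctx(Ctx.STATEMENT, None, block)
print repr(statement)  # Context(statement > block > root)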
+ """ + last = None + while not last or last.type not in stop_types: + last = self._PopContext() + return last + + def _EndStatement(self): + """Process the end of a statement.""" + self._PopContextType(EcmaContext.STATEMENT) + if self._context.type == EcmaContext.IMPLIED_BLOCK: + self._token.metadata.is_implied_block_close = True + self._PopContext() + + def _ProcessContext(self): + """Process the context at the current token. + + Returns: + The context that should be assigned to the current token, or None if + the current context after this method should be used. + + Raises: + ParseError: When the token appears in an invalid context. + """ + token = self._token + token_type = token.type + + if self._context.type in EcmaContext.BLOCK_TYPES: + # Whenever we're in a block, we add a statement context. We make an + # exception for switch statements since they can only contain case: and + # default: and therefore don't directly contain statements. + # The block we add here may be immediately removed in some cases, but + # that causes no harm. + parent = self._context.parent + if not parent or parent.type != EcmaContext.SWITCH: + self._AddContext(EcmaContext.STATEMENT) + + elif self._context.type == EcmaContext.ARRAY_LITERAL: + self._AddContext(EcmaContext.LITERAL_ELEMENT) + + if token_type == TokenType.START_PAREN: + if self._last_code and self._last_code.IsKeyword('for'): + # for loops contain multiple statements in the group unlike while, + # switch, if, etc. + self._AddContext(EcmaContext.FOR_GROUP_BLOCK) + else: + self._AddContext(EcmaContext.GROUP) + + elif token_type == TokenType.END_PAREN: + result = self._PopContextType(EcmaContext.GROUP, + EcmaContext.FOR_GROUP_BLOCK) + keyword_token = result.start_token.metadata.last_code + # keyword_token will not exist if the open paren is the first line of the + # file, for example if all code is wrapped in an immediately executed + # annonymous function. + if keyword_token and keyword_token.string in ('if', 'for', 'while'): + next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES) + if next_code.type != TokenType.START_BLOCK: + # Check for do-while. + is_do_while = False + pre_keyword_token = keyword_token.metadata.last_code + if (pre_keyword_token and + pre_keyword_token.type == TokenType.END_BLOCK): + start_block_token = pre_keyword_token.metadata.context.start_token + is_do_while = start_block_token.metadata.last_code.string == 'do' + + # If it's not do-while, it's an implied block. + if not is_do_while: + self._AddContext(EcmaContext.IMPLIED_BLOCK) + token.metadata.is_implied_block = True + + return result + + # else (not else if) with no open brace after it should be considered the + # start of an implied block, similar to the case with if, for, and while + # above. 
+ elif (token_type == TokenType.KEYWORD and + token.string == 'else'): + next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES) + if (next_code.type != TokenType.START_BLOCK and + (next_code.type != TokenType.KEYWORD or next_code.string != 'if')): + self._AddContext(EcmaContext.IMPLIED_BLOCK) + token.metadata.is_implied_block = True + + elif token_type == TokenType.START_PARAMETERS: + self._AddContext(EcmaContext.PARAMETERS) + + elif token_type == TokenType.END_PARAMETERS: + return self._PopContextType(EcmaContext.PARAMETERS) + + elif token_type == TokenType.START_BRACKET: + if (self._last_code and + self._last_code.type in TokenType.EXPRESSION_ENDER_TYPES): + self._AddContext(EcmaContext.INDEX) + else: + self._AddContext(EcmaContext.ARRAY_LITERAL) + + elif token_type == TokenType.END_BRACKET: + return self._PopContextType(EcmaContext.INDEX, EcmaContext.ARRAY_LITERAL) + + elif token_type == TokenType.START_BLOCK: + if (self._last_code.type in (TokenType.END_PAREN, + TokenType.END_PARAMETERS) or + self._last_code.IsKeyword('else') or + self._last_code.IsKeyword('do') or + self._last_code.IsKeyword('try') or + self._last_code.IsKeyword('finally') or + (self._last_code.IsOperator(':') and + self._last_code.metadata.context.type == EcmaContext.CASE_BLOCK)): + # else, do, try, and finally all might have no () before {. + # Also, handle the bizzare syntax case 10: {...}. + self._AddContext(EcmaContext.BLOCK) + else: + self._AddContext(EcmaContext.OBJECT_LITERAL) + + elif token_type == TokenType.END_BLOCK: + context = self._PopContextType(EcmaContext.BLOCK, + EcmaContext.OBJECT_LITERAL) + if self._context.type == EcmaContext.SWITCH: + # The end of the block also means the end of the switch statement it + # applies to. + return self._PopContext() + return context + + elif token.IsKeyword('switch'): + self._AddContext(EcmaContext.SWITCH) + + elif (token_type == TokenType.KEYWORD and + token.string in ('case', 'default')): + # Pop up to but not including the switch block. + while self._context.parent.type != EcmaContext.SWITCH: + self._PopContext() + + elif token.IsOperator('?'): + self._AddContext(EcmaContext.TERNARY_TRUE) + + elif token.IsOperator(':'): + if self._context.type == EcmaContext.OBJECT_LITERAL: + self._AddContext(EcmaContext.LITERAL_ELEMENT) + + elif self._context.type == EcmaContext.TERNARY_TRUE: + self._PopContext() + self._AddContext(EcmaContext.TERNARY_FALSE) + + # Handle nested ternary statements like: + # foo = bar ? baz ? 1 : 2 : 3 + # When we encounter the second ":" the context is + # ternary_false > ternary_true > statement > root + elif (self._context.type == EcmaContext.TERNARY_FALSE and + self._context.parent.type == EcmaContext.TERNARY_TRUE): + self._PopContext() # Leave current ternary false context. 
+ self._PopContext() # Leave current parent ternary true + self._AddContext(EcmaContext.TERNARY_FALSE) + + elif self._context.parent.type == EcmaContext.SWITCH: + self._AddContext(EcmaContext.CASE_BLOCK) + + elif token.IsKeyword('var'): + self._AddContext(EcmaContext.VAR) + + elif token.IsOperator(','): + while self._context.type not in (EcmaContext.VAR, + EcmaContext.ARRAY_LITERAL, + EcmaContext.OBJECT_LITERAL, + EcmaContext.STATEMENT, + EcmaContext.PARAMETERS, + EcmaContext.GROUP): + self._PopContext() + + elif token_type == TokenType.SEMICOLON: + self._EndStatement() + + def Process(self, first_token): + """Processes the token stream starting with the given token.""" + self._token = first_token + while self._token: + self._ProcessToken() + + if self._token.IsCode(): + self._last_code = self._token + + self._token = self._token.next + + try: + self._PopContextType(self, EcmaContext.ROOT) + except ParseError: + # Ignore the "popped to root" error. + pass + + def _ProcessToken(self): + """Process the given token.""" + token = self._token + token.metadata = self._CreateMetaData() + context = (self._ProcessContext() or self._context) + token.metadata.context = context + token.metadata.last_code = self._last_code + + # Determine the operator type of the token, if applicable. + if token.type == TokenType.OPERATOR: + token.metadata.operator_type = self._GetOperatorType(token) + + # Determine if there is an implied semicolon after the token. + if token.type != TokenType.SEMICOLON: + next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES) + # A statement like if (x) does not need a semicolon after it + is_implied_block = self._context == EcmaContext.IMPLIED_BLOCK + is_last_code_in_line = token.IsCode() and ( + not next_code or next_code.line_number != token.line_number) + is_continued_identifier = (token.type == TokenType.IDENTIFIER and + token.string.endswith('.')) + is_continued_operator = (token.type == TokenType.OPERATOR and + not token.metadata.IsUnaryPostOperator()) + is_continued_dot = token.string == '.' + next_code_is_operator = next_code and next_code.type == TokenType.OPERATOR + next_code_is_dot = next_code and next_code.string == '.' + is_end_of_block = (token.type == TokenType.END_BLOCK and + token.metadata.context.type != EcmaContext.OBJECT_LITERAL) + is_multiline_string = token.type == TokenType.STRING_TEXT + next_code_is_block = next_code and next_code.type == TokenType.START_BLOCK + if (is_last_code_in_line and + self._StatementCouldEndInContext() and + not is_multiline_string and + not is_end_of_block and + not is_continued_identifier and + not is_continued_operator and + not is_continued_dot and + not next_code_is_dot and + not next_code_is_operator and + not is_implied_block and + not next_code_is_block): + token.metadata.is_implied_semicolon = True + self._EndStatement() + + def _StatementCouldEndInContext(self): + """Returns whether the current statement (if any) may end in this context.""" + # In the basic statement or variable declaration context, statement can + # always end in this context. + if self._context.type in (EcmaContext.STATEMENT, EcmaContext.VAR): + return True + + # End of a ternary false branch inside a statement can also be the + # end of the statement, for example: + # var x = foo ? foo.bar() : null + # In this case the statement ends after the null, when the context stack + # looks like ternary_false > var > statement > root. 
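An end-to-end sketch of the pass: tokenize one line of JavaScript with the JavaScriptTokenizer defined later in this patch (assumed here to take no required constructor arguments) and inspect the context and implied-semicolon flag assigned to each token:

from closure_linter import ecmametadatapass
from closure_linter import javascripttokenizer

first = javascripttokenizer.JavaScriptTokenizer().TokenizeFile(['var x = 1\n'])
ecmametadatapass.EcmaMetaDataPass().Process(first)

token = first
while token:
  print repr(token.string), token.metadata.context.type, \
      token.metadata.is_implied_semicolon
  token = token.next
# The trailing '1' should carry is_implied_semicolon == True.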
+ if (self._context.type == EcmaContext.TERNARY_FALSE and + self._context.parent.type in (EcmaContext.STATEMENT, EcmaContext.VAR)): + return True + + # In all other contexts like object and array literals, ternary true, etc. + # the statement can't yet end. + return False + + def _GetOperatorType(self, token): + """Returns the operator type of the given operator token. + + Args: + token: The token to get arity for. + + Returns: + The type of the operator. One of the *_OPERATOR constants defined in + EcmaMetaData. + """ + if token.string == '?': + return EcmaMetaData.TERNARY_OPERATOR + + if token.string in TokenType.UNARY_OPERATORS: + return EcmaMetaData.UNARY_OPERATOR + + last_code = token.metadata.last_code + if not last_code or last_code.type == TokenType.END_BLOCK: + return EcmaMetaData.UNARY_OPERATOR + + if (token.string in TokenType.UNARY_POST_OPERATORS and + last_code.type in TokenType.EXPRESSION_ENDER_TYPES): + return EcmaMetaData.UNARY_POST_OPERATOR + + if (token.string in TokenType.UNARY_OK_OPERATORS and + last_code.type not in TokenType.EXPRESSION_ENDER_TYPES and + last_code.string not in TokenType.UNARY_POST_OPERATORS): + return EcmaMetaData.UNARY_OPERATOR + + return EcmaMetaData.BINARY_OPERATOR diff --git a/tools/closure_linter/closure_linter/error_fixer.py b/tools/closure_linter/closure_linter/error_fixer.py new file mode 100755 index 00000000000..904cf86605b --- /dev/null +++ b/tools/closure_linter/closure_linter/error_fixer.py @@ -0,0 +1,336 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Main class responsible for automatically fixing simple style violations.""" + +__author__ = 'robbyw@google.com (Robert Walker)' + +import re + +import gflags as flags +from closure_linter import errors +from closure_linter import javascriptstatetracker +from closure_linter import javascripttokens +from closure_linter import tokenutil +from closure_linter.common import errorhandler + +# Shorthand +Token = javascripttokens.JavaScriptToken +Type = javascripttokens.JavaScriptTokenType + +END_OF_FLAG_TYPE = re.compile(r'(}?\s*)$') + +FLAGS = flags.FLAGS +flags.DEFINE_boolean('disable_indentation_fixing', False, + 'Whether to disable automatic fixing of indentation.') + +class ErrorFixer(errorhandler.ErrorHandler): + """Object that fixes simple style errors.""" + + def __init__(self, external_file = None): + """Initialize the error fixer. + + Args: + external_file: If included, all output will be directed to this file + instead of overwriting the files the errors are found in. + """ + self._file_name = None + self._file_token = None + self._external_file = external_file + + def HandleFile(self, filename, first_token): + """Notifies this ErrorPrinter that subsequent errors are in filename. + + Args: + filename: The name of the file about to be checked. + first_token: The first token in the file. 
+ """ + self._file_name = filename + self._file_token = first_token + self._file_fix_count = 0 + self._file_changed_lines = set() + + def _AddFix(self, tokens): + """Adds the fix to the internal count. + + Args: + tokens: The token or sequence of tokens changed to fix an error. + """ + self._file_fix_count += 1 + if hasattr(tokens, 'line_number'): + self._file_changed_lines.add(tokens.line_number) + else: + for token in tokens: + self._file_changed_lines.add(token.line_number) + + def HandleError(self, error): + """Attempts to fix the error. + + Args: + error: The error object + """ + code = error.code + token = error.token + + if code == errors.JSDOC_PREFER_QUESTION_TO_PIPE_NULL: + iterator = token.attached_object.type_start_token + if iterator.type == Type.DOC_START_BRACE or iterator.string.isspace(): + iterator = iterator.next + + leading_space = len(iterator.string) - len(iterator.string.lstrip()) + iterator.string = '%s?%s' % (' ' * leading_space, + iterator.string.lstrip()) + + # Cover the no outer brace case where the end token is part of the type. + while iterator and iterator != token.attached_object.type_end_token.next: + iterator.string = iterator.string.replace( + 'null|', '').replace('|null', '') + iterator = iterator.next + + # Create a new flag object with updated type info. + token.attached_object = javascriptstatetracker.JsDocFlag(token) + self._AddFix(token) + + elif code in (errors.MISSING_SEMICOLON_AFTER_FUNCTION, + errors.MISSING_SEMICOLON): + semicolon_token = Token(';', Type.SEMICOLON, token.line, + token.line_number) + tokenutil.InsertTokenAfter(semicolon_token, token) + token.metadata.is_implied_semicolon = False + semicolon_token.metadata.is_implied_semicolon = False + self._AddFix(token) + + elif code in (errors.ILLEGAL_SEMICOLON_AFTER_FUNCTION, + errors.REDUNDANT_SEMICOLON, + errors.COMMA_AT_END_OF_LITERAL): + tokenutil.DeleteToken(token) + self._AddFix(token) + + elif code == errors.INVALID_JSDOC_TAG: + if token.string == '@returns': + token.string = '@return' + self._AddFix(token) + + elif code == errors.FILE_MISSING_NEWLINE: + # This error is fixed implicitly by the way we restore the file + self._AddFix(token) + + elif code == errors.MISSING_SPACE: + if error.position: + if error.position.IsAtBeginning(): + tokenutil.InsertSpaceTokenAfter(token.previous) + elif error.position.IsAtEnd(token.string): + tokenutil.InsertSpaceTokenAfter(token) + else: + token.string = error.position.Set(token.string, ' ') + self._AddFix(token) + + elif code == errors.EXTRA_SPACE: + if error.position: + token.string = error.position.Set(token.string, '') + self._AddFix(token) + + elif code == errors.JSDOC_TAG_DESCRIPTION_ENDS_WITH_INVALID_CHARACTER: + token.string = error.position.Set(token.string, '.') + self._AddFix(token) + + elif code == errors.MISSING_LINE: + if error.position.IsAtBeginning(): + tokenutil.InsertLineAfter(token.previous) + else: + tokenutil.InsertLineAfter(token) + self._AddFix(token) + + elif code == errors.EXTRA_LINE: + tokenutil.DeleteToken(token) + self._AddFix(token) + + elif code == errors.WRONG_BLANK_LINE_COUNT: + if not token.previous: + # TODO(user): Add an insertBefore method to tokenutil. + return + + num_lines = error.fix_data + should_delete = False + + if num_lines < 0: + num_lines = num_lines * -1 + should_delete = True + + for i in xrange(1, num_lines + 1): + if should_delete: + # TODO(user): DeleteToken should update line numbers. 
+          tokenutil.DeleteToken(token.previous)
+        else:
+          tokenutil.InsertLineAfter(token.previous)
+      self._AddFix(token)
+
+    elif code == errors.UNNECESSARY_DOUBLE_QUOTED_STRING:
+      end_quote = tokenutil.Search(token, Type.DOUBLE_QUOTE_STRING_END)
+      if end_quote:
+        single_quote_start = Token("'", Type.SINGLE_QUOTE_STRING_START,
+                                   token.line, token.line_number)
+        single_quote_end = Token("'", Type.SINGLE_QUOTE_STRING_END,
+                                 end_quote.line, end_quote.line_number)
+
+        tokenutil.InsertTokenAfter(single_quote_start, token)
+        tokenutil.InsertTokenAfter(single_quote_end, end_quote)
+        tokenutil.DeleteToken(token)
+        tokenutil.DeleteToken(end_quote)
+        self._AddFix([token, end_quote])
+
+    elif code == errors.MISSING_BRACES_AROUND_TYPE:
+      fixed_tokens = []
+      start_token = token.attached_object.type_start_token
+
+      if start_token.type != Type.DOC_START_BRACE:
+        leading_space = (len(start_token.string) -
+                         len(start_token.string.lstrip()))
+        if leading_space:
+          start_token = tokenutil.SplitToken(start_token, leading_space)
+          # Fix case where start and end token were the same.
+          if token.attached_object.type_end_token == start_token.previous:
+            token.attached_object.type_end_token = start_token
+
+        new_token = Token("{", Type.DOC_START_BRACE, start_token.line,
+                          start_token.line_number)
+        tokenutil.InsertTokenAfter(new_token, start_token.previous)
+        token.attached_object.type_start_token = new_token
+        fixed_tokens.append(new_token)
+
+      end_token = token.attached_object.type_end_token
+      if end_token.type != Type.DOC_END_BRACE:
+        # If the start token was a brace, the end token will be a
+        # FLAG_ENDING_TYPE token; if there wasn't a starting brace, the end
+        # token is the last token of the actual type.
+        last_type = end_token
+        if not fixed_tokens:
+          last_type = end_token.previous
+
+        while last_type.string.isspace():
+          last_type = last_type.previous
+
+        # If there was no starting brace, a lone end brace wouldn't have been
+        # the type end token. Now that we've added any missing start brace,
+        # see if the last effective type token was an end brace.
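+        # For example (illustrative): the type in "@param number x" gains
+        # both braces, yielding "@param {number} x".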
+ if last_type.type != Type.DOC_END_BRACE: + trailing_space = (len(last_type.string) - + len(last_type.string.rstrip())) + if trailing_space: + tokenutil.SplitToken(last_type, + len(last_type.string) - trailing_space) + + new_token = Token("}", Type.DOC_END_BRACE, last_type.line, + last_type.line_number) + tokenutil.InsertTokenAfter(new_token, last_type) + token.attached_object.type_end_token = new_token + fixed_tokens.append(new_token) + + self._AddFix(fixed_tokens) + + elif code in (errors.GOOG_REQUIRES_NOT_ALPHABETIZED, + errors.GOOG_PROVIDES_NOT_ALPHABETIZED): + tokens = error.fix_data + strings = map(lambda x: x.string, tokens) + sorted_strings = sorted(strings) + + index = 0 + changed_tokens = [] + for token in tokens: + if token.string != sorted_strings[index]: + token.string = sorted_strings[index] + changed_tokens.append(token) + index += 1 + + self._AddFix(changed_tokens) + + elif code == errors.UNNECESSARY_BRACES_AROUND_INHERIT_DOC: + if token.previous.string == '{' and token.next.string == '}': + tokenutil.DeleteToken(token.previous) + tokenutil.DeleteToken(token.next) + self._AddFix([token]) + + elif (code == errors.WRONG_INDENTATION and + not FLAGS.disable_indentation_fixing): + token = tokenutil.GetFirstTokenInSameLine(token) + actual = error.position.start + expected = error.position.length + + if token.type in (Type.WHITESPACE, Type.PARAMETERS): + token.string = token.string.lstrip() + (' ' * expected) + self._AddFix([token]) + else: + # We need to add indentation. + new_token = Token(' ' * expected, Type.WHITESPACE, + token.line, token.line_number) + # Note that we'll never need to add indentation at the first line, + # since it will always not be indented. Therefore it's safe to assume + # token.previous exists. + tokenutil.InsertTokenAfter(new_token, token.previous) + self._AddFix([token]) + + elif code == errors.EXTRA_GOOG_REQUIRE: + fixed_tokens = [] + while token: + if token.type == Type.IDENTIFIER: + if token.string not in ['goog.require', 'goog.provide']: + # Stop iterating over tokens once we're out of the requires and + # provides. + break + if token.string == 'goog.require': + # Text of form: goog.require('required'), skipping past open paren + # and open quote to the string text. + required = token.next.next.next.string + if required in error.fix_data: + fixed_tokens.append(token) + # Want to delete: goog.require + open paren + open single-quote + + # text + close single-quote + close paren + semi-colon = 7. + tokenutil.DeleteTokens(token, 7) + token = token.next + + self._AddFix(fixed_tokens) + + def FinishFile(self): + """Called when the current file has finished style checking. + + Used to go back and fix any errors in the file. + """ + if self._file_fix_count: + f = self._external_file + if not f: + print "Fixed %d errors in %s" % (self._file_fix_count, self._file_name) + f = open(self._file_name, 'w') + + token = self._file_token + char_count = 0 + while token: + f.write(token.string) + char_count += len(token.string) + + if token.IsLastInLine(): + f.write('\n') + if char_count > 80 and token.line_number in self._file_changed_lines: + print "WARNING: Line %d of %s is now longer than 80 characters." 
% (
+                token.line_number, self._file_name)
+
+          char_count = 0
+
+        token = token.next
+
+      if not self._external_file:
+        # Close the file only if we created it here.
+        f.close()
diff --git a/tools/closure_linter/closure_linter/errorrules.py b/tools/closure_linter/closure_linter/errorrules.py
new file mode 100755
index 00000000000..afb6fa9606c
--- /dev/null
+++ b/tools/closure_linter/closure_linter/errorrules.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+#
+# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Linter error rules class for Closure Linter."""
+
+__author__ = 'robbyw@google.com (Robert Walker)'
+
+import gflags as flags
+from closure_linter import errors
+
+
+FLAGS = flags.FLAGS
+flags.DEFINE_boolean('jsdoc', True,
+                     'Whether to report errors for missing JsDoc.')
+
+
+def ShouldReportError(error):
+  """Whether the given error should be reported.
+
+  Returns:
+    True for all errors except missing documentation errors. For these,
+    it returns the value of the jsdoc flag.
+  """
+  return FLAGS.jsdoc or error not in (
+      errors.MISSING_PARAMETER_DOCUMENTATION,
+      errors.MISSING_RETURN_DOCUMENTATION,
+      errors.MISSING_MEMBER_DOCUMENTATION,
+      errors.MISSING_PRIVATE,
+      errors.MISSING_JSDOC_TAG_THIS)
diff --git a/tools/closure_linter/closure_linter/errors.py b/tools/closure_linter/closure_linter/errors.py
new file mode 100755
index 00000000000..7c86941f39c
--- /dev/null
+++ b/tools/closure_linter/closure_linter/errors.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+#
+# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Error codes for JavaScript style checker."""
+
+__author__ = ('robbyw@google.com (Robert Walker)',
+              'ajp@google.com (Andy Perelson)')
+
+def ByName(name):
+  """Get the error code for the given error name.
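+
+  For example (illustrative), ByName('MISSING_SEMICOLON') returns 10.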
+ + Args: + name: The name of the error + + Returns: + The error code + """ + return globals()[name] + + +# "File-fatal" errors - these errors stop further parsing of a single file +FILE_NOT_FOUND = -1 +FILE_DOES_NOT_PARSE = -2 + +# Spacing +EXTRA_SPACE = 1 +MISSING_SPACE = 2 +EXTRA_LINE = 3 +MISSING_LINE = 4 +ILLEGAL_TAB = 5 +WRONG_INDENTATION = 6 +WRONG_BLANK_LINE_COUNT = 7 + +# Semicolons +MISSING_SEMICOLON = 10 +MISSING_SEMICOLON_AFTER_FUNCTION = 11 +ILLEGAL_SEMICOLON_AFTER_FUNCTION = 12 +REDUNDANT_SEMICOLON = 13 + +# Miscellaneous +ILLEGAL_PROTOTYPE_MEMBER_VALUE = 100 +LINE_TOO_LONG = 110 +LINE_STARTS_WITH_OPERATOR = 120 +COMMA_AT_END_OF_LITERAL = 121 +MULTI_LINE_STRING = 130 +UNNECESSARY_DOUBLE_QUOTED_STRING = 131 + +# Requires, provides +GOOG_REQUIRES_NOT_ALPHABETIZED = 140 +GOOG_PROVIDES_NOT_ALPHABETIZED = 141 +MISSING_GOOG_REQUIRE = 142 +MISSING_GOOG_PROVIDE = 143 +EXTRA_GOOG_REQUIRE = 144 + +# JsDoc +INVALID_JSDOC_TAG = 200 +INVALID_USE_OF_DESC_TAG = 201 +NO_BUG_NUMBER_AFTER_BUG_TAG = 202 +MISSING_PARAMETER_DOCUMENTATION = 210 +EXTRA_PARAMETER_DOCUMENTATION = 211 +WRONG_PARAMETER_DOCUMENTATION = 212 +MISSING_JSDOC_TAG_TYPE = 213 +MISSING_JSDOC_TAG_DESCRIPTION = 214 +MISSING_JSDOC_PARAM_NAME = 215 +OUT_OF_ORDER_JSDOC_TAG_TYPE = 216 +MISSING_RETURN_DOCUMENTATION = 217 +UNNECESSARY_RETURN_DOCUMENTATION = 218 +MISSING_BRACES_AROUND_TYPE = 219 +MISSING_MEMBER_DOCUMENTATION = 220 +MISSING_PRIVATE = 221 +EXTRA_PRIVATE = 222 +INVALID_OVERRIDE_PRIVATE = 223 +INVALID_INHERIT_DOC_PRIVATE = 224 +MISSING_JSDOC_TAG_THIS = 225 +UNNECESSARY_BRACES_AROUND_INHERIT_DOC = 226 +INVALID_AUTHOR_TAG_DESCRIPTION = 227 +JSDOC_PREFER_QUESTION_TO_PIPE_NULL = 230 +JSDOC_ILLEGAL_QUESTION_WITH_PIPE = 231 +JSDOC_TAG_DESCRIPTION_ENDS_WITH_INVALID_CHARACTER = 240 +# TODO(robbyw): Split this in to more specific syntax problems. +INCORRECT_SUPPRESS_SYNTAX = 250 +INVALID_SUPPRESS_TYPE = 251 +UNNECESSARY_SUPPRESS = 252 + +# File ending +FILE_MISSING_NEWLINE = 300 +FILE_IN_BLOCK = 301 + +# Interfaces +INTERFACE_CONSTRUCTOR_CANNOT_HAVE_PARAMS = 400 +INTERFACE_METHOD_CANNOT_HAVE_CODE = 401 + +# ActionScript specific errors: +# TODO(user): move these errors to their own file and move all JavaScript +# specific errors to their own file as well. +# All ActionScript specific errors should have error number at least 1000. +FUNCTION_MISSING_RETURN_TYPE = 1132 +PARAMETER_MISSING_TYPE = 1133 +VAR_MISSING_TYPE = 1134 +PARAMETER_MISSING_DEFAULT_VALUE = 1135 +IMPORTS_NOT_ALPHABETIZED = 1140 +IMPORT_CONTAINS_WILDCARD = 1141 +UNUSED_IMPORT = 1142 +INVALID_TRACE_SEVERITY_LEVEL = 1250 +MISSING_TRACE_SEVERITY_LEVEL = 1251 +MISSING_TRACE_MESSAGE = 1252 +REMOVE_TRACE_BEFORE_SUBMIT = 1253 +REMOVE_COMMENT_BEFORE_SUBMIT = 1254 +# End of list of ActionScript specific errors. + +NEW_ERRORS = frozenset([ + # Errors added after 2.0.2: + WRONG_INDENTATION, + MISSING_SEMICOLON, + # Errors added after 2.2.5: + WRONG_BLANK_LINE_COUNT, + EXTRA_GOOG_REQUIRE, + ]) diff --git a/tools/closure_linter/closure_linter/fixjsstyle.py b/tools/closure_linter/closure_linter/fixjsstyle.py new file mode 100755 index 00000000000..8782e648e6a --- /dev/null +++ b/tools/closure_linter/closure_linter/fixjsstyle.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Automatically fix simple style guide violations."""
+
+__author__ = 'robbyw@google.com (Robert Walker)'
+
+import sys
+
+import gflags as flags
+from closure_linter import checker
+from closure_linter import error_fixer
+from closure_linter.common import simplefileflags as fileflags
+
+
+def main(argv=None):
+  """Main function.
+
+  Args:
+    argv: Sequence of command line arguments.
+  """
+  if argv is None:
+    argv = flags.FLAGS(sys.argv)
+
+  files = fileflags.GetFileList(argv, 'JavaScript', ['.js'])
+
+  style_checker = checker.JavaScriptStyleChecker(error_fixer.ErrorFixer())
+
+  # Check the list of files.
+  for filename in files:
+    style_checker.Check(filename)
+
+if __name__ == '__main__':
+  main()
diff --git a/tools/closure_linter/closure_linter/fixjsstyle_test.py b/tools/closure_linter/closure_linter/fixjsstyle_test.py
new file mode 100755
index 00000000000..42e9c593774
--- /dev/null
+++ b/tools/closure_linter/closure_linter/fixjsstyle_test.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+#
+# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Medium tests for the gjslint auto-fixer."""
+
+__author__ = 'robbyw@google.com (Robby Walker)'
+
+import StringIO
+
+import gflags as flags
+import unittest as googletest
+from closure_linter import checker
+from closure_linter import error_fixer
+
+_RESOURCE_PREFIX = 'closure_linter/testdata'
+
+flags.FLAGS.strict = True
+flags.FLAGS.limited_doc_files = ('dummy.js', 'externs.js')
+flags.FLAGS.closurized_namespaces = ('goog', 'dummy')
+
+class FixJsStyleTest(googletest.TestCase):
+  """Test case for gjslint auto-fixing."""
+
+  def testFixJsStyle(self):
+    input_filename = '%s/fixjsstyle.in.js' % _RESOURCE_PREFIX
+    golden_filename = '%s/fixjsstyle.out.js' % _RESOURCE_PREFIX
+
+    # Autofix the file, sending output to a fake file.
+    actual = StringIO.StringIO()
+    style_checker = checker.JavaScriptStyleChecker(
+        error_fixer.ErrorFixer(actual))
+    style_checker.Check(input_filename)
+
+    # Now compare the files.
+ actual.seek(0) + expected = open(golden_filename, 'r') + + self.assertEqual(actual.readlines(), expected.readlines()) + + +if __name__ == '__main__': + googletest.main() diff --git a/tools/closure_linter/closure_linter/full_test.py b/tools/closure_linter/closure_linter/full_test.py new file mode 100755 index 00000000000..f11f235493f --- /dev/null +++ b/tools/closure_linter/closure_linter/full_test.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Full regression-type (Medium) tests for gjslint. + +Tests every error that can be thrown by gjslint. Based heavily on +devtools/javascript/gpylint/full_test.py +""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +import re +import os +import sys +import unittest + +import gflags as flags +import unittest as googletest + +from closure_linter import checker +from closure_linter import errors +from closure_linter.common import filetestcase + +_RESOURCE_PREFIX = 'closure_linter/testdata' + +flags.FLAGS.strict = True +flags.FLAGS.custom_jsdoc_tags = ('customtag', 'requires') +flags.FLAGS.closurized_namespaces = ('goog', 'dummy') +flags.FLAGS.limited_doc_files = ('externs.js', 'dummy.js') + +# List of files under testdata to test. +# We need to list files explicitly since pyglib can't list directories. +_TEST_FILES = [ + 'all_js_wrapped.js', + 'blank_lines.js', + 'ends_with_block.js', + 'externs.js', + 'html_parse_error.html', + 'indentation.js', + 'interface.js', + 'jsdoc.js', + 'minimal.js', + 'other.js', + 'require_all_caps.js', + 'require_extra.js', + 'require_function.js', + 'require_function_missing.js', + 'require_function_through_both.js', + 'require_function_through_namespace.js', + 'require_interface.js', + 'require_lower_case.js', + 'require_numeric.js', + 'require_provide_ok.js', + 'require_provide_missing.js', + 'simple.html', + 'spaces.js', + 'tokenizer.js', + 'unparseable.js', + 'utf8.html' + ] + + +class GJsLintTestSuite(unittest.TestSuite): + """Test suite to run a GJsLintTest for each of several files. + + If sys.argv[1:] is non-empty, it is interpreted as a list of filenames in + testdata to test. Otherwise, _TEST_FILES is used. + """ + + def __init__(self, tests=()): + unittest.TestSuite.__init__(self, tests) + + argv = sys.argv and sys.argv[1:] or [] + if argv: + test_files = argv + else: + test_files = _TEST_FILES + for test_file in test_files: + resource_path = os.path.join(_RESOURCE_PREFIX, test_file) + self.addTest(filetestcase.AnnotatedFileTestCase(resource_path, + checker.GJsLintRunner(), errors.ByName)) + +if __name__ == '__main__': + # Don't let main parse args; it happens in the TestSuite. 
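+  # Illustrative usage (assumed): "python full_test.py require_extra.js"
+  # runs only testdata/require_extra.js through the suite.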
+ googletest.main(argv=sys.argv[0:1], defaultTest='GJsLintTestSuite') diff --git a/tools/closure_linter/closure_linter/gjslint.py b/tools/closure_linter/closure_linter/gjslint.py new file mode 100755 index 00000000000..e33bdddc19f --- /dev/null +++ b/tools/closure_linter/closure_linter/gjslint.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Checks JavaScript files for common style guide violations. + +gjslint.py is designed to be used as a PRESUBMIT script to check for javascript +style guide violations. As of now, it checks for the following violations: + + * Missing and extra spaces + * Lines longer than 80 characters + * Missing newline at end of file + * Missing semicolon after function declaration + * Valid JsDoc including parameter matching + +Someday it will validate to the best of its ability against the entirety of the +JavaScript style guide. + +This file is a front end that parses arguments and flags. The core of the code +is in tokenizer.py and checker.py. +""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +import sys +import time + +from closure_linter import checker +from closure_linter import errors +from closure_linter.common import errorprinter +from closure_linter.common import simplefileflags as fileflags +import gflags as flags + + +FLAGS = flags.FLAGS +flags.DEFINE_boolean('unix_mode', False, + 'Whether to emit warnings in standard unix format.') +flags.DEFINE_boolean('beep', True, 'Whether to beep when errors are found.') +flags.DEFINE_boolean('time', False, 'Whether to emit timing statistics.') +flags.DEFINE_boolean('check_html', False, + 'Whether to check javascript in html files.') +flags.DEFINE_boolean('summary', False, + 'Whether to show an error count summary.') + +GJSLINT_ONLY_FLAGS = ['--unix_mode', '--beep', '--nobeep', '--time', + '--check_html', '--summary'] + + +def FormatTime(t): + """Formats a duration as a human-readable string. + + Args: + t: A duration in seconds. + + Returns: + A formatted duration string. + """ + if t < 1: + return '%dms' % round(t * 1000) + else: + return '%.2fs' % t + + +def main(argv = None): + """Main function. + + Args: + argv: Sequence of command line arguments. 
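+
+  Example invocation (illustrative): gjslint --unix_mode --strict foo.js
+
+  The process exits with 0 when no errors are found, 1 when only "old"
+  error classes are found, 2 when only new error classes are found, and 3
+  when both are, as computed below.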
+ """ + if argv is None: + argv = flags.FLAGS(sys.argv) + + if FLAGS.time: + start_time = time.time() + + suffixes = ['.js'] + if FLAGS.check_html: + suffixes += ['.html', '.htm'] + files = fileflags.GetFileList(argv, 'JavaScript', suffixes) + + error_handler = None + if FLAGS.unix_mode: + error_handler = errorprinter.ErrorPrinter(errors.NEW_ERRORS) + error_handler.SetFormat(errorprinter.UNIX_FORMAT) + + runner = checker.GJsLintRunner() + result = runner.Run(files, error_handler) + result.PrintSummary() + + exit_code = 0 + if result.HasOldErrors(): + exit_code += 1 + if result.HasNewErrors(): + exit_code += 2 + + if exit_code: + if FLAGS.summary: + result.PrintFileSummary() + + if FLAGS.beep: + # Make a beep noise. + sys.stdout.write(chr(7)) + + # Write out instructions for using fixjsstyle script to fix some of the + # reported errors. + fix_args = [] + for flag in sys.argv[1:]: + for f in GJSLINT_ONLY_FLAGS: + if flag.startswith(f): + break + else: + fix_args.append(flag) + + print """ +Some of the errors reported by GJsLint may be auto-fixable using the script +fixjsstyle. Please double check any changes it makes and report any bugs. The +script can be run by executing: + +fixjsstyle %s +""" % ' '.join(fix_args) + + if FLAGS.time: + print 'Done in %s.' % FormatTime(time.time() - start_time) + + sys.exit(exit_code) + + +if __name__ == '__main__': + main() diff --git a/tools/closure_linter/closure_linter/indentation.py b/tools/closure_linter/closure_linter/indentation.py new file mode 100755 index 00000000000..d740607530c --- /dev/null +++ b/tools/closure_linter/closure_linter/indentation.py @@ -0,0 +1,543 @@ +#!/usr/bin/env python +# +# Copyright 2010 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Methods for checking EcmaScript files for indentation issues.""" + +__author__ = ('robbyw@google.com (Robert Walker)') + +from closure_linter import ecmametadatapass +from closure_linter import errors +from closure_linter import javascripttokens +from closure_linter import tokenutil +from closure_linter.common import error +from closure_linter.common import position + +import gflags as flags + +flags.DEFINE_boolean('debug_indentation', False, + 'Whether to print debugging information for indentation.') + + +# Shorthand +Context = ecmametadatapass.EcmaContext +Error = error.Error +Position = position.Position +Type = javascripttokens.JavaScriptTokenType + + +# The general approach: +# +# 1. Build a stack of tokens that can affect indentation. +# For each token, we determine if it is a block or continuation token. +# Some tokens need to be temporarily overwritten in case they are removed +# before the end of the line. +# Much of the work here is determining which tokens to keep on the stack +# at each point. Operators, for example, should be removed once their +# expression or line is gone, while parentheses must stay until the matching +# end parentheses is found. +# +# 2. Given that stack, determine the allowable indentations. 
+# Due to flexible indentation rules in JavaScript, there may be many
+# allowable indentations for each stack. We follow the general
+# "no false positives" approach of GJsLint and build the most permissive
+# set possible.
+
+
+class TokenInfo(object):
+  """Stores information about a token.
+
+  Attributes:
+    token: The token
+    is_block: Whether the token represents a block indentation.
+    is_transient: Whether the token should be automatically removed without
+      finding a matching end token.
+    overridden_by: TokenInfo for a token that overrides the indentation that
+      this token would require.
+    is_permanent_override: Whether the override on this token should persist
+      even after the overriding token is removed from the stack. For example:
+        x([
+          1],
+        2);
+      needs this to be set so the last line is not required to be a
+      continuation indent.
+    line_number: The effective line number of this token. Will either be the
+      actual line number or the one before it in the case of a mis-wrapped
+      operator.
+  """
+
+  def __init__(self, token, is_block=False):
+    """Initializes a TokenInfo object.
+
+    Args:
+      token: The token
+      is_block: Whether the token represents a block indentation.
+    """
+    self.token = token
+    self.overridden_by = None
+    self.is_permanent_override = False
+    self.is_block = is_block
+    self.is_transient = not is_block and token.type not in (
+        Type.START_PAREN, Type.START_PARAMETERS)
+    self.line_number = token.line_number
+
+  def __repr__(self):
+    result = '\n  %s' % self.token
+    if self.overridden_by:
+      result = '%s OVERRIDDEN [by "%s"]' % (
+          result, self.overridden_by.token.string)
+    result += ' {is_block: %s, is_transient: %s}' % (
+        self.is_block, self.is_transient)
+    return result
+
+
+class IndentationRules(object):
+  """EcmaScript indentation rules.
+
+  Can be used to find common indentation errors in JavaScript, ActionScript
+  and other Ecma-like scripting languages.
+  """
+
+  def __init__(self):
+    """Initializes the IndentationRules checker."""
+    self._stack = []
+
+    # Map from line number to number of characters it is off in indentation.
+    self._start_index_offset = {}
+
+  def Finalize(self):
+    """Verifies that the indentation stack is empty when checking finishes."""
+    if self._stack:
+      old_stack = self._stack
+      self._stack = []
+      raise Exception("INTERNAL ERROR: indentation stack is not empty: %r" %
+                      old_stack)
+
+  def CheckToken(self, token, state):
+    """Checks a token for indentation errors.
+
+    Args:
+      token: The current token under consideration
+      state: Additional information about the current tree state
+
+    Returns:
+      An error array [error code, error string, error token] if the token is
+      improperly indented, or None if indentation is correct.
+    """
+
+    token_type = token.type
+    indentation_errors = []
+    stack = self._stack
+    is_first = self._IsFirstNonWhitespaceTokenInLine(token)
+
+    # Add tokens that could decrease indentation before checking.
+    if token_type == Type.END_PAREN:
+      self._PopTo(Type.START_PAREN)
+
+    elif token_type == Type.END_PARAMETERS:
+      self._PopTo(Type.START_PARAMETERS)
+
+    elif token_type == Type.END_BRACKET:
+      self._PopTo(Type.START_BRACKET)
+
+    elif token_type == Type.END_BLOCK:
+      self._PopTo(Type.START_BLOCK)
+
+    elif token_type == Type.KEYWORD and token.string in ('case', 'default'):
+      self._Add(self._PopTo(Type.START_BLOCK))
+
+    elif is_first and token.string == '.':
+      # This token should have been on the previous line, so treat it as if it
+      # was there.
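+      # e.g. (illustrative):
+      #   foo.bar()
+      #       .baz();
+      # The leading '.' is scored as though it ended the previous line.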
+ info = TokenInfo(token) + info.line_number = token.line_number - 1 + self._Add(info) + + elif token_type == Type.SEMICOLON: + self._PopTransient() + + not_binary_operator = (token_type != Type.OPERATOR or + token.metadata.IsUnaryOperator()) + not_dot = token.string != '.' + if is_first and not_binary_operator and not_dot and token.type not in ( + Type.COMMENT, Type.DOC_PREFIX, Type.STRING_TEXT): + if flags.FLAGS.debug_indentation: + print 'Line #%d: stack %r' % (token.line_number, stack) + + # Ignore lines that start in JsDoc since we don't check them properly yet. + # TODO(robbyw): Support checking JsDoc indentation. + # Ignore lines that start as multi-line strings since indentation is N/A. + # Ignore lines that start with operators since we report that already. + # Ignore lines with tabs since we report that already. + expected = self._GetAllowableIndentations() + actual = self._GetActualIndentation(token) + + # Special case comments describing else, case, and default. Allow them + # to outdent to the parent block. + if token_type in Type.COMMENT_TYPES: + next_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES) + if next_code and next_code.type == Type.END_BLOCK: + next_code = tokenutil.SearchExcept(next_code, Type.NON_CODE_TYPES) + if next_code and next_code.string in ('else', 'case', 'default'): + # TODO(robbyw): This almost certainly introduces false negatives. + expected |= self._AddToEach(expected, -2) + + if actual >= 0 and actual not in expected: + expected = sorted(expected) + indentation_errors.append([ + errors.WRONG_INDENTATION, + 'Wrong indentation: expected any of {%s} but got %d' % ( + ', '.join( + ['%d' % x for x in expected]), actual), + token, + Position(actual, expected[0])]) + self._start_index_offset[token.line_number] = expected[0] - actual + + # Add tokens that could increase indentation. + if token_type == Type.START_BRACKET: + self._Add(TokenInfo(token=token, + is_block=token.metadata.context.type == Context.ARRAY_LITERAL)) + + elif token_type == Type.START_BLOCK or token.metadata.is_implied_block: + self._Add(TokenInfo(token=token, is_block=True)) + + elif token_type in (Type.START_PAREN, Type.START_PARAMETERS): + self._Add(TokenInfo(token=token, is_block=False)) + + elif token_type == Type.KEYWORD and token.string == 'return': + self._Add(TokenInfo(token)) + + elif not token.IsLastInLine() and ( + token.IsAssignment() or token.IsOperator('?')): + self._Add(TokenInfo(token=token)) + + # Handle implied block closes. + if token.metadata.is_implied_block_close: + self._PopToImpliedBlock() + + # Add some tokens only if they appear at the end of the line. + is_last = self._IsLastCodeInLine(token) + if is_last: + if token_type == Type.OPERATOR: + if token.string == ':': + if (stack and stack[-1].token.string == '?'): + # When a ternary : is on a different line than its '?', it doesn't + # add indentation. + if (token.line_number == stack[-1].token.line_number): + self._Add(TokenInfo(token)) + elif token.metadata.context.type == Context.CASE_BLOCK: + # Pop transient tokens from say, line continuations, e.g., + # case x. + # y: + # Want to pop the transient 4 space continuation indent. + self._PopTransient() + # Starting the body of the case statement, which is a type of + # block. + self._Add(TokenInfo(token=token, is_block=True)) + elif token.metadata.context.type == Context.LITERAL_ELEMENT: + # When in an object literal, acts as operator indicating line + # continuations. 
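+            # e.g. (illustrative):
+            #   var obj = {
+            #     key:
+            #         value
+            #   };
+            # The trailing ':' permits the continuation indent on 'value'.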
+ self._Add(TokenInfo(token)) + pass + else: + # ':' might also be a statement label, no effect on indentation in + # this case. + pass + + elif token.string != ',': + self._Add(TokenInfo(token)) + else: + # The token is a comma. + if token.metadata.context.type == Context.VAR: + self._Add(TokenInfo(token)) + elif token.metadata.context.type != Context.PARAMETERS: + self._PopTransient() + + elif (token.string.endswith('.') + and token_type in (Type.IDENTIFIER, Type.NORMAL)): + self._Add(TokenInfo(token)) + elif token_type == Type.PARAMETERS and token.string.endswith(','): + # Parameter lists. + self._Add(TokenInfo(token)) + elif token.metadata.is_implied_semicolon: + self._PopTransient() + elif token.IsAssignment(): + self._Add(TokenInfo(token)) + + return indentation_errors + + def _AddToEach(self, original, amount): + """Returns a new set with the given amount added to each element. + + Args: + original: The original set of numbers + amount: The amount to add to each element + + Returns: + A new set containing each element of the original set added to the amount. + """ + return set([x + amount for x in original]) + + _HARD_STOP_TYPES = (Type.START_PAREN, Type.START_PARAMETERS, + Type.START_BRACKET) + + _HARD_STOP_STRINGS = ('return', '?') + + def _IsHardStop(self, token): + """Determines if the given token can have a hard stop after it. + + Hard stops are indentations defined by the position of another token as in + indentation lined up with return, (, [, and ?. + """ + return (token.type in self._HARD_STOP_TYPES or + token.string in self._HARD_STOP_STRINGS or + token.IsAssignment()) + + def _GetAllowableIndentations(self): + """Computes the set of allowable indentations. + + Returns: + The set of allowable indentations, given the current stack. + """ + expected = set([0]) + hard_stops = set([]) + + # Whether the tokens are still in the same continuation, meaning additional + # indentation is optional. As an example: + # x = 5 + + # 6 + + # 7; + # The second '+' does not add any required indentation. + in_same_continuation = False + + for token_info in self._stack: + token = token_info.token + + # Handle normal additive indentation tokens. + if not token_info.overridden_by and token.string != 'return': + if token_info.is_block: + expected = self._AddToEach(expected, 2) + hard_stops = self._AddToEach(hard_stops, 2) + in_same_continuation = False + elif in_same_continuation: + expected |= self._AddToEach(expected, 4) + hard_stops |= self._AddToEach(hard_stops, 4) + else: + expected = self._AddToEach(expected, 4) + hard_stops |= self._AddToEach(hard_stops, 4) + in_same_continuation = True + + # Handle hard stops after (, [, return, =, and ? 
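+      # e.g. (illustrative): arguments may line up just after the paren:
+      #   doSomething(a,
+      #               b);
+      # and continuations may line up after 'return' (7 characters in):
+      #   return veryLongCondition &&
+      #          otherCondition;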
+ if self._IsHardStop(token): + override_is_hard_stop = (token_info.overridden_by and + self._IsHardStop(token_info.overridden_by.token)) + if not override_is_hard_stop: + start_index = token.start_index + if token.line_number in self._start_index_offset: + start_index += self._start_index_offset[token.line_number] + if (token.type in (Type.START_PAREN, Type.START_PARAMETERS) and + not token_info.overridden_by): + hard_stops.add(start_index + 1) + + elif token.string == 'return' and not token_info.overridden_by: + hard_stops.add(start_index + 7) + + elif (token.type == Type.START_BRACKET): + hard_stops.add(start_index + 1) + + elif token.IsAssignment(): + hard_stops.add(start_index + len(token.string) + 1) + + elif token.IsOperator('?') and not token_info.overridden_by: + hard_stops.add(start_index + 2) + + return (expected | hard_stops) or set([0]) + + def _GetActualIndentation(self, token): + """Gets the actual indentation of the line containing the given token. + + Args: + token: Any token on the line. + + Returns: + The actual indentation of the line containing the given token. Returns + -1 if this line should be ignored due to the presence of tabs. + """ + # Move to the first token in the line + token = tokenutil.GetFirstTokenInSameLine(token) + + # If it is whitespace, it is the indentation. + if token.type == Type.WHITESPACE: + if token.string.find('\t') >= 0: + return -1 + else: + return len(token.string) + elif token.type == Type.PARAMETERS: + return len(token.string) - len(token.string.lstrip()) + else: + return 0 + + def _IsFirstNonWhitespaceTokenInLine(self, token): + """Determines if the given token is the first non-space token on its line. + + Args: + token: The token. + + Returns: + True if the token is the first non-whitespace token on its line. + """ + if token.type in (Type.WHITESPACE, Type.BLANK_LINE): + return False + if token.IsFirstInLine(): + return True + return (token.previous and token.previous.IsFirstInLine() and + token.previous.type == Type.WHITESPACE) + + def _IsLastCodeInLine(self, token): + """Determines if the given token is the last code token on its line. + + Args: + token: The token. + + Returns: + True if the token is the last code token on its line. + """ + if token.type in Type.NON_CODE_TYPES: + return False + start_token = token + while True: + token = token.next + if not token or token.line_number != start_token.line_number: + return True + if token.type not in Type.NON_CODE_TYPES: + return False + + def _Add(self, token_info): + """Adds the given token info to the stack. + + Args: + token_info: The token information to add. + """ + if self._stack and self._stack[-1].token == token_info.token: + # Don't add the same token twice. + return + + if token_info.is_block or token_info.token.type == Type.START_PAREN: + index = 1 + while index <= len(self._stack): + stack_info = self._stack[-index] + stack_token = stack_info.token + + if stack_info.line_number == token_info.line_number: + # In general, tokens only override each other when they are on + # the same line. 
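+          # e.g. (illustrative): in "x = foo(" the '(' takes over from the
+          # '=' when computing the indentation of the following line.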
+ stack_info.overridden_by = token_info + if (token_info.token.type == Type.START_BLOCK and + (stack_token.IsAssignment() or + stack_token.type in (Type.IDENTIFIER, Type.START_PAREN))): + # Multi-line blocks have lasting overrides, as in: + # callFn({ + # a: 10 + # }, + # 30); + close_block = token_info.token.metadata.context.end_token + stack_info.is_permanent_override = \ + close_block.line_number != token_info.token.line_number + elif (token_info.token.type == Type.START_BLOCK and + token_info.token.metadata.context.type == Context.BLOCK and + (stack_token.IsAssignment() or + stack_token.type == Type.IDENTIFIER)): + # When starting a function block, the override can transcend lines. + # For example + # long.long.name = function( + # a) { + # In this case the { and the = are on different lines. But the + # override should still apply. + stack_info.overridden_by = token_info + stack_info.is_permanent_override = True + else: + break + index += 1 + + self._stack.append(token_info) + + def _Pop(self): + """Pops the top token from the stack. + + Returns: + The popped token info. + """ + token_info = self._stack.pop() + if token_info.token.type not in (Type.START_BLOCK, Type.START_BRACKET): + # Remove any temporary overrides. + self._RemoveOverrides(token_info) + else: + # For braces and brackets, which can be object and array literals, remove + # overrides when the literal is closed on the same line. + token_check = token_info.token + same_type = token_check.type + goal_type = None + if token_info.token.type == Type.START_BRACKET: + goal_type = Type.END_BRACKET + else: + goal_type = Type.END_BLOCK + line_number = token_info.token.line_number + count = 0 + while token_check and token_check.line_number == line_number: + if token_check.type == goal_type: + count -= 1 + if not count: + self._RemoveOverrides(token_info) + break + if token_check.type == same_type: + count += 1 + token_check = token_check.next + return token_info + + def _PopToImpliedBlock(self): + """Pops the stack until an implied block token is found.""" + while not self._Pop().token.metadata.is_implied_block: + pass + + def _PopTo(self, stop_type): + """Pops the stack until a token of the given type is popped. + + Args: + stop_type: The type of token to pop to. + + Returns: + The token info of the given type that was popped. + """ + last = None + while True: + last = self._Pop() + if last.token.type == stop_type: + break + return last + + def _RemoveOverrides(self, token_info): + """Marks any token that was overridden by this token as active again. + + Args: + token_info: The token that is being removed from the stack. + """ + for stack_token in self._stack: + if (stack_token.overridden_by == token_info and + not stack_token.is_permanent_override): + stack_token.overridden_by = None + + def _PopTransient(self): + """Pops all transient tokens - i.e. not blocks, literals, or parens.""" + while self._stack and self._stack[-1].is_transient: + self._Pop() diff --git a/tools/closure_linter/closure_linter/javascriptlintrules.py b/tools/closure_linter/closure_linter/javascriptlintrules.py new file mode 100755 index 00000000000..6b9f1be705c --- /dev/null +++ b/tools/closure_linter/closure_linter/javascriptlintrules.py @@ -0,0 +1,395 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Methods for checking JS files for common style guide violations.
+
+These style guide violations should only apply to JavaScript, not to other
+Ecma scripting languages.
+"""
+
+__author__ = ('robbyw@google.com (Robert Walker)',
+              'ajp@google.com (Andy Perelson)',
+              'jacobr@google.com (Jacob Richman)')
+
+import gflags as flags
+from closure_linter import ecmalintrules
+from closure_linter import errors
+from closure_linter import javascripttokenizer
+from closure_linter import javascripttokens
+from closure_linter import tokenutil
+from closure_linter.common import error
+from closure_linter.common import position
+
+FLAGS = flags.FLAGS
+flags.DEFINE_list('closurized_namespaces', '',
+                  'Namespace prefixes, used for testing of '
+                  'goog.provide/require')
+flags.DEFINE_list('ignored_extra_namespaces', '',
+                  'Fully qualified namespaces that should not be reported '
+                  'as extra by the linter.')
+
+# Shorthand
+Error = error.Error
+Position = position.Position
+Type = javascripttokens.JavaScriptTokenType
+
+
+class JavaScriptLintRules(ecmalintrules.EcmaScriptLintRules):
+  """JavaScript lint rules that catch JavaScript specific style errors."""
+
+  def HandleMissingParameterDoc(self, token, param_name):
+    """Handles errors associated with a parameter missing a @param tag."""
+    self._HandleError(errors.MISSING_PARAMETER_DOCUMENTATION,
+                      'Missing docs for parameter: "%s"' % param_name, token)
+
+  def __ContainsRecordType(self, token):
+    """Checks whether the given token contains a record type.
+
+    Args:
+      token: The token being checked
+
+    Returns:
+      True if the token contains a record type, False otherwise.
+    """
+    # If we see more than one left-brace in the string of an annotation token,
+    # then there's a record type in there.
+    return (token and token.type == Type.DOC_FLAG and
+            token.attached_object.type is not None and
+            token.attached_object.type.find('{') != token.string.rfind('{'))
+
+  def CheckToken(self, token, state):
+    """Checks a token, given the current parser_state, for warnings and errors.
+
+    Args:
+      token: The current token under consideration
+      state: parser_state object that indicates the current state in the page
+    """
+    if self.__ContainsRecordType(token):
+      # We should bail out and not emit any warnings for this annotation.
+      # TODO(nicksantos): Support record types for real.
+      state.GetDocComment().Invalidate()
+      return
+
+    # Call the base class's CheckToken function.
+    super(JavaScriptLintRules, self).CheckToken(token, state)
+
+    # Store some convenience variables.
+    first_in_line = token.IsFirstInLine()
+    last_in_line = token.IsLastInLine()
+    type = token.type
+
+    if type == Type.DOC_FLAG:
+      flag = token.attached_object
+
+      if flag.flag_type == 'param' and flag.name_token is not None:
+        self._CheckForMissingSpaceBeforeToken(
+            token.attached_object.name_token)
+
+      if flag.flag_type in state.GetDocFlag().HAS_TYPE:
+        # Check for both a missing type token and empty type braces '{}'.
+        # Missing suppress types are reported separately and we allow enums
+        # without types.
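+        # e.g. (illustrative): "@param x" and "@param {} x" both trigger
+        # MISSING_JSDOC_TAG_TYPE below; "@enum" with no type does not.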
+ if (flag.flag_type not in ('suppress', 'enum') and + (flag.type == None or flag.type == '' or flag.type.isspace())): + self._HandleError(errors.MISSING_JSDOC_TAG_TYPE, + 'Missing type in %s tag' % token.string, token) + + elif flag.name_token and flag.type_end_token and tokenutil.Compare( + flag.type_end_token, flag.name_token) > 0: + self._HandleError( + errors.OUT_OF_ORDER_JSDOC_TAG_TYPE, + 'Type should be immediately after %s tag' % token.string, + token) + + elif type == Type.DOUBLE_QUOTE_STRING_START: + next = token.next + while next.type == Type.STRING_TEXT: + if javascripttokenizer.JavaScriptTokenizer.SINGLE_QUOTE.search( + next.string): + break + next = next.next + else: + self._HandleError( + errors.UNNECESSARY_DOUBLE_QUOTED_STRING, + 'Single-quoted string preferred over double-quoted string.', + token, + Position.All(token.string)) + + elif type == Type.END_DOC_COMMENT: + if (FLAGS.strict and not self._is_html and state.InTopLevel() and + not state.InBlock()): + + # Check if we're in a fileoverview or constructor JsDoc. + doc_comment = state.GetDocComment() + is_constructor = (doc_comment.HasFlag('constructor') or + doc_comment.HasFlag('interface')) + is_file_overview = doc_comment.HasFlag('fileoverview') + + # If the comment is not a file overview, and it does not immediately + # precede some code, skip it. + # NOTE: The tokenutil methods are not used here because of their + # behavior at the top of a file. + next = token.next + if (not next or + (not is_file_overview and next.type in Type.NON_CODE_TYPES)): + return + + # Find the start of this block (include comments above the block, unless + # this is a file overview). + block_start = doc_comment.start_token + if not is_file_overview: + token = block_start.previous + while token and token.type in Type.COMMENT_TYPES: + block_start = token + token = token.previous + + # Count the number of blank lines before this block. + blank_lines = 0 + token = block_start.previous + while token and token.type in [Type.WHITESPACE, Type.BLANK_LINE]: + if token.type == Type.BLANK_LINE: + # A blank line. + blank_lines += 1 + elif token.type == Type.WHITESPACE and not token.line.strip(): + # A line with only whitespace on it. + blank_lines += 1 + token = token.previous + + # Log errors. + error_message = False + expected_blank_lines = 0 + + if is_file_overview and blank_lines == 0: + error_message = 'Should have a blank line before a file overview.' + expected_blank_lines = 1 + elif is_constructor and blank_lines != 3: + error_message = ('Should have 3 blank lines before a constructor/' + 'interface.') + expected_blank_lines = 3 + elif not is_file_overview and not is_constructor and blank_lines != 2: + error_message = 'Should have 2 blank lines between top-level blocks.' + expected_blank_lines = 2 + + if error_message: + self._HandleError(errors.WRONG_BLANK_LINE_COUNT, error_message, + block_start, Position.AtBeginning(), + expected_blank_lines - blank_lines) + + elif type == Type.END_BLOCK: + if state.InFunction() and state.IsFunctionClose(): + is_immediately_called = (token.next and + token.next.type == Type.START_PAREN) + + function = state.GetFunction() + if not self._limited_doc_checks: + if (function.has_return and function.doc and + not is_immediately_called and + not function.doc.HasFlag('return') and + not function.doc.InheritsDocumentation() and + not function.doc.HasFlag('constructor')): + # Check for proper documentation of return value. 
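+            # e.g. (illustrative): a function whose body contains
+            # "return x;" but whose JsDoc has no @return tag is flagged
+            # here.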
+ self._HandleError( + errors.MISSING_RETURN_DOCUMENTATION, + 'Missing @return JsDoc in function with non-trivial return', + function.doc.end_token, Position.AtBeginning()) + elif (not function.has_return and function.doc and + function.doc.HasFlag('return') and + not state.InInterfaceMethod()): + return_flag = function.doc.GetFlag('return') + if (return_flag.type is None or ( + 'undefined' not in return_flag.type and + 'void' not in return_flag.type and + '*' not in return_flag.type)): + self._HandleError( + errors.UNNECESSARY_RETURN_DOCUMENTATION, + 'Found @return JsDoc on function that returns nothing', + return_flag.flag_token, Position.AtBeginning()) + + if state.InFunction() and state.IsFunctionClose(): + is_immediately_called = (token.next and + token.next.type == Type.START_PAREN) + if (function.has_this and function.doc and + not function.doc.HasFlag('this') and + not function.is_constructor and + not function.is_interface and + '.prototype.' not in function.name): + self._HandleError( + errors.MISSING_JSDOC_TAG_THIS, + 'Missing @this JsDoc in function referencing "this". (' + 'this usually means you are trying to reference "this" in ' + 'a static function, or you have forgotten to mark a ' + 'constructor with @constructor)', + function.doc.end_token, Position.AtBeginning()) + + elif type == Type.IDENTIFIER: + if token.string == 'goog.inherits' and not state.InFunction(): + if state.GetLastNonSpaceToken().line_number == token.line_number: + self._HandleError( + errors.MISSING_LINE, + 'Missing newline between constructor and goog.inherits', + token, + Position.AtBeginning()) + + extra_space = state.GetLastNonSpaceToken().next + while extra_space != token: + if extra_space.type == Type.BLANK_LINE: + self._HandleError( + errors.EXTRA_LINE, + 'Extra line between constructor and goog.inherits', + extra_space) + extra_space = extra_space.next + + # TODO(robbyw): Test the last function was a constructor. + # TODO(robbyw): Test correct @extends and @implements documentation. + + elif type == Type.OPERATOR: + # If the token is unary and appears to be used in a unary context + # it's ok. Otherwise, if it's at the end of the line or immediately + # before a comment, it's ok. + # Don't report an error before a start bracket - it will be reported + # by that token's space checks. + if (not token.metadata.IsUnaryOperator() and not last_in_line + and not token.next.IsComment() + and not token.next.IsOperator(',') + and not token.next.type in (Type.WHITESPACE, Type.END_PAREN, + Type.END_BRACKET, Type.SEMICOLON, + Type.START_BRACKET)): + self._HandleError( + errors.MISSING_SPACE, + 'Missing space after "%s"' % token.string, + token, + Position.AtEnd(token.string)) + elif type == Type.WHITESPACE: + # Check whitespace length if it's not the first token of the line and + # if it's not immediately before a comment. + if not last_in_line and not first_in_line and not token.next.IsComment(): + # Ensure there is no space after opening parentheses. + if (token.previous.type in (Type.START_PAREN, Type.START_BRACKET, + Type.FUNCTION_NAME) + or token.next.type == Type.START_PARAMETERS): + self._HandleError( + errors.EXTRA_SPACE, + 'Extra space after "%s"' % token.previous.string, + token, + Position.All(token.string)) + + def Finalize(self, state, tokenizer_mode): + """Perform all checks that need to occur after all lines are processed.""" + # Call the base class's Finalize function. + super(JavaScriptLintRules, self).Finalize(state, tokenizer_mode) + + # Check for sorted requires statements. 
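+    # e.g. (illustrative): goog.require('b.B'); listed before
+    # goog.require('a.A'); triggers this check; the sorted token list is
+    # attached as fix_data for the auto-fixer.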
+ goog_require_tokens = state.GetGoogRequireTokens() + requires = [require_token.string for require_token in goog_require_tokens] + sorted_requires = sorted(requires) + index = 0 + bad = False + for item in requires: + if item != sorted_requires[index]: + bad = True + break + index += 1 + + if bad: + self._HandleError( + errors.GOOG_REQUIRES_NOT_ALPHABETIZED, + 'goog.require classes must be alphabetized. The correct code is:\n' + + '\n'.join(map(lambda x: 'goog.require(\'%s\');' % x, + sorted_requires)), + goog_require_tokens[index], + position=Position.AtBeginning(), + fix_data=goog_require_tokens) + + # Check for sorted provides statements. + goog_provide_tokens = state.GetGoogProvideTokens() + provides = [provide_token.string for provide_token in goog_provide_tokens] + sorted_provides = sorted(provides) + index = 0 + bad = False + for item in provides: + if item != sorted_provides[index]: + bad = True + break + index += 1 + + if bad: + self._HandleError( + errors.GOOG_PROVIDES_NOT_ALPHABETIZED, + 'goog.provide classes must be alphabetized. The correct code is:\n' + + '\n'.join(map(lambda x: 'goog.provide(\'%s\');' % x, + sorted_provides)), + goog_provide_tokens[index], + position=Position.AtBeginning(), + fix_data=goog_provide_tokens) + + if FLAGS.closurized_namespaces: + # Check that we provide everything we need. + provided_namespaces = state.GetProvidedNamespaces() + missing_provides = provided_namespaces - set(provides) + if missing_provides: + self._HandleError( + errors.MISSING_GOOG_PROVIDE, + 'Missing the following goog.provide statements:\n' + + '\n'.join(map(lambda x: 'goog.provide(\'%s\');' % x, + sorted(missing_provides))), + state.GetFirstToken(), position=Position.AtBeginning(), + fix_data=missing_provides) + + # Compose a set of all available namespaces. Explicitly omit goog + # because if you can call goog.require, you already have goog. + available_namespaces = (set(requires) | set(provides) | set(['goog']) | + provided_namespaces) + + # Check that we require everything we need. + missing_requires = set() + for namespace_variants in state.GetUsedNamespaces(): + # Namespace variants is a list of potential things to require. If we + # find we're missing one, we are lazy and choose to require the first + # in the sequence - which should be the namespace. + if not set(namespace_variants) & available_namespaces: + missing_requires.add(namespace_variants[0]) + + if missing_requires: + self._HandleError( + errors.MISSING_GOOG_REQUIRE, + 'Missing the following goog.require statements:\n' + + '\n'.join(map(lambda x: 'goog.require(\'%s\');' % x, + sorted(missing_requires))), + state.GetFirstToken(), position=Position.AtBeginning(), + fix_data=missing_requires) + + # Check that we don't require things we don't actually use. 
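+    # e.g. (illustrative): goog.require('goog.math') in a file that never
+    # mentions goog.math is reported as EXTRA_GOOG_REQUIRE.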
+ namespace_variants = state.GetUsedNamespaces() + used_namespaces = set() + for a, b in namespace_variants: + used_namespaces.add(a) + used_namespaces.add(b) + + extra_requires = set() + for i in requires: + baseNamespace = i.split('.')[0] + if (i not in used_namespaces and + baseNamespace in FLAGS.closurized_namespaces and + i not in FLAGS.ignored_extra_namespaces): + extra_requires.add(i) + + if extra_requires: + self._HandleError( + errors.EXTRA_GOOG_REQUIRE, + 'The following goog.require statements appear unnecessary:\n' + + '\n'.join(map(lambda x: 'goog.require(\'%s\');' % x, + sorted(extra_requires))), + state.GetFirstToken(), position=Position.AtBeginning(), + fix_data=extra_requires) + diff --git a/tools/closure_linter/closure_linter/javascriptstatetracker.py b/tools/closure_linter/closure_linter/javascriptstatetracker.py new file mode 100755 index 00000000000..9cce37632ee --- /dev/null +++ b/tools/closure_linter/closure_linter/javascriptstatetracker.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Parser for JavaScript files.""" + + + +from closure_linter import javascripttokens +from closure_linter import statetracker +from closure_linter import tokenutil + +# Shorthand +Type = javascripttokens.JavaScriptTokenType + + +class JsDocFlag(statetracker.DocFlag): + """Javascript doc flag object. + + Attribute: + flag_type: param, return, define, type, etc. + flag_token: The flag token. + type_start_token: The first token specifying the flag JS type, + including braces. + type_end_token: The last token specifying the flag JS type, + including braces. + type: The JavaScript type spec. + name_token: The token specifying the flag name. + name: The flag name + description_start_token: The first token in the description. + description_end_token: The end token in the description. + description: The description. + """ + + # Please keep these lists alphabetized. + + # Some projects use the following extensions to JsDoc. + # TODO(robbyw): determine which of these, if any, should be illegal. + EXTENDED_DOC = frozenset([ + 'class', 'code', 'desc', 'final', 'hidden', 'inheritDoc', 'link', + 'protected', 'notypecheck', 'throws']) + + LEGAL_DOC = EXTENDED_DOC | statetracker.DocFlag.LEGAL_DOC + + def __init__(self, flag_token): + """Creates the JsDocFlag object and attaches it to the given start token. + + Args: + flag_token: The starting token of the flag. + """ + statetracker.DocFlag.__init__(self, flag_token) + + +class JavaScriptStateTracker(statetracker.StateTracker): + """JavaScript state tracker. + + Inherits from the core EcmaScript StateTracker adding extra state tracking + functionality needed for JavaScript. + """ + + def __init__(self, closurized_namespaces=''): + """Initializes a JavaScript token stream state tracker. + + Args: + closurized_namespaces: An optional list of namespace prefixes used for + testing of goog.provide/require. 
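+
+    Example (illustrative):
+
+      tracker = JavaScriptStateTracker(closurized_namespaces=['goog'])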
+ """ + statetracker.StateTracker.__init__(self, JsDocFlag) + self.__closurized_namespaces = closurized_namespaces + + def Reset(self): + """Resets the state tracker to prepare for processing a new page.""" + super(JavaScriptStateTracker, self).Reset() + + self.__goog_require_tokens = [] + self.__goog_provide_tokens = [] + self.__provided_namespaces = set() + self.__used_namespaces = [] + + def InTopLevel(self): + """Compute whether we are at the top level in the class. + + This function call is language specific. In some languages like + JavaScript, a function is top level if it is not inside any parenthesis. + In languages such as ActionScript, a function is top level if it is directly + within a class. + + Returns: + Whether we are at the top level in the class. + """ + return not self.InParentheses() + + def GetGoogRequireTokens(self): + """Returns list of require tokens.""" + return self.__goog_require_tokens + + def GetGoogProvideTokens(self): + """Returns list of provide tokens.""" + return self.__goog_provide_tokens + + def GetProvidedNamespaces(self): + """Returns list of provided namespaces.""" + return self.__provided_namespaces + + def GetUsedNamespaces(self): + """Returns list of used namespaces, is a list of sequences.""" + return self.__used_namespaces + + def GetBlockType(self, token): + """Determine the block type given a START_BLOCK token. + + Code blocks come after parameters, keywords like else, and closing parens. + + Args: + token: The current token. Can be assumed to be type START_BLOCK + Returns: + Code block type for current token. + """ + last_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES, None, + True) + if last_code.type in (Type.END_PARAMETERS, Type.END_PAREN, + Type.KEYWORD) and not last_code.IsKeyword('return'): + return self.CODE + else: + return self.OBJECT_LITERAL + + def HandleToken(self, token, last_non_space_token): + """Handles the given token and updates state. + + Args: + token: The token to handle. + last_non_space_token: + """ + super(JavaScriptStateTracker, self).HandleToken(token, + last_non_space_token) + + if token.IsType(Type.IDENTIFIER): + if token.string == 'goog.require': + class_token = tokenutil.Search(token, Type.STRING_TEXT) + self.__goog_require_tokens.append(class_token) + + elif token.string == 'goog.provide': + class_token = tokenutil.Search(token, Type.STRING_TEXT) + self.__goog_provide_tokens.append(class_token) + + elif self.__closurized_namespaces: + self.__AddUsedNamespace(token.string) + if token.IsType(Type.SIMPLE_LVALUE) and not self.InFunction(): + identifier = token.values['identifier'] + + if self.__closurized_namespaces: + namespace = self.GetClosurizedNamespace(identifier) + if namespace and identifier == namespace: + self.__provided_namespaces.add(namespace) + if (self.__closurized_namespaces and + token.IsType(Type.DOC_FLAG) and + token.attached_object.flag_type == 'implements'): + # Interfaces should be goog.require'd. + doc_start = tokenutil.Search(token, Type.DOC_START_BRACE) + interface = tokenutil.Search(doc_start, Type.COMMENT) + self.__AddUsedNamespace(interface.string) + + def __AddUsedNamespace(self, identifier): + """Adds the namespace of an identifier to the list of used namespaces. + + Args: + identifier: An identifier which has been used. + """ + namespace = self.GetClosurizedNamespace(identifier) + + if namespace: + # We add token.string as a 'namespace' as it is something that could + # potentially be provided to satisfy this dependency. 
+ self.__used_namespaces.append([namespace, identifier]) + + def GetClosurizedNamespace(self, identifier): + """Given an identifier, returns the namespace that identifier is from. + + Args: + identifier: The identifier to extract a namespace from. + + Returns: + The namespace the given identifier resides in, or None if one could not + be found. + """ + parts = identifier.split('.') + for part in parts: + if part.endswith('_'): + # Ignore private variables / inner classes. + return None + + if identifier.startswith('goog.global'): + # Ignore goog.global, since it is, by definition, global. + return None + + for namespace in self.__closurized_namespaces: + if identifier.startswith(namespace + '.'): + last_part = parts[-1] + if not last_part: + # TODO(robbyw): Handle this: it's a multi-line identifier. + return None + + if last_part in ('apply', 'inherits', 'call'): + # Calling one of Function's methods usually indicates use of a + # superclass. + parts.pop() + last_part = parts[-1] + + for i in xrange(1, len(parts)): + part = parts[i] + if part.isupper(): + # If an identifier is of the form foo.bar.BAZ.x or foo.bar.BAZ, + # the namespace is foo.bar. + return '.'.join(parts[:i]) + if part == 'prototype': + # If an identifier is of the form foo.bar.prototype.x, the + # namespace is foo.bar. + return '.'.join(parts[:i]) + + if last_part.isupper() or not last_part[0].isupper(): + # Strip off the last part of an enum or constant reference. + parts.pop() + + return '.'.join(parts) + + return None diff --git a/tools/closure_linter/closure_linter/javascriptstatetracker_test.py b/tools/closure_linter/closure_linter/javascriptstatetracker_test.py new file mode 100755 index 00000000000..e4288b7b647 --- /dev/null +++ b/tools/closure_linter/closure_linter/javascriptstatetracker_test.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python +# +# Copyright 2010 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
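
The namespace-extraction rules in GetClosurizedNamespace above read most clearly as a pure function. A simplified re-implementation for illustration (not the patch's code; the real method also rejects multi-line identifiers, and 'package' here mirrors the unit-test table below):

    def closurized_namespace(identifier, namespaces=('package',)):
        parts = identifier.split('.')
        if any(p.endswith('_') for p in parts):
            return None  # private members have no public namespace
        if identifier.startswith('goog.global'):
            return None  # goog.global is, by definition, global
        for ns in namespaces:
            if identifier.startswith(ns + '.'):
                if parts[-1] in ('apply', 'inherits', 'call'):
                    parts.pop()  # Function-method call implies superclass use
                for i in range(1, len(parts)):
                    if parts[i].isupper() or parts[i] == 'prototype':
                        return '.'.join(parts[:i])
                if parts[-1].isupper() or not parts[-1][0].isupper():
                    parts.pop()  # strip an enum/constant or method leaf
                return '.'.join(parts)
        return None

    assert closurized_namespace('package.ClassName.prototype.methodName') == \
        'package.ClassName'
    assert closurized_namespace('package.ClassName.privateMethod_') is None
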
+ +"""Unit tests for JavaScriptStateTracker.""" + + + +import unittest as googletest +from closure_linter import javascriptstatetracker + +class JavaScriptStateTrackerTest(googletest.TestCase): + + __test_cases = { + 'package.CONSTANT' : 'package', + 'package.methodName' : 'package', + 'package.subpackage.methodName' : 'package.subpackage', + 'package.ClassName.something' : 'package.ClassName', + 'package.ClassName.Enum.VALUE.methodName' : 'package.ClassName.Enum', + 'package.ClassName.CONSTANT' : 'package.ClassName', + 'package.ClassName.inherits' : 'package.ClassName', + 'package.ClassName.apply' : 'package.ClassName', + 'package.ClassName.methodName.apply' : 'package.ClassName', + 'package.ClassName.methodName.call' : 'package.ClassName', + 'package.ClassName.prototype.methodName' : 'package.ClassName', + 'package.ClassName.privateMethod_' : None, + 'package.ClassName.prototype.methodName.apply' : 'package.ClassName' + } + + def testGetClosurizedNamespace(self): + stateTracker = javascriptstatetracker.JavaScriptStateTracker(['package']) + for identifier, expected_namespace in self.__test_cases.items(): + actual_namespace = stateTracker.GetClosurizedNamespace(identifier) + self.assertEqual(expected_namespace, actual_namespace, + 'expected namespace "' + str(expected_namespace) + + '" for identifier "' + str(identifier) + '" but was "' + + str(actual_namespace) + '"') + +if __name__ == '__main__': + googletest.main() + diff --git a/tools/closure_linter/closure_linter/javascripttokenizer.py b/tools/closure_linter/closure_linter/javascripttokenizer.py new file mode 100755 index 00000000000..097d3fd1164 --- /dev/null +++ b/tools/closure_linter/closure_linter/javascripttokenizer.py @@ -0,0 +1,365 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Regular expression based JavaScript parsing classes.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +import copy +import re + +from closure_linter import javascripttokens +from closure_linter.common import matcher +from closure_linter.common import tokenizer + +# Shorthand +Type = javascripttokens.JavaScriptTokenType +Matcher = matcher.Matcher + + +class JavaScriptModes(object): + """Enumeration of the different matcher modes used for JavaScript.""" + TEXT_MODE = 'text' + SINGLE_QUOTE_STRING_MODE = 'single_quote_string' + DOUBLE_QUOTE_STRING_MODE = 'double_quote_string' + BLOCK_COMMENT_MODE = 'block_comment' + DOC_COMMENT_MODE = 'doc_comment' + DOC_COMMENT_LEX_SPACES_MODE = 'doc_comment_spaces' + LINE_COMMENT_MODE = 'line_comment' + PARAMETER_MODE = 'parameter' + FUNCTION_MODE = 'function' + + +class JavaScriptTokenizer(tokenizer.Tokenizer): + """JavaScript tokenizer. + + Convert JavaScript code in to an array of tokens. + """ + + # Useful patterns for JavaScript parsing. 
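
The matcher-table design described above is a small mode machine: the active mode selects which patterns may fire next, and a successful match can switch modes. A toy two-mode version of the idea (illustrative sketch only, not the patch's code):

    import re

    MODES = {
        'text': [(re.compile(r"'"), 'str_start', 'string'),
                 (re.compile(r'\w+'), 'identifier', 'text')],
        'string': [(re.compile(r"[^']+"), 'str_text', 'string'),
                   (re.compile(r"'"), 'str_end', 'text')],
    }

    def tokenize(line):
        mode, pos, out = 'text', 0, []
        while pos < len(line):
            for pattern, token_type, next_mode in MODES[mode]:
                m = pattern.match(line, pos)
                if m:
                    out.append((token_type, m.group()))
                    mode, pos = next_mode, m.end()
                    break
            else:
                pos += 1  # unmatched char: skip (the real tokenizer emits NORMAL)
        return out

    print(tokenize("foo 'bar'"))
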
+ IDENTIFIER_CHAR = r'A-Za-z0-9_$.'; + + # Number patterns based on: + # http://www.mozilla.org/js/language/js20-2000-07/formal/lexer-grammar.html + MANTISSA = r""" + (\d+(?!\.)) | # Matches '10' + (\d+\.(?!\d)) | # Matches '10.' + (\d*\.\d+) # Matches '.5' or '10.5' + """ + DECIMAL_LITERAL = r'(%s)([eE][-+]?\d+)?' % MANTISSA + HEX_LITERAL = r'0[xX][0-9a-fA-F]+' + NUMBER = re.compile(r""" + ((%s)|(%s)) + """ % (HEX_LITERAL, DECIMAL_LITERAL), re.VERBOSE) + + # Strings come in three parts - first we match the start of the string, then + # the contents, then the end. The contents consist of any character except a + # backslash or end of string, or a backslash followed by any character, or a + # backslash followed by end of line to support correct parsing of multi-line + # strings. + SINGLE_QUOTE = re.compile(r"'") + SINGLE_QUOTE_TEXT = re.compile(r"([^'\\]|\\(.|$))+") + DOUBLE_QUOTE = re.compile(r'"') + DOUBLE_QUOTE_TEXT = re.compile(r'([^"\\]|\\(.|$))+') + + START_SINGLE_LINE_COMMENT = re.compile(r'//') + END_OF_LINE_SINGLE_LINE_COMMENT = re.compile(r'//$') + + START_DOC_COMMENT = re.compile(r'/\*\*') + START_BLOCK_COMMENT = re.compile(r'/\*') + END_BLOCK_COMMENT = re.compile(r'\*/') + BLOCK_COMMENT_TEXT = re.compile(r'([^*]|\*(?!/))+') + + # Comment text is anything that we are not going to parse into another special + # token like (inline) flags or end comments. Complicated regex to match + # most normal characters, and '*', '{', '}', and '@' when we are sure that + # it is safe. Expression [^*{\s]@ must come first, or the other options will + # match everything before @, and we won't match @'s that aren't part of flags + # like in email addresses in the @author tag. + DOC_COMMENT_TEXT = re.compile(r'([^*{}\s]@|[^*{}@]|\*(?!/))+') + DOC_COMMENT_NO_SPACES_TEXT = re.compile(r'([^*{}\s]@|[^*{}@\s]|\*(?!/))+') + + # Match the prefix ' * ' that starts every line of jsdoc. Want to include + # spaces after the '*', but nothing else that occurs after a '*', and don't + # want to match the '*' in '*/'. + DOC_PREFIX = re.compile(r'\s*\*(\s+|(?!/))') + + START_BLOCK = re.compile('{') + END_BLOCK = re.compile('}') + + REGEX_CHARACTER_CLASS = r""" + \[ # Opening bracket + ([^\]\\]|\\.)* # Anything but a ] or \, + # or a backslash followed by anything + \] # Closing bracket + """ + # We ensure the regex is followed by one of the above tokens to avoid + # incorrectly parsing something like x / y / z as x REGEX(/ y /) z + POST_REGEX_LIST = [ + ';', ',', r'\.', r'\)', r'\]', '$', r'\/\/', r'\/\*', ':', '}'] + + REGEX = re.compile(r""" + / # opening slash + (?!\*) # not the start of a comment + (\\.|[^\[\/\\]|(%s))* # a backslash followed by anything, + # or anything but a / or [ or \, + # or a character class + / # closing slash + [gimsx]* # optional modifiers + (?=\s*(%s)) + """ % (REGEX_CHARACTER_CLASS, '|'.join(POST_REGEX_LIST)), + re.VERBOSE) + + ANYTHING = re.compile(r'.*') + PARAMETERS = re.compile(r'[^\)]+') + CLOSING_PAREN_WITH_SPACE = re.compile(r'\)\s*') + + FUNCTION_DECLARATION = re.compile(r'\bfunction\b') + + OPENING_PAREN = re.compile(r'\(') + CLOSING_PAREN = re.compile(r'\)') + + OPENING_BRACKET = re.compile(r'\[') + CLOSING_BRACKET = re.compile(r'\]') + + # We omit these JS keywords from the list: + # function - covered by FUNCTION_DECLARATION. + # delete, in, instanceof, new, typeof - included as operators. + # this - included in identifiers. + # null, undefined - not included, should go in some "special constant" list. 
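
The POST_REGEX_LIST lookahead above is what disambiguates a regex literal from division. A quick demonstration with a stripped-down version of the REGEX pattern (no character classes; illustrative only, not the pattern from the patch):

    import re

    REGEX = re.compile(r'/(\\.|[^\[/\\])+/[gimsx]*(?=\s*(;|,|\.|\)|\]|$))')

    m = REGEX.search('var r = /ab+c/g;')
    print(m.group() if m else None)  # /ab+c/g  -- followed by ';'
    m = REGEX.search('total = x / y / z')
    print(m.group() if m else None)  # None -- '/ y /' is division, not a regex
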
+  KEYWORD_LIST = ['break', 'case', 'catch', 'continue', 'default', 'do', 'else',
+      'finally', 'for', 'if', 'return', 'switch', 'throw', 'try', 'var',
+      'while', 'with']
+  # Match a keyword string followed by a non-identifier character in order to
+  # not match something like doSomething as do + Something.
+  KEYWORD = re.compile('(%s)((?=[^%s])|$)' % (
+      '|'.join(KEYWORD_LIST), IDENTIFIER_CHAR))
+
+  # List of regular expressions to match as operators.  Some notes: for our
+  # purposes, the comma behaves similarly enough to a normal operator that we
+  # include it here.  r'\bin\b' actually matches 'in' surrounded by boundary
+  # characters - this may not match some very esoteric uses of the in operator.
+  # Operators that are subsets of larger operators must come later in this list
+  # for proper matching, e.g., '>>' must come AFTER '>>>'.
+  OPERATOR_LIST = [',', r'\+\+', '===', '!==', '>>>=', '>>>', '==', '>=', '<=',
+                   '!=', '<<=', '>>=', '<<', '>>', '>', '<', r'\+=', r'\+',
+                   '--', '\^=', '-=', '-', '/=', '/', r'\*=', r'\*', '%=', '%',
+                   '&&', r'\|\|', '&=', '&', r'\|=', r'\|', '=', '!', ':', '\?',
+                   r'\bdelete\b', r'\bin\b', r'\binstanceof\b', r'\bnew\b',
+                   r'\btypeof\b', r'\bvoid\b']
+  OPERATOR = re.compile('|'.join(OPERATOR_LIST))
+
+  WHITESPACE = re.compile(r'\s+')
+  SEMICOLON = re.compile(r';')
+  # Technically JavaScript identifiers can't contain '.', but we treat a set of
+  # nested identifiers as a single identifier.
+  NESTED_IDENTIFIER = r'[a-zA-Z_$][%s.]*' % IDENTIFIER_CHAR
+  IDENTIFIER = re.compile(NESTED_IDENTIFIER)
+
+  SIMPLE_LVALUE = re.compile(r"""
+      (?P<identifier>%s)    # a valid identifier
+      (?=\s*                # optional whitespace
+      \=                    # look ahead to equal sign
+      (?!=))                # not followed by equal
+      """ % NESTED_IDENTIFIER, re.VERBOSE)
+
+  # A doc flag is a @ sign followed by non-space characters that appears at the
+  # beginning of the line, after whitespace, or after a '{'.  The look-behind
+  # check is necessary to not match someone@google.com as a flag.
+  DOC_FLAG = re.compile(r'(^|(?<=\s))@(?P<name>[a-zA-Z]+)')
+  # To properly parse parameter names, we need to tokenize whitespace into a
+  # token.
+  DOC_FLAG_LEX_SPACES = re.compile(r'(^|(?<=\s))@(?P<name>%s)\b' %
+                                   '|'.join(['param']))
+
+  DOC_INLINE_FLAG = re.compile(r'(?<={)@(?P<name>[a-zA-Z]+)')
+
+  # Star followed by non-slash, i.e. a star that does not end a comment.
+  # This is used for TYPE_GROUP below.
+  SAFE_STAR = r'(\*(?!/))'
+
+  COMMON_DOC_MATCHERS = [
+      # Find the end of the comment.
+      Matcher(END_BLOCK_COMMENT, Type.END_DOC_COMMENT,
+              JavaScriptModes.TEXT_MODE),
+
+      # Tokenize documented flags like @private.
+      Matcher(DOC_INLINE_FLAG, Type.DOC_INLINE_FLAG),
+      Matcher(DOC_FLAG_LEX_SPACES, Type.DOC_FLAG,
+              JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE),
+      Matcher(DOC_FLAG, Type.DOC_FLAG),
+
+      # Tokenize braces so we can find types.
+      Matcher(START_BLOCK, Type.DOC_START_BRACE),
+      Matcher(END_BLOCK, Type.DOC_END_BRACE),
+      Matcher(DOC_PREFIX, Type.DOC_PREFIX, None, True)]
+
+
+  # The token matcher groups work as follows: it is a list of Matcher objects.
+  # The matchers will be tried in this order, and the first to match will be
+  # returned.  Hence the order is important because the matchers that come first
+  # overrule the matchers that come later.
+  JAVASCRIPT_MATCHERS = {
+      # Matchers for basic text mode.
+      JavaScriptModes.TEXT_MODE: [
+          # Check a big group - strings, starting comments, and regexes - all
+          # of which could be intertwined.
'string with /regex/', + # /regex with 'string'/, /* comment with /regex/ and string */ (and so on) + Matcher(START_DOC_COMMENT, Type.START_DOC_COMMENT, + JavaScriptModes.DOC_COMMENT_MODE), + Matcher(START_BLOCK_COMMENT, Type.START_BLOCK_COMMENT, + JavaScriptModes.BLOCK_COMMENT_MODE), + Matcher(END_OF_LINE_SINGLE_LINE_COMMENT, + Type.START_SINGLE_LINE_COMMENT), + Matcher(START_SINGLE_LINE_COMMENT, Type.START_SINGLE_LINE_COMMENT, + JavaScriptModes.LINE_COMMENT_MODE), + Matcher(SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_START, + JavaScriptModes.SINGLE_QUOTE_STRING_MODE), + Matcher(DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_START, + JavaScriptModes.DOUBLE_QUOTE_STRING_MODE), + Matcher(REGEX, Type.REGEX), + + # Next we check for start blocks appearing outside any of the items above. + Matcher(START_BLOCK, Type.START_BLOCK), + Matcher(END_BLOCK, Type.END_BLOCK), + + # Then we search for function declarations. + Matcher(FUNCTION_DECLARATION, Type.FUNCTION_DECLARATION, + JavaScriptModes.FUNCTION_MODE), + + # Next, we convert non-function related parens to tokens. + Matcher(OPENING_PAREN, Type.START_PAREN), + Matcher(CLOSING_PAREN, Type.END_PAREN), + + # Next, we convert brackets to tokens. + Matcher(OPENING_BRACKET, Type.START_BRACKET), + Matcher(CLOSING_BRACKET, Type.END_BRACKET), + + # Find numbers. This has to happen before operators because scientific + # notation numbers can have + and - in them. + Matcher(NUMBER, Type.NUMBER), + + # Find operators and simple assignments + Matcher(SIMPLE_LVALUE, Type.SIMPLE_LVALUE), + Matcher(OPERATOR, Type.OPERATOR), + + # Find key words and whitespace + Matcher(KEYWORD, Type.KEYWORD), + Matcher(WHITESPACE, Type.WHITESPACE), + + # Find identifiers + Matcher(IDENTIFIER, Type.IDENTIFIER), + + # Finally, we convert semicolons to tokens. + Matcher(SEMICOLON, Type.SEMICOLON)], + + + # Matchers for single quote strings. + JavaScriptModes.SINGLE_QUOTE_STRING_MODE: [ + Matcher(SINGLE_QUOTE_TEXT, Type.STRING_TEXT), + Matcher(SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_END, + JavaScriptModes.TEXT_MODE)], + + + # Matchers for double quote strings. + JavaScriptModes.DOUBLE_QUOTE_STRING_MODE: [ + Matcher(DOUBLE_QUOTE_TEXT, Type.STRING_TEXT), + Matcher(DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_END, + JavaScriptModes.TEXT_MODE)], + + + # Matchers for block comments. + JavaScriptModes.BLOCK_COMMENT_MODE: [ + # First we check for exiting a block comment. + Matcher(END_BLOCK_COMMENT, Type.END_BLOCK_COMMENT, + JavaScriptModes.TEXT_MODE), + + # Match non-comment-ending text.. + Matcher(BLOCK_COMMENT_TEXT, Type.COMMENT)], + + + # Matchers for doc comments. + JavaScriptModes.DOC_COMMENT_MODE: COMMON_DOC_MATCHERS + [ + Matcher(DOC_COMMENT_TEXT, Type.COMMENT)], + + JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: COMMON_DOC_MATCHERS + [ + Matcher(WHITESPACE, Type.COMMENT), + Matcher(DOC_COMMENT_NO_SPACES_TEXT, Type.COMMENT)], + + # Matchers for single line comments. + JavaScriptModes.LINE_COMMENT_MODE: [ + # We greedy match until the end of the line in line comment mode. + Matcher(ANYTHING, Type.COMMENT, JavaScriptModes.TEXT_MODE)], + + + # Matchers for code after the function keyword. + JavaScriptModes.FUNCTION_MODE: [ + # Must match open paren before anything else and move into parameter mode, + # otherwise everything inside the parameter list is parsed incorrectly. 
+ Matcher(OPENING_PAREN, Type.START_PARAMETERS, + JavaScriptModes.PARAMETER_MODE), + Matcher(WHITESPACE, Type.WHITESPACE), + Matcher(IDENTIFIER, Type.FUNCTION_NAME)], + + + # Matchers for function parameters + JavaScriptModes.PARAMETER_MODE: [ + # When in function parameter mode, a closing paren is treated specially. + # Everything else is treated as lines of parameters. + Matcher(CLOSING_PAREN_WITH_SPACE, Type.END_PARAMETERS, + JavaScriptModes.TEXT_MODE), + Matcher(PARAMETERS, Type.PARAMETERS, JavaScriptModes.PARAMETER_MODE)]} + + + # When text is not matched, it is given this default type based on mode. + # If unspecified in this map, the default default is Type.NORMAL. + JAVASCRIPT_DEFAULT_TYPES = { + JavaScriptModes.DOC_COMMENT_MODE: Type.COMMENT, + JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: Type.COMMENT + } + + def __init__(self, parse_js_doc = True): + """Create a tokenizer object. + + Args: + parse_js_doc: Whether to do detailed parsing of javascript doc comments, + or simply treat them as normal comments. Defaults to parsing JsDoc. + """ + matchers = self.JAVASCRIPT_MATCHERS + if not parse_js_doc: + # Make a copy so the original doesn't get modified. + matchers = copy.deepcopy(matchers) + matchers[JavaScriptModes.DOC_COMMENT_MODE] = matchers[ + JavaScriptModes.BLOCK_COMMENT_MODE] + + tokenizer.Tokenizer.__init__(self, JavaScriptModes.TEXT_MODE, matchers, + self.JAVASCRIPT_DEFAULT_TYPES) + + def _CreateToken(self, string, token_type, line, line_number, values=None): + """Creates a new JavaScriptToken object. + + Args: + string: The string of input the token contains. + token_type: The type of token. + line: The text of the line this token is in. + line_number: The line number of the token. + values: A dict of named values within the token. For instance, a + function declaration may have a value called 'name' which captures the + name of the function. + """ + return javascripttokens.JavaScriptToken(string, token_type, line, + line_number, values) diff --git a/tools/closure_linter/closure_linter/javascripttokens.py b/tools/closure_linter/closure_linter/javascripttokens.py new file mode 100755 index 00000000000..f46d4e17bc8 --- /dev/null +++ b/tools/closure_linter/closure_linter/javascripttokens.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
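
For orientation, a hypothetical usage sketch of the tokenizer defined above. It assumes the package is importable and that the base class in this patch's common/tokenizer.py exposes a TokenizeFile method taking an iterable of source lines; it is not code from the patch itself:

    from closure_linter import javascripttokenizer

    tokenizer = javascripttokenizer.JavaScriptTokenizer()
    # TokenizeFile (assumed, from the common Tokenizer base class) returns
    # the first token of a doubly linked token list.
    token = tokenizer.TokenizeFile(['var x = 10;\n'])
    while token:
        print('%s %r' % (token.type, token.string))
        token = token.next
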
+ +"""Classes to represent JavaScript tokens.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +from closure_linter.common import tokens + +class JavaScriptTokenType(tokens.TokenType): + """Enumeration of JavaScript token types, and useful sets of token types.""" + NUMBER = 'number' + START_SINGLE_LINE_COMMENT = '//' + START_BLOCK_COMMENT = '/*' + START_DOC_COMMENT = '/**' + END_BLOCK_COMMENT = '*/' + END_DOC_COMMENT = 'doc */' + COMMENT = 'comment' + SINGLE_QUOTE_STRING_START = "'string" + SINGLE_QUOTE_STRING_END = "string'" + DOUBLE_QUOTE_STRING_START = '"string' + DOUBLE_QUOTE_STRING_END = 'string"' + STRING_TEXT = 'string' + START_BLOCK = '{' + END_BLOCK = '}' + START_PAREN = '(' + END_PAREN = ')' + START_BRACKET = '[' + END_BRACKET = ']' + REGEX = '/regex/' + FUNCTION_DECLARATION = 'function(...)' + FUNCTION_NAME = 'function functionName(...)' + START_PARAMETERS = 'startparams(' + PARAMETERS = 'pa,ra,ms' + END_PARAMETERS = ')endparams' + SEMICOLON = ';' + DOC_FLAG = '@flag' + DOC_INLINE_FLAG = '{@flag ...}' + DOC_START_BRACE = 'doc {' + DOC_END_BRACE = 'doc }' + DOC_PREFIX = 'comment prefix: * ' + SIMPLE_LVALUE = 'lvalue=' + KEYWORD = 'keyword' + OPERATOR = 'operator' + IDENTIFIER = 'identifier' + + STRING_TYPES = frozenset([ + SINGLE_QUOTE_STRING_START, SINGLE_QUOTE_STRING_END, + DOUBLE_QUOTE_STRING_START, DOUBLE_QUOTE_STRING_END, STRING_TEXT]) + + COMMENT_TYPES = frozenset([START_SINGLE_LINE_COMMENT, COMMENT, + START_BLOCK_COMMENT, START_DOC_COMMENT, + END_BLOCK_COMMENT, END_DOC_COMMENT, + DOC_START_BRACE, DOC_END_BRACE, + DOC_FLAG, DOC_INLINE_FLAG, DOC_PREFIX]) + + FLAG_DESCRIPTION_TYPES = frozenset([ + DOC_INLINE_FLAG, COMMENT, DOC_START_BRACE, DOC_END_BRACE]) + + FLAG_ENDING_TYPES = frozenset([DOC_FLAG, END_DOC_COMMENT]) + + NON_CODE_TYPES = COMMENT_TYPES | frozenset([ + tokens.TokenType.WHITESPACE, tokens.TokenType.BLANK_LINE]) + + UNARY_OPERATORS = ['!', 'new', 'delete', 'typeof', 'void'] + + UNARY_OK_OPERATORS = ['--', '++', '-', '+'] + UNARY_OPERATORS + + UNARY_POST_OPERATORS = ['--', '++'] + + # An expression ender is any token that can end an object - i.e. we could have + # x.y or [1, 2], or (10 + 9) or {a: 10}. + EXPRESSION_ENDER_TYPES = [tokens.TokenType.NORMAL, IDENTIFIER, NUMBER, + SIMPLE_LVALUE, END_BRACKET, END_PAREN, END_BLOCK, + SINGLE_QUOTE_STRING_END, DOUBLE_QUOTE_STRING_END] + + +class JavaScriptToken(tokens.Token): + """JavaScript token subclass of Token, provides extra instance checks. + + The following token types have data in attached_object: + - All JsDoc flags: a parser.JsDocFlag object. + """ + + def IsKeyword(self, keyword): + """Tests if this token is the given keyword. + + Args: + keyword: The keyword to compare to. + + Returns: + True if this token is a keyword token with the given name. + """ + return self.type == JavaScriptTokenType.KEYWORD and self.string == keyword + + def IsOperator(self, operator): + """Tests if this token is the given operator. + + Args: + operator: The operator to compare to. + + Returns: + True if this token is a operator token with the given name. + """ + return self.type == JavaScriptTokenType.OPERATOR and self.string == operator + + def IsAssignment(self): + """Tests if this token is an assignment operator. + + Returns: + True if this token is an assignment operator. 
+ """ + return (self.type == JavaScriptTokenType.OPERATOR and + self.string.endswith('=') and + self.string not in ('==', '!=', '>=', '<=', '===', '!==')) + + def IsComment(self): + """Tests if this token is any part of a comment. + + Returns: + True if this token is any part of a comment. + """ + return self.type in JavaScriptTokenType.COMMENT_TYPES + + def IsCode(self): + """Tests if this token is code, as opposed to a comment or whitespace.""" + return self.type not in JavaScriptTokenType.NON_CODE_TYPES + + def __repr__(self): + return '' % (self.line_number, + self.type, self.string, + self.values, + self.metadata) diff --git a/tools/closure_linter/closure_linter/statetracker.py b/tools/closure_linter/closure_linter/statetracker.py new file mode 100755 index 00000000000..5630c17bd84 --- /dev/null +++ b/tools/closure_linter/closure_linter/statetracker.py @@ -0,0 +1,964 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Light weight EcmaScript state tracker that reads tokens and tracks state.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +import re + +from closure_linter import javascripttokenizer +from closure_linter import javascripttokens +from closure_linter import tokenutil + +# Shorthand +Type = javascripttokens.JavaScriptTokenType + + +class DocFlag(object): + """Generic doc flag object. + + Attribute: + flag_type: param, return, define, type, etc. + flag_token: The flag token. + type_start_token: The first token specifying the flag type, + including braces. + type_end_token: The last token specifying the flag type, + including braces. + type: The type spec. + name_token: The token specifying the flag name. + name: The flag name + description_start_token: The first token in the description. + description_end_token: The end token in the description. + description: The description. + """ + + # Please keep these lists alphabetized. + + # The list of standard jsdoc tags is from + STANDARD_DOC = frozenset([ + 'author', + 'bug', + 'const', + 'constructor', + 'define', + 'deprecated', + 'enum', + 'export', + 'extends', + 'externs', + 'fileoverview', + 'implements', + 'implicitCast', + 'interface', + 'license', + 'noalias', + 'nocompile', + 'nosideeffects', + 'override', + 'owner', + 'param', + 'preserve', + 'private', + 'return', + 'see', + 'supported', + 'template', + 'this', + 'type', + 'typedef', + ]) + + ANNOTATION = frozenset(['preserveTry', 'suppress']) + + LEGAL_DOC = STANDARD_DOC | ANNOTATION + + # Includes all Closure Compiler @suppress types. + # Not all of these annotations are interpreted by Closure Linter. 
+ SUPPRESS_TYPES = frozenset([ + 'accessControls', + 'checkRegExp', + 'checkTypes', + 'checkVars', + 'deprecated', + 'duplicate', + 'fileoverviewTags', + 'invalidCasts', + 'missingProperties', + 'nonStandardJsDocs', + 'strictModuleDepCheck', + 'undefinedVars', + 'underscore', + 'unknownDefines', + 'uselessCode', + 'visibility', + 'with']) + + HAS_DESCRIPTION = frozenset([ + 'define', 'deprecated', 'desc', 'fileoverview', 'license', 'param', + 'preserve', 'return', 'supported']) + + HAS_TYPE = frozenset([ + 'define', 'enum', 'extends', 'implements', 'param', 'return', 'type', + 'suppress']) + + TYPE_ONLY = frozenset(['enum', 'extends', 'implements', 'suppress', 'type']) + + HAS_NAME = frozenset(['param']) + + EMPTY_COMMENT_LINE = re.compile(r'^\s*\*?\s*$') + EMPTY_STRING = re.compile(r'^\s*$') + + def __init__(self, flag_token): + """Creates the DocFlag object and attaches it to the given start token. + + Args: + flag_token: The starting token of the flag. + """ + self.flag_token = flag_token + self.flag_type = flag_token.string.strip().lstrip('@') + + # Extract type, if applicable. + self.type = None + self.type_start_token = None + self.type_end_token = None + if self.flag_type in self.HAS_TYPE: + brace = tokenutil.SearchUntil(flag_token, [Type.DOC_START_BRACE], + Type.FLAG_ENDING_TYPES) + if brace: + end_token, contents = _GetMatchingEndBraceAndContents(brace) + self.type = contents + self.type_start_token = brace + self.type_end_token = end_token + elif (self.flag_type in self.TYPE_ONLY and + flag_token.next.type not in Type.FLAG_ENDING_TYPES): + self.type_start_token = flag_token.next + self.type_end_token, self.type = _GetEndTokenAndContents( + self.type_start_token) + if self.type is not None: + self.type = self.type.strip() + + # Extract name, if applicable. + self.name_token = None + self.name = None + if self.flag_type in self.HAS_NAME: + # Handle bad case, name could be immediately after flag token. + self.name_token = _GetNextIdentifierToken(flag_token) + + # Handle good case, if found token is after type start, look for + # identifier after type end, since types contain identifiers. + if (self.type and self.name_token and + tokenutil.Compare(self.name_token, self.type_start_token) > 0): + self.name_token = _GetNextIdentifierToken(self.type_end_token) + + if self.name_token: + self.name = self.name_token.string + + # Extract description, if applicable. + self.description_start_token = None + self.description_end_token = None + self.description = None + if self.flag_type in self.HAS_DESCRIPTION: + search_start_token = flag_token + if self.name_token and self.type_end_token: + if tokenutil.Compare(self.type_end_token, self.name_token) > 0: + search_start_token = self.type_end_token + else: + search_start_token = self.name_token + elif self.name_token: + search_start_token = self.name_token + elif self.type: + search_start_token = self.type_end_token + + interesting_token = tokenutil.Search(search_start_token, + Type.FLAG_DESCRIPTION_TYPES | Type.FLAG_ENDING_TYPES) + if interesting_token.type in Type.FLAG_DESCRIPTION_TYPES: + self.description_start_token = interesting_token + self.description_end_token, self.description = ( + _GetEndTokenAndContents(interesting_token)) + + +class DocComment(object): + """JavaScript doc comment object. + + Attributes: + ordered_params: Ordered list of parameters documented. + start_token: The token that starts the doc comment. + end_token: The token that ends the doc comment. + suppressions: Map of suppression type to the token that added it. 
+ """ + def __init__(self, start_token): + """Create the doc comment object. + + Args: + start_token: The first token in the doc comment. + """ + self.__params = {} + self.ordered_params = [] + self.__flags = {} + self.start_token = start_token + self.end_token = None + self.suppressions = {} + self.invalidated = False + + def Invalidate(self): + """Indicate that the JSDoc is well-formed but we had problems parsing it. + + This is a short-circuiting mechanism so that we don't emit false + positives about well-formed doc comments just because we don't support + hot new syntaxes. + """ + self.invalidated = True + + def IsInvalidated(self): + """Test whether Invalidate() has been called.""" + return self.invalidated + + def AddParam(self, name, param_type): + """Add a new documented parameter. + + Args: + name: The name of the parameter to document. + param_type: The parameter's declared JavaScript type. + """ + self.ordered_params.append(name) + self.__params[name] = param_type + + def AddSuppression(self, token): + """Add a new error suppression flag. + + Args: + token: The suppression flag token. + """ + #TODO(user): Error if no braces + brace = tokenutil.SearchUntil(token, [Type.DOC_START_BRACE], + [Type.DOC_FLAG]) + if brace: + end_token, contents = _GetMatchingEndBraceAndContents(brace) + self.suppressions[contents] = token + + def AddFlag(self, flag): + """Add a new document flag. + + Args: + flag: DocFlag object. + """ + self.__flags[flag.flag_type] = flag + + def InheritsDocumentation(self): + """Test if the jsdoc implies documentation inheritance. + + Returns: + True if documentation may be pulled off the superclass. + """ + return (self.HasFlag('inheritDoc') or + (self.HasFlag('override') and + not self.HasFlag('return') and + not self.HasFlag('param'))) + + def HasFlag(self, flag_type): + """Test if the given flag has been set. + + Args: + flag_type: The type of the flag to check. + + Returns: + True if the flag is set. + """ + return flag_type in self.__flags + + def GetFlag(self, flag_type): + """Gets the last flag of the given type. + + Args: + flag_type: The type of the flag to get. + + Returns: + The last instance of the given flag type in this doc comment. + """ + return self.__flags[flag_type] + + def CompareParameters(self, params): + """Computes the edit distance and list from the function params to the docs. + + Uses the Levenshtein edit distance algorithm, with code modified from + http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#Python + + Args: + params: The parameter list for the function declaration. + + Returns: + The edit distance, the edit list. + """ + source_len, target_len = len(self.ordered_params), len(params) + edit_lists = [[]] + distance = [[]] + for i in range(target_len+1): + edit_lists[0].append(['I'] * i) + distance[0].append(i) + + for j in range(1, source_len+1): + edit_lists.append([['D'] * j]) + distance.append([j]) + + for i in range(source_len): + for j in range(target_len): + cost = 1 + if self.ordered_params[i] == params[j]: + cost = 0 + + deletion = distance[i][j+1] + 1 + insertion = distance[i+1][j] + 1 + substitution = distance[i][j] + cost + + edit_list = None + best = None + if deletion <= insertion and deletion <= substitution: + # Deletion is best. + best = deletion + edit_list = list(edit_lists[i][j+1]) + edit_list.append('D') + + elif insertion <= substitution: + # Insertion is best. 
+          best = insertion
+          edit_list = list(edit_lists[i+1][j])
+          edit_list.append('I')
+
+        else:
+          # Substitution is best.
+          best = substitution
+          edit_list = list(edit_lists[i][j])
+          if cost:
+            edit_list.append('S')
+          else:
+            edit_list.append('=')
+
+        edit_lists[i+1].append(edit_list)
+        distance[i+1].append(best)
+
+    return distance[source_len][target_len], edit_lists[source_len][target_len]
+
+  def __repr__(self):
+    """Returns a string representation of this object.
+
+    Returns:
+      A string representation of this object.
+    """
+    return '<DocComment: %s, %s>' % (str(self.__params), str(self.__flags))
+
+
+#
+# Helper methods used by DocFlag and DocComment to parse out flag information.
+#
+
+
+def _GetMatchingEndBraceAndContents(start_brace):
+  """Returns the matching end brace and contents between the two braces.
+
+  If any FLAG_ENDING_TYPE token is encountered before a matching end brace, then
+  that token is used as the matching ending token. Contents will have all
+  comment prefixes stripped out of them, and all comment prefixes in between the
+  start and end tokens will be split out into separate DOC_PREFIX tokens.
+
+  Args:
+    start_brace: The DOC_START_BRACE token immediately before desired contents.
+
+  Returns:
+    The matching ending token (DOC_END_BRACE or FLAG_ENDING_TYPE) and a string
+    of the contents between the matching tokens, minus any comment prefixes.
+  """
+  open_count = 1
+  close_count = 0
+  contents = []
+
+  # We don't consider the start brace part of the type string.
+  token = start_brace.next
+  while open_count != close_count:
+    if token.type == Type.DOC_START_BRACE:
+      open_count += 1
+    elif token.type == Type.DOC_END_BRACE:
+      close_count += 1
+
+    if token.type != Type.DOC_PREFIX:
+      contents.append(token.string)
+
+    if token.type in Type.FLAG_ENDING_TYPES:
+      break
+    token = token.next
+
+  # Don't include the end token (end brace, end doc comment, etc.) in type.
+  token = token.previous
+  contents = contents[:-1]
+
+  return token, ''.join(contents)
+
+
+def _GetNextIdentifierToken(start_token):
+  """Searches for and returns the first identifier at the beginning of a token.
+
+  Searches each token after the start to see if it starts with an identifier.
+  If found, will split the token into at most 3 pieces: leading whitespace,
+  identifier, rest of token, returning the identifier token. If no identifier is
+  found returns None and changes no tokens. Search is abandoned when a
+  FLAG_ENDING_TYPE token is found.
+
+  Args:
+    start_token: The token to start searching after.
+
+  Returns:
+    The identifier token if found, None otherwise.
+  """
+  token = start_token.next
+
+  while token and not token.type in Type.FLAG_ENDING_TYPES:
+    match = javascripttokenizer.JavaScriptTokenizer.IDENTIFIER.match(
+        token.string)
+    if (match is not None and token.type == Type.COMMENT and
+        len(token.string) == len(match.group(0))):
+      return token
+
+    token = token.next
+
+  return None
+
+
+def _GetEndTokenAndContents(start_token):
+  """Returns last content token and all contents before FLAG_ENDING_TYPE token.
+
+  Comment prefixes are split into DOC_PREFIX tokens and stripped from the
+  returned contents.
+
+  Args:
+    start_token: The token immediately before the first content token.
+
+  Returns:
+    The last content token and a string of all contents including start and
+    end tokens, with comment prefixes stripped.
+ """ + iterator = start_token + last_line = iterator.line_number + last_token = None + contents = '' + while not iterator.type in Type.FLAG_ENDING_TYPES: + if (iterator.IsFirstInLine() and + DocFlag.EMPTY_COMMENT_LINE.match(iterator.line)): + # If we have a blank comment line, consider that an implicit + # ending of the description. This handles a case like: + # + # * @return {boolean} True + # * + # * Note: This is a sentence. + # + # The note is not part of the @return description, but there was + # no definitive ending token. Rather there was a line containing + # only a doc comment prefix or whitespace. + break + + if iterator.type in Type.FLAG_DESCRIPTION_TYPES: + contents += iterator.string + last_token = iterator + + iterator = iterator.next + if iterator.line_number != last_line: + contents += '\n' + last_line = iterator.line_number + + end_token = last_token + if DocFlag.EMPTY_STRING.match(contents): + contents = None + else: + # Strip trailing newline. + contents = contents[:-1] + + return end_token, contents + + +class Function(object): + """Data about a JavaScript function. + + Attributes: + block_depth: Block depth the function began at. + doc: The DocComment associated with the function. + has_return: If the function has a return value. + has_this: If the function references the 'this' object. + is_assigned: If the function is part of an assignment. + is_constructor: If the function is a constructor. + name: The name of the function, whether given in the function keyword or + as the lvalue the function is assigned to. + """ + + def __init__(self, block_depth, is_assigned, doc, name): + self.block_depth = block_depth + self.is_assigned = is_assigned + self.is_constructor = doc and doc.HasFlag('constructor') + self.is_interface = doc and doc.HasFlag('interface') + self.has_return = False + self.has_this = False + self.name = name + self.doc = doc + + +class StateTracker(object): + """EcmaScript state tracker. + + Tracks block depth, function names, etc. within an EcmaScript token stream. + """ + + OBJECT_LITERAL = 'o' + CODE = 'c' + + def __init__(self, doc_flag=DocFlag): + """Initializes a JavaScript token stream state tracker. + + Args: + doc_flag: An optional custom DocFlag used for validating + documentation flags. + """ + self._doc_flag = doc_flag + self.Reset() + + def Reset(self): + """Resets the state tracker to prepare for processing a new page.""" + self._block_depth = 0 + self._is_block_close = False + self._paren_depth = 0 + self._functions = [] + self._functions_by_name = {} + self._last_comment = None + self._doc_comment = None + self._cumulative_params = None + self._block_types = [] + self._last_non_space_token = None + self._last_line = None + self._first_token = None + self._documented_identifiers = set() + + def InFunction(self): + """Returns true if the current token is within a function. + + Returns: + True if the current token is within a function. + """ + return bool(self._functions) + + def InConstructor(self): + """Returns true if the current token is within a constructor. + + Returns: + True if the current token is within a constructor. + """ + return self.InFunction() and self._functions[-1].is_constructor + + def InInterfaceMethod(self): + """Returns true if the current token is within an interface method. + + Returns: + True if the current token is within an interface method. 
+ """ + if self.InFunction(): + if self._functions[-1].is_interface: + return True + else: + name = self._functions[-1].name + prototype_index = name.find('.prototype.') + if prototype_index != -1: + class_function_name = name[0:prototype_index] + if (class_function_name in self._functions_by_name and + self._functions_by_name[class_function_name].is_interface): + return True + + return False + + def InTopLevelFunction(self): + """Returns true if the current token is within a top level function. + + Returns: + True if the current token is within a top level function. + """ + return len(self._functions) == 1 and self.InTopLevel() + + def InAssignedFunction(self): + """Returns true if the current token is within a function variable. + + Returns: + True if if the current token is within a function variable + """ + return self.InFunction() and self._functions[-1].is_assigned + + def IsFunctionOpen(self): + """Returns true if the current token is a function block open. + + Returns: + True if the current token is a function block open. + """ + return (self._functions and + self._functions[-1].block_depth == self._block_depth - 1) + + def IsFunctionClose(self): + """Returns true if the current token is a function block close. + + Returns: + True if the current token is a function block close. + """ + return (self._functions and + self._functions[-1].block_depth == self._block_depth) + + def InBlock(self): + """Returns true if the current token is within a block. + + Returns: + True if the current token is within a block. + """ + return bool(self._block_depth) + + def IsBlockClose(self): + """Returns true if the current token is a block close. + + Returns: + True if the current token is a block close. + """ + return self._is_block_close + + def InObjectLiteral(self): + """Returns true if the current token is within an object literal. + + Returns: + True if the current token is within an object literal. + """ + return self._block_depth and self._block_types[-1] == self.OBJECT_LITERAL + + def InObjectLiteralDescendant(self): + """Returns true if the current token has an object literal ancestor. + + Returns: + True if the current token has an object literal ancestor. + """ + return self.OBJECT_LITERAL in self._block_types + + def InParentheses(self): + """Returns true if the current token is within parentheses. + + Returns: + True if the current token is within parentheses. + """ + return bool(self._paren_depth) + + def InTopLevel(self): + """Whether we are at the top level in the class. + + This function call is language specific. In some languages like + JavaScript, a function is top level if it is not inside any parenthesis. + In languages such as ActionScript, a function is top level if it is directly + within a class. + """ + raise TypeError('Abstract method InTopLevel not implemented') + + def GetBlockType(self, token): + """Determine the block type given a START_BLOCK token. + + Code blocks come after parameters, keywords like else, and closing parens. + + Args: + token: The current token. Can be assumed to be type START_BLOCK. + Returns: + Code block type for current token. + """ + raise TypeError('Abstract method GetBlockType not implemented') + + def GetParams(self): + """Returns the accumulated input params as an array. + + In some EcmasSript languages, input params are specified like + (param:Type, param2:Type2, ...) 
+ in other they are specified just as + (param, param2) + We handle both formats for specifying parameters here and leave + it to the compilers for each language to detect compile errors. + This allows more code to be reused between lint checkers for various + EcmaScript languages. + + Returns: + The accumulated input params as an array. + """ + params = [] + if self._cumulative_params: + params = re.compile(r'\s+').sub('', self._cumulative_params).split(',') + # Strip out the type from parameters of the form name:Type. + params = map(lambda param: param.split(':')[0], params) + + return params + + def GetLastComment(self): + """Return the last plain comment that could be used as documentation. + + Returns: + The last plain comment that could be used as documentation. + """ + return self._last_comment + + def GetDocComment(self): + """Return the most recent applicable documentation comment. + + Returns: + The last applicable documentation comment. + """ + return self._doc_comment + + def HasDocComment(self, identifier): + """Returns whether the identifier has been documented yet. + + Args: + identifier: The identifier. + + Returns: + Whether the identifier has been documented yet. + """ + return identifier in self._documented_identifiers + + def InDocComment(self): + """Returns whether the current token is in a doc comment. + + Returns: + Whether the current token is in a doc comment. + """ + return self._doc_comment and self._doc_comment.end_token is None + + def GetDocFlag(self): + """Returns the current documentation flags. + + Returns: + The current documentation flags. + """ + return self._doc_flag + + def IsTypeToken(self, t): + if self.InDocComment() and t.type not in (Type.START_DOC_COMMENT, + Type.DOC_FLAG, Type.DOC_INLINE_FLAG, Type.DOC_PREFIX): + f = tokenutil.SearchUntil(t, [Type.DOC_FLAG], [Type.START_DOC_COMMENT], + None, True) + if f and f.attached_object.type_start_token is not None: + return (tokenutil.Compare(t, f.attached_object.type_start_token) > 0 and + tokenutil.Compare(t, f.attached_object.type_end_token) < 0) + return False + + def GetFunction(self): + """Return the function the current code block is a part of. + + Returns: + The current Function object. + """ + if self._functions: + return self._functions[-1] + + def GetBlockDepth(self): + """Return the block depth. + + Returns: + The current block depth. + """ + return self._block_depth + + def GetLastNonSpaceToken(self): + """Return the last non whitespace token.""" + return self._last_non_space_token + + def GetLastLine(self): + """Return the last line.""" + return self._last_line + + def GetFirstToken(self): + """Return the very first token in the file.""" + return self._first_token + + def HandleToken(self, token, last_non_space_token): + """Handles the given token and updates state. + + Args: + token: The token to handle. + last_non_space_token: + """ + self._is_block_close = False + + if not self._first_token: + self._first_token = token + + # Track block depth. + type = token.type + if type == Type.START_BLOCK: + self._block_depth += 1 + + # Subclasses need to handle block start very differently because + # whether a block is a CODE or OBJECT_LITERAL block varies significantly + # by language. + self._block_types.append(self.GetBlockType(token)) + + # Track block depth. + elif type == Type.END_BLOCK: + self._is_block_close = not self.InObjectLiteral() + self._block_depth -= 1 + self._block_types.pop() + + # Track parentheses depth. 
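
HandleToken's bookkeeping above is a simple stack-and-counter scheme: braces push and pop a block-type stack while parentheses bump a counter. Reduced to plain strings for illustration (not the patch's code):

    def track(tokens_):
        block_types, paren_depth = [], 0
        for tok in tokens_:
            if tok == '{':
                # The real tracker asks GetBlockType() whether this opens a
                # code block or an object literal before pushing.
                block_types.append('block')
            elif tok == '}':
                block_types.pop()
            elif tok == '(':
                paren_depth += 1
            elif tok == ')':
                paren_depth -= 1
        return block_types, paren_depth

    print(track(['(', '{', '}', ')']))  # ([], 0): everything balanced
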
+    elif type == Type.START_PAREN:
+      self._paren_depth += 1
+
+    # Track parentheses depth.
+    elif type == Type.END_PAREN:
+      self._paren_depth -= 1
+
+    elif type == Type.COMMENT:
+      self._last_comment = token.string
+
+    elif type == Type.START_DOC_COMMENT:
+      self._last_comment = None
+      self._doc_comment = DocComment(token)
+
+    elif type == Type.END_DOC_COMMENT:
+      self._doc_comment.end_token = token
+
+    elif type in (Type.DOC_FLAG, Type.DOC_INLINE_FLAG):
+      flag = self._doc_flag(token)
+      token.attached_object = flag
+      self._doc_comment.AddFlag(flag)
+
+      if flag.flag_type == 'param' and flag.name:
+        self._doc_comment.AddParam(flag.name, flag.type)
+      elif flag.flag_type == 'suppress':
+        self._doc_comment.AddSuppression(token)
+
+    elif type == Type.FUNCTION_DECLARATION:
+      last_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES, None,
+                                         True)
+      doc = None
+      # Only functions outside of parens are eligible for documentation.
+      if not self._paren_depth:
+        doc = self._doc_comment
+
+      name = ''
+      is_assigned = last_code and (last_code.IsOperator('=') or
+          last_code.IsOperator('||') or last_code.IsOperator('&&') or
+          (last_code.IsOperator(':') and not self.InObjectLiteral()))
+      if is_assigned:
+        # TODO(robbyw): This breaks for x[2] = ...
+        # Must use loop to find full function name in the case of line-wrapped
+        # declarations (bug 1220601) like:
+        # my.function.foo.
+        #     bar = function() ...
+        identifier = tokenutil.Search(last_code, Type.SIMPLE_LVALUE, None, True)
+        while identifier and identifier.type in (
+            Type.IDENTIFIER, Type.SIMPLE_LVALUE):
+          name = identifier.string + name
+          # Traverse behind us, skipping whitespace and comments.
+          while True:
+            identifier = identifier.previous
+            if not identifier or not identifier.type in Type.NON_CODE_TYPES:
+              break
+
+      else:
+        next_token = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
+        while next_token and next_token.IsType(Type.FUNCTION_NAME):
+          name += next_token.string
+          next_token = tokenutil.Search(next_token, Type.FUNCTION_NAME, 2)
+
+      function = Function(self._block_depth, is_assigned, doc, name)
+      self._functions.append(function)
+      self._functions_by_name[name] = function
+
+    elif type == Type.START_PARAMETERS:
+      self._cumulative_params = ''
+
+    elif type == Type.PARAMETERS:
+      self._cumulative_params += token.string
+
+    elif type == Type.KEYWORD and token.string == 'return':
+      next_token = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
+      if not next_token.IsType(Type.SEMICOLON):
+        function = self.GetFunction()
+        if function:
+          function.has_return = True
+
+    elif type == Type.SIMPLE_LVALUE:
+      identifier = token.values['identifier']
+      jsdoc = self.GetDocComment()
+      if jsdoc:
+        self._documented_identifiers.add(identifier)
+
+      self._HandleIdentifier(identifier, True)
+
+    elif type == Type.IDENTIFIER:
+      self._HandleIdentifier(token.string, False)
+
+      # Detect documented non-assignments.
+      next_token = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
+      if next_token.IsType(Type.SEMICOLON):
+        if (self._last_non_space_token and
+            self._last_non_space_token.IsType(Type.END_DOC_COMMENT)):
+          self._documented_identifiers.add(token.string)
+
+  def _HandleIdentifier(self, identifier, is_assignment):
+    """Process the given identifier.
+
+    Currently checks if it references 'this' and annotates the function
+    accordingly.
+
+    Args:
+      identifier: The identifier to process.
+      is_assignment: Whether the identifier is being written to.
+ """ + if identifier == 'this' or identifier.startswith('this.'): + function = self.GetFunction() + if function: + function.has_this = True + + + def HandleAfterToken(self, token): + """Handle updating state after a token has been checked. + + This function should be used for destructive state changes such as + deleting a tracked object. + + Args: + token: The token to handle. + """ + type = token.type + if type == Type.SEMICOLON or type == Type.END_PAREN or ( + type == Type.END_BRACKET and + self._last_non_space_token.type not in ( + Type.SINGLE_QUOTE_STRING_END, Type.DOUBLE_QUOTE_STRING_END)): + # We end on any numeric array index, but keep going for string based + # array indices so that we pick up manually exported identifiers. + self._doc_comment = None + self._last_comment = None + + elif type == Type.END_BLOCK: + self._doc_comment = None + self._last_comment = None + + if self.InFunction() and self.IsFunctionClose(): + # TODO(robbyw): Detect the function's name for better errors. + self._functions.pop() + + elif type == Type.END_PARAMETERS and self._doc_comment: + self._doc_comment = None + self._last_comment = None + + if not token.IsAnyType(Type.WHITESPACE, Type.BLANK_LINE): + self._last_non_space_token = token + + self._last_line = token.line diff --git a/tools/closure_linter/closure_linter/tokenutil.py b/tools/closure_linter/closure_linter/tokenutil.py new file mode 100755 index 00000000000..6ed5f7f81cd --- /dev/null +++ b/tools/closure_linter/closure_linter/tokenutil.py @@ -0,0 +1,285 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Token utility functions.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +from closure_linter.common import tokens +from closure_linter import javascripttokens + +import copy + +# Shorthand +JavaScriptToken = javascripttokens.JavaScriptToken +Type = tokens.TokenType + +def GetFirstTokenInSameLine(token): + """Returns the first token in the same line as token. + + Args: + token: Any token in the line. + + Returns: + The first token in the same line as token. + """ + while not token.IsFirstInLine(): + token = token.previous + return token + + +def CustomSearch(start_token, func, end_func=None, distance=None, + reverse=False): + """Returns the first token where func is True within distance of this token. + + Args: + start_token: The token to start searching from + func: The function to call to test a token for applicability + end_func: The function to call to test a token to determine whether to abort + the search. + distance: The number of tokens to look through before failing search. Must + be positive. If unspecified, will search until the end of the token + chain + reverse: When true, search the tokens before this one instead of the tokens + after it + + Returns: + The first token matching func within distance of this token, or None if no + such token is found. 
+ """ + token = start_token + if reverse: + while token and (distance is None or distance > 0): + previous = token.previous + if previous: + if func(previous): + return previous + if end_func and end_func(previous): + return None + + token = previous + if distance is not None: + distance -= 1 + + else: + while token and (distance is None or distance > 0): + next = token.next + if next: + if func(next): + return next + if end_func and end_func(next): + return None + + token = next + if distance is not None: + distance -= 1 + + return None + + +def Search(start_token, token_types, distance=None, reverse=False): + """Returns the first token of type in token_types within distance. + + Args: + start_token: The token to start searching from + token_types: The allowable types of the token being searched for + distance: The number of tokens to look through before failing search. Must + be positive. If unspecified, will search until the end of the token + chain + reverse: When true, search the tokens before this one instead of the tokens + after it + + Returns: + The first token of any type in token_types within distance of this token, or + None if no such token is found. + """ + return CustomSearch(start_token, lambda token: token.IsAnyType(token_types), + None, distance, reverse) + + +def SearchExcept(start_token, token_types, distance=None, reverse=False): + """Returns the first token not of any type in token_types within distance. + + Args: + start_token: The token to start searching from + token_types: The unallowable types of the token being searched for + distance: The number of tokens to look through before failing search. Must + be positive. If unspecified, will search until the end of the token + chain + reverse: When true, search the tokens before this one instead of the tokens + after it + + + Returns: + The first token of any type in token_types within distance of this token, or + None if no such token is found. + """ + return CustomSearch(start_token, + lambda token: not token.IsAnyType(token_types), + None, distance, reverse) + + +def SearchUntil(start_token, token_types, end_types, distance=None, + reverse=False): + """Returns the first token of type in token_types before a token of end_type. + + Args: + start_token: The token to start searching from. + token_types: The allowable types of the token being searched for. + end_types: Types of tokens to abort search if we find. + distance: The number of tokens to look through before failing search. Must + be positive. If unspecified, will search until the end of the token + chain + reverse: When true, search the tokens before this one instead of the tokens + after it + + Returns: + The first token of any type in token_types within distance of this token + before any tokens of type in end_type, or None if no such token is found. + """ + return CustomSearch(start_token, lambda token: token.IsAnyType(token_types), + lambda token: token.IsAnyType(end_types), + distance, reverse) + + +def DeleteToken(token): + """Deletes the given token from the linked list. + + Args: + token: The token to delete + """ + if token.previous: + token.previous.next = token.next + + if token.next: + token.next.previous = token.previous + + following_token = token.next + while following_token and following_token.metadata.last_code == token: + following_token.metadata.last_code = token.metadata.last_code + following_token = following_token.next + +def DeleteTokens(token, tokenCount): + """Deletes the given number of tokens starting with the given token. 
+
+
+def DeleteTokens(token, tokenCount):
+  """Deletes the given number of tokens starting with the given token.
+
+  Args:
+    token: The token to start deleting at.
+    tokenCount: The total number of tokens to delete.
+  """
+  for i in xrange(1, tokenCount):
+    DeleteToken(token.next)
+  DeleteToken(token)
+
+
+def InsertTokenAfter(new_token, token):
+  """Inserts new_token after token.
+
+  Args:
+    new_token: A token to be added to the stream
+    token: A token already in the stream
+  """
+  new_token.previous = token
+  new_token.next = token.next
+
+  new_token.metadata = copy.copy(token.metadata)
+
+  if token.IsCode():
+    new_token.metadata.last_code = token
+
+  if new_token.IsCode():
+    following_token = token.next
+    while following_token and following_token.metadata.last_code == token:
+      following_token.metadata.last_code = new_token
+      following_token = following_token.next
+
+  token.next = new_token
+  if new_token.next:
+    new_token.next.previous = new_token
+
+  if new_token.start_index is None:
+    if new_token.line_number == token.line_number:
+      new_token.start_index = token.start_index + len(token.string)
+    else:
+      new_token.start_index = 0
+
+    iterator = new_token.next
+    while iterator and iterator.line_number == new_token.line_number:
+      iterator.start_index += len(new_token.string)
+      iterator = iterator.next
+
+
+def InsertSpaceTokenAfter(token):
+  """Inserts a space token after the given token.
+
+  Args:
+    token: The token to insert a space token after
+
+  Returns:
+    A single space token"""
+  space_token = JavaScriptToken(' ', Type.WHITESPACE, token.line,
+                                token.line_number)
+  InsertTokenAfter(space_token, token)
+  return space_token
+
+
+def InsertLineAfter(token):
+  """Inserts a blank line after the given token.
+
+  Args:
+    token: The token to insert a blank line after
+
+  Returns:
+    The new blank line token"""
+  blank_token = JavaScriptToken('', Type.BLANK_LINE, '',
+                                token.line_number + 1)
+  InsertTokenAfter(blank_token, token)
+  # Update all subsequent line numbers.
+  following_token = blank_token.next
+  while following_token:
+    following_token.line_number += 1
+    following_token = following_token.next
+  return blank_token
+
+
+def SplitToken(token, position):
+  """Splits the token into two tokens at position.
+
+  Args:
+    token: The token to split
+    position: The position to split at. Will be the beginning of second token.
+
+  Returns:
+    The new second token.
+  """
+  new_string = token.string[position:]
+  token.string = token.string[:position]
+
+  new_token = JavaScriptToken(new_string, token.type, token.line,
+                              token.line_number)
+  InsertTokenAfter(new_token, token)
+
+  return new_token
+
+
+def Compare(token1, token2):
+  """Compares two tokens and determines their relative order.
+
+  Returns:
+    A negative integer, zero, or a positive integer as the first token is
+    before, equal, or after the second in the token stream.
+  """
+  if token2.line_number != token1.line_number:
+    return token1.line_number - token2.line_number
+  else:
+    return token1.start_index - token2.start_index
diff --git a/tools/closure_linter/gflags.py b/tools/closure_linter/gflags.py
new file mode 100644
index 00000000000..21aa88e761f
--- /dev/null
+++ b/tools/closure_linter/gflags.py
@@ -0,0 +1,2489 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2007, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# --- +# Author: Chad Lester +# Design and style contributions by: +# Amit Patel, Bogdan Cocosel, Daniel Dulitz, Eric Tiedemann, +# Eric Veach, Laurence Gonsalves, Matthew Springer +# Code reorganized a bit by Craig Silverstein + +"""This module is used to define and parse command line flags. + +This module defines a *distributed* flag-definition policy: rather than +an application having to define all flags in or near main(), each python +module defines flags that are useful to it. When one python module +imports another, it gains access to the other's flags. (This is +implemented by having all modules share a common, global registry object +containing all the flag information.) + +Flags are defined through the use of one of the DEFINE_xxx functions. +The specific function used determines how the flag is parsed, checked, +and optionally type-converted, when it's seen on the command line. + + +IMPLEMENTATION: DEFINE_* creates a 'Flag' object and registers it with a +'FlagValues' object (typically the global FlagValues FLAGS, defined +here). The 'FlagValues' object can scan the command line arguments and +pass flag arguments to the corresponding 'Flag' objects for +value-checking and type conversion. The converted flag values are +available as attributes of the 'FlagValues' object. + +Code can access the flag through a FlagValues object, for instance +gflags.FLAGS.myflag. Typically, the __main__ module passes the +command line arguments to gflags.FLAGS for parsing. + +At bottom, this module calls getopt(), so getopt functionality is +supported, including short- and long-style flags, and the use of -- to +terminate flags. + +Methods defined by the flag module will throw 'FlagsError' exceptions. +The exception argument will be a human-readable string. + + +FLAG TYPES: This is a list of the DEFINE_*'s that you can do. All flags +take a name, default value, help-string, and optional 'short' name +(one-letter name). Some flags have other arguments, which are described +with the flag. + +DEFINE_string: takes any input, and interprets it as a string. + +DEFINE_bool or +DEFINE_boolean: typically does not take an argument: say --myflag to + set FLAGS.myflag to true, or --nomyflag to set + FLAGS.myflag to false. 
Alternately, you can say + --myflag=true or --myflag=t or --myflag=1 or + --myflag=false or --myflag=f or --myflag=0 + +DEFINE_float: takes an input and interprets it as a floating point + number. Takes optional args lower_bound and upper_bound; + if the number specified on the command line is out of + range, it will raise a FlagError. + +DEFINE_integer: takes an input and interprets it as an integer. Takes + optional args lower_bound and upper_bound as for floats. + +DEFINE_enum: takes a list of strings which represents legal values. If + the command-line value is not in this list, raise a flag + error. Otherwise, assign to FLAGS.flag as a string. + +DEFINE_list: Takes a comma-separated list of strings on the commandline. + Stores them in a python list object. + +DEFINE_spaceseplist: Takes a space-separated list of strings on the + commandline. Stores them in a python list object. + Example: --myspacesepflag "foo bar baz" + +DEFINE_multistring: The same as DEFINE_string, except the flag can be + specified more than once on the commandline. The + result is a python list object (list of strings), + even if the flag is only on the command line once. + +DEFINE_multi_int: The same as DEFINE_integer, except the flag can be + specified more than once on the commandline. The + result is a python list object (list of ints), even if + the flag is only on the command line once. + + +SPECIAL FLAGS: There are a few flags that have special meaning: + --help prints a list of all the flags in a human-readable fashion + --helpshort prints a list of all key flags (see below). + --helpxml prints a list of all flags, in XML format. DO NOT parse + the output of --help and --helpshort. Instead, parse + the output of --helpxml. For more info, see + "OUTPUT FOR --helpxml" below. + --flagfile=foo read flags from file foo. + --undefok=f1,f2 ignore unrecognized option errors for f1,f2. + For boolean flags, you should use --undefok=boolflag, and + --boolflag and --noboolflag will be accepted. Do not use + --undefok=noboolflag. + -- as in getopt(), terminates flag-processing + + +NOTE ON --flagfile: + +Flags may be loaded from text files in addition to being specified on +the commandline. + +Any flags you don't feel like typing, throw them in a file, one flag per +line, for instance: + --myflag=myvalue + --nomyboolean_flag +You then specify your file with the special flag '--flagfile=somefile'. +You CAN recursively nest flagfile= tokens OR use multiple files on the +command line. Lines beginning with a single hash '#' or a double slash +'//' are comments in your flagfile. + +Any flagfile= will be interpreted as having a relative path from +the current working directory rather than from the place the file was +included from: + myPythonScript.py --flagfile=config/somefile.cfg + +If somefile.cfg includes further --flagfile= directives, these will be +referenced relative to the original CWD, not from the directory the +including flagfile was found in! + +The caveat applies to people who are including a series of nested files +in a different dir than they are executing out of. Relative path names +are always from CWD, not from the directory of the parent include +flagfile. We do now support '~' expanded directory names. + +Absolute path names ALWAYS work! + + +EXAMPLE USAGE: + + import gflags + FLAGS = gflags.FLAGS + + # Flag names are globally defined! So in general, we need to be + # careful to pick names that are unlikely to be used by other libraries. + # If there is a conflict, we'll get an error at import time. 
+  gflags.DEFINE_string('name', 'Mr. President', 'your name')
+  gflags.DEFINE_integer('age', None, 'your age in years', lower_bound=0)
+  gflags.DEFINE_boolean('debug', False, 'produces debugging output')
+  gflags.DEFINE_enum('gender', 'male', ['male', 'female'], 'your gender')
+
+  def main(argv):
+    try:
+      argv = FLAGS(argv)  # parse flags
+    except gflags.FlagsError, e:
+      print '%s\\nUsage: %s ARGS\\n%s' % (e, sys.argv[0], FLAGS)
+      sys.exit(1)
+    if FLAGS.debug: print 'non-flag arguments:', argv
+    print 'Happy Birthday', FLAGS.name
+    if FLAGS.age is not None:
+      print 'You are a %s, who is %d years old' % (FLAGS.gender, FLAGS.age)
+
+  if __name__ == '__main__':
+    main(sys.argv)
+
+
+KEY FLAGS:
+
+As we already explained, each module gains access to all flags defined
+by all the other modules it transitively imports.  In the case of
+non-trivial scripts, this means a lot of flags ...  For documentation
+purposes, it is good to identify the flags that are key (i.e., really
+important) to a module.  Clearly, the concept of "key flag" is a
+subjective one.  When trying to determine whether a flag is key to a
+module or not, assume that you are trying to explain your module to a
+potential user: which flags would you really like to mention first?
+
+We'll describe shortly how to declare which flags are key to a module.
+For the moment, assume we know the set of key flags for each module.
+Then, if you use the app.py module, you can use the --helpshort flag to
+print only the help for the flags that are key to the main module, in a
+human-readable format.
+
+NOTE: If you need to parse the flag help, do NOT use the output of
+--help / --helpshort.  That output is meant for human consumption, and
+may be changed in the future.  Instead, use --helpxml; flags that are
+key for the main module are marked there with a <key>yes</key> element.
+
+The set of key flags for a module M is composed of:
+
+1. Flags defined by module M by calling a DEFINE_* function.
+
+2. Flags that module M explicitly declares as key by using the function
+
+     DECLARE_key_flag(flag_name)
+
+3. Key flags of other modules that M specifies by using the function
+
+     ADOPT_module_key_flags(module)
+
+   This is a "bulk" declaration of key flags: each flag that is key for
+   <module> becomes key for the current module too.
+
+Notice that if you do not use the functions described at points 2 and 3
+above, then --helpshort prints information only about the flags defined
+by the main module of our script.  In many cases, this behavior is good
+enough.  But if you move part of the main module code (together with the
+related flags) into a different module, then it is nice to use
+DECLARE_key_flag / ADOPT_module_key_flags and make sure --helpshort
+lists all relevant flags (otherwise, your code refactoring may confuse
+your users).
+
+Note: each of DECLARE_key_flag / ADOPT_module_key_flags has its own
+pluses and minuses: DECLARE_key_flag is more targeted and may lead to a
+more focused --helpshort documentation.  ADOPT_module_key_flags is good
+for cases when an entire module is considered key to the current script.
+Also, it does not require updates to client scripts when a new flag is
+added to the module.
+
+
+EXAMPLE USAGE 2 (WITH KEY FLAGS):
+
+Consider an application that contains the following three files (two
+auxiliary modules and a main module):
+
+File libfoo.py:
+
+  import gflags
+
+  gflags.DEFINE_integer('num_replicas', 3, 'Number of replicas to start')
+  gflags.DEFINE_boolean('rpc2', True, 'Turn on the usage of RPC2.')
+
+  ... some code ...
+
+File libbar.py:
+
+  import gflags
+
+  gflags.DEFINE_string('bar_gfs_path', '/gfs/path',
+                       'Path to the GFS files for libbar.')
+  gflags.DEFINE_string('email_for_bar_errors', 'bar-team@google.com',
+                       'Email address for bug reports about module libbar.')
+  gflags.DEFINE_boolean('bar_risky_hack', False,
+                        'Turn on an experimental and buggy optimization.')
+
+  ... some code ...
+
+File myscript.py:
+
+  import gflags
+  import libfoo
+  import libbar
+
+  gflags.DEFINE_integer('num_iterations', 0, 'Number of iterations.')
+
+  # Declare that all flags that are key for libfoo are
+  # key for this module too.
+  gflags.ADOPT_module_key_flags(libfoo)
+
+  # Declare that the flag --bar_gfs_path (defined in libbar) is key
+  # for this module.
+  gflags.DECLARE_key_flag('bar_gfs_path')
+
+  ... some code ...
+
+When myscript is invoked with the flag --helpshort, the resulting help
+message lists information about all the key flags for myscript:
+--num_iterations, --num_replicas, --rpc2, and --bar_gfs_path (in
+addition to the special flags --help and --helpshort).
+
+Of course, myscript uses all the flags declared by it (in this case,
+just --num_iterations) or by any of the modules it transitively imports
+(e.g., the modules libfoo, libbar).  E.g., it can access the value of
+FLAGS.bar_risky_hack, even if --bar_risky_hack is not declared as a key
+flag for myscript.
+
+
+OUTPUT FOR --helpxml:
+
+The --helpxml flag generates output with the following structure:
+
+<?xml version="1.0"?>
+<AllFlags>
+  <program>PROGRAM_BASENAME</program>
+  <usage>MAIN_MODULE_DOCSTRING</usage>
+  (<flag>
+    [<key>yes</key>]
+    <file>DECLARING_MODULE</file>
+    <name>FLAG_NAME</name>
+    <meaning>FLAG_HELP_MESSAGE</meaning>
+    <default>DEFAULT_FLAG_VALUE</default>
+    <current>CURRENT_FLAG_VALUE</current>
+    <type>FLAG_TYPE</type>
+    [OPTIONAL_ELEMENTS]
+  </flag>)*
+</AllFlags>
+
+Notes:
+
+1. The output is intentionally similar to the output generated by the
+C++ command-line flag library.  The few differences are due to the
+Python flags that do not have a C++ equivalent (at least not yet),
+e.g., DEFINE_list.
+
+2. New XML elements may be added in the future.
+
+3. DEFAULT_FLAG_VALUE is in serialized form, i.e., the string you can
+pass for this flag on the command-line.  E.g., for a flag defined
+using DEFINE_list, this field may be foo,bar, not ['foo', 'bar'].
+
+4. CURRENT_FLAG_VALUE is produced using str().  This means that the
+string 'false' will be represented in the same way as the boolean
+False.  Using repr() would have removed this ambiguity and simplified
+parsing, but would have broken the compatibility with the C++
+command-line flags.
+
+5. OPTIONAL_ELEMENTS describe elements relevant for certain kinds of
+flags: lower_bound, upper_bound (for flags that specify bounds),
+enum_value (for enum flags), list_separator (for flags that consist of
+a list of values, separated by a special token).
+
+6. We do not provide any example here: please use --helpxml instead.
+"""
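+
+# Illustration of the --flagfile mechanics described above (hypothetical
+# file and flag names, shown only as a comment):
+#
+#   $ cat config/somefile.cfg
+#   --name=Alice
+#   # comments and blank lines are ignored
+#
+#   $ myPythonScript.py --flagfile=config/somefile.cfg --debug
+#
+# is then equivalent to:  myPythonScript.py --name=Alice --debug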
+
+import cgi
+import getopt
+import os
+import re
+import string
+import sys
+
+# Are we running at least python 2.2?
+try:
+  if tuple(sys.version_info[:3]) < (2,2,0):
+    raise NotImplementedError("requires python 2.2.0 or later")
+except AttributeError:   # a very old python, that lacks sys.version_info
+  raise NotImplementedError("requires python 2.2.0 or later")
+
+# If we're not running at least python 2.2.1, define True, False, and bool.
+# Thanks, Guido, for the code.
+try:
+  True, False, bool
+except NameError:
+  False = 0
+  True = 1
+  def bool(x):
+    if x:
+      return True
+    else:
+      return False
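+
+# A rough sketch of the registration flow described in the module
+# docstring (simplified and hypothetical; the real DEFINE_* helpers are
+# defined later in this file):
+#
+#   flag = Flag(ArgumentParser(), ArgumentSerializer(),
+#               'name', 'default value', 'help text')
+#   FLAGS['name'] = flag   # FlagValues.__setitem__ registers the flag
+#   FLAGS.name             # FlagValues.__getattr__ returns flag.value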
+
+# Are we running under pychecker?
+_RUNNING_PYCHECKER = 'pychecker.python' in sys.modules
+
+
+def _GetCallingModule():
+  """Returns the name of the module that's calling into this module.
+
+  We generally use this function to get the name of the module calling a
+  DEFINE_foo... function.
+  """
+  # Walk down the stack to find the first globals dict that's not ours.
+  for depth in range(1, sys.getrecursionlimit()):
+    if not sys._getframe(depth).f_globals is globals():
+      globals_for_frame = sys._getframe(depth).f_globals
+      module_name = _GetModuleObjectAndName(globals_for_frame)[1]
+      if module_name is not None:
+        return module_name
+  raise AssertionError("No module was found")
+
+
+def _GetThisModuleObjectAndName():
+  """Returns: (module object, module name) for this module."""
+  return _GetModuleObjectAndName(globals())
+
+
+# module exceptions:
+class FlagsError(Exception):
+  """The base class for all flags errors."""
+  pass
+
+
+class DuplicateFlag(FlagsError):
+  """Raised if there is a flag naming conflict."""
+  pass
+
+
+class DuplicateFlagCannotPropagateNoneToSwig(DuplicateFlag):
+  """Special case of DuplicateFlag -- SWIG flag value can't be set to None.
+
+  This can be raised when a duplicate flag is created. Even if
+  allow_override is True, we still abort if the new value is None,
+  because it's currently impossible to pass None default value back to
+  SWIG. See FlagValues.SetDefault for details.
+  """
+  pass
+
+
+# A DuplicateFlagError conveys more information than a
+# DuplicateFlag.  Since there are external modules that create
+# DuplicateFlags, the interface to DuplicateFlag shouldn't change.
+class DuplicateFlagError(DuplicateFlag):
+
+  def __init__(self, flagname, flag_values):
+    self.flagname = flagname
+    message = "The flag '%s' is defined twice." % self.flagname
+    flags_by_module = flag_values.FlagsByModuleDict()
+    for module in flags_by_module:
+      for flag in flags_by_module[module]:
+        if flag.name == flagname or flag.short_name == flagname:
+          message = message + " First from " + module + ","
+          break
+    message = message + " Second from " + _GetCallingModule()
+    DuplicateFlag.__init__(self, message)
+
+
+class IllegalFlagValue(FlagsError):
+  """The flag command line argument is illegal."""
+  pass
+
+
+class UnrecognizedFlag(FlagsError):
+  """Raised if a flag is unrecognized."""
+  pass
+
+
+# An UnrecognizedFlagError conveys more information than an
+# UnrecognizedFlag.  Since there are external modules that create
+# UnrecognizedFlags, the interface to UnrecognizedFlag shouldn't change.
+class UnrecognizedFlagError(UnrecognizedFlag):
+  def __init__(self, flagname):
+    self.flagname = flagname
+    UnrecognizedFlag.__init__(
+        self, "Unknown command line flag '%s'" % flagname)
+
+
+# Global variable used by expvar
+_exported_flags = {}
+_help_width = 80  # width of help output
+
+
+def GetHelpWidth():
+  """Returns: an integer, the width of help lines that is used in TextWrap."""
+  return _help_width
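+
+
+# Illustration (hypothetical input, not executed): for
+#
+#   text = 'First line.\n    indented a\n    indented b'
+#
+# CutCommonSpacePrefix(text) leaves the first line untouched, strips the
+# common four-space prefix from the remaining lines, and returns
+# 'First line.\nindented a\nindented b'.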
+
+
+def CutCommonSpacePrefix(text):
+  """Removes a common space prefix from the lines of a multiline text.
+
+  If the first line does not start with a space, it is left as it is and
+  only in the remaining lines a common space prefix is being searched
+  for. That means the first line will stay untouched. This is especially
+  useful to turn doc strings into help texts. This is because some
+  people prefer to have the doc comment start already after the
+  apostrophe and then align the following lines while others have the
+  apostrophes on a separate line.
+
+  The function also drops trailing empty lines and ignores empty lines
+  following the initial content line while calculating the initial
+  common whitespace.
+
+  Args:
+    text: text to work on
+
+  Returns:
+    the resulting text
+  """
+  text_lines = text.splitlines()
+  # Drop trailing empty lines
+  while text_lines and not text_lines[-1]:
+    text_lines = text_lines[:-1]
+  if text_lines:
+    # We got some content, is the first line starting with a space?
+    if text_lines[0] and text_lines[0][0].isspace():
+      text_first_line = []
+    else:
+      text_first_line = [text_lines.pop(0)]
+    # Calculate length of common leading whitespace (only over content lines)
+    common_prefix = os.path.commonprefix([line for line in text_lines if line])
+    space_prefix_len = len(common_prefix) - len(common_prefix.lstrip())
+    # If we have a common space prefix, drop it from all lines
+    if space_prefix_len:
+      for index in xrange(len(text_lines)):
+        if text_lines[index]:
+          text_lines[index] = text_lines[index][space_prefix_len:]
+    return '\n'.join(text_first_line + text_lines)
+  return ''
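+
+
+# Illustration (hypothetical call, not executed):
+#
+#   TextWrap('some rather long help text ...', length=40,
+#            indent='    ', firstline_indent='--foo: ')
+#
+# wraps the text to 40 columns, prefixes the first output line with
+# '--foo: ' and all following lines with four spaces; __RenderFlagList
+# below uses exactly this pattern to lay out per-flag help.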
+
+
+def TextWrap(text, length=None, indent='', firstline_indent=None, tabs='    '):
+  """Wraps a given text to a maximum line length and returns it.
+
+  We turn lines that only contain whitespace into empty lines.  We keep
+  new lines and tabs (e.g., we do not treat tabs as spaces).
+
+  Args:
+    text: text to wrap
+    length: maximum length of a line, includes indentation
+        if this is None then use GetHelpWidth()
+    indent: indent for all but first line
+    firstline_indent: indent for first line; if None, fall back to indent
+    tabs: replacement for tabs
+
+  Returns:
+    wrapped text
+
+  Raises:
+    FlagsError: if indent not shorter than length
+    FlagsError: if firstline_indent not shorter than length
+  """
+  # Get defaults where callee used None
+  if length is None:
+    length = GetHelpWidth()
+  if indent is None:
+    indent = ''
+  if len(indent) >= length:
+    raise FlagsError('Indent must be shorter than length')
+  # In line we will be holding the current line which is to be started
+  # with indent (or firstline_indent if available) and then appended
+  # with words.
+  if firstline_indent is None:
+    firstline_indent = ''
+    line = indent
+  else:
+    line = firstline_indent
+    if len(firstline_indent) >= length:
+      raise FlagsError('First line indent must be shorter than length')
+
+  # If the callee does not care about tabs we simply convert them to
+  # spaces.  If the callee wanted tabs to be a single space then we do
+  # that already here.
+  if not tabs or tabs == ' ':
+    text = text.replace('\t', ' ')
+  else:
+    tabs_are_whitespace = not tabs.strip()
+
+  line_regex = re.compile('([ ]*)(\t*)([^ \t]+)', re.MULTILINE)
+
+  # Split the text into lines and the lines with the regex above. The
+  # resulting lines are collected in result[]. For each split we get the
+  # spaces, the tabs and the next non white space (e.g. next word).
+  result = []
+  for text_line in text.splitlines():
+    # Store result length so we can find out whether processing the next
+    # line gave any new content
+    old_result_len = len(result)
+    # Process next line with line_regex. For optimization we do an rstrip().
+    # - process tabs (changes either line or word, see below)
+    # - process word (first try to squeeze on line, then wrap or force wrap)
+    # Spaces found on the line are ignored, they get added while wrapping as
+    # needed.
+    for spaces, current_tabs, word in line_regex.findall(text_line.rstrip()):
+      # If tabs weren't converted to spaces, handle them now
+      if current_tabs:
+        # If the last thing we added was a space anyway then drop
+        # it. But let's not get rid of the indentation.
+        if (((result and line != indent) or
+             (not result and line != firstline_indent)) and line[-1] == ' '):
+          line = line[:-1]
+        # Add the tabs, if that means adding whitespace, just add it at
+        # the line, the rstrip() code will shorten the line down if
+        # necessary
+        if tabs_are_whitespace:
+          line += tabs * len(current_tabs)
+        else:
+          # if not all tab replacement is whitespace we prepend it to the word
+          word = tabs * len(current_tabs) + word
+      # Handle the case where word cannot be squeezed onto current last line
+      if len(line) + len(word) > length and len(indent) + len(word) <= length:
+        result.append(line.rstrip())
+        line = indent + word
+        word = ''
+        # No space left on line or can we append a space?
+        if len(line) + 1 >= length:
+          result.append(line.rstrip())
+          line = indent
+        else:
+          line += ' '
+      # Add word and shorten it up to allowed line length. Restart next
+      # line with indent and repeat, or add a space if we're done (word
+      # finished). This deals with words that cannot fit on one line
+      # (e.g. indent + word longer than allowed line length).
+      while len(line) + len(word) >= length:
+        line += word
+        result.append(line[:length])
+        word = line[length:]
+        line = indent
+      # Default case, simply append the word and a space
+      if word:
+        line += word + ' '
+    # End of input line. If we have content we finish the line. If the
+    # current line is just the indent but we had content during this
+    # original line then we need to add an empty line.
+    if (result and line != indent) or (not result and line != firstline_indent):
+      result.append(line.rstrip())
+    elif len(result) == old_result_len:
+      result.append('')
+    line = indent
+
+  return '\n'.join(result)
+
+
+def DocToHelp(doc):
+  """Takes a __doc__ string and reformats it as help."""
+
+  # Get rid of starting and ending white space. Using lstrip() or even
+  # strip() could drop more than maximum of first line and right space
+  # of last line.
+  doc = doc.strip()
+
+  # Get rid of all empty lines
+  whitespace_only_line = re.compile('^[ \t]+$', re.M)
+  doc = whitespace_only_line.sub('', doc)
+
+  # Cut out common space at line beginnings
+  doc = CutCommonSpacePrefix(doc)
+
+  # Just like this module's comment, comments tend to be aligned somehow.
+  # In other words they all start with the same amount of white space
+  # 1) keep double new lines
+  # 2) keep ws after new lines if not empty line
+  # 3) all other new lines shall be changed to a space
+  # Solution: Match new lines between non white space and replace with space.
+  doc = re.sub('(?<=\S)\n(?=\S)', ' ', doc)
+
+  return doc
+
+
+def _GetModuleObjectAndName(globals_dict):
+  """Returns the module that defines a global environment, and its name.
+
+  Args:
+    globals_dict: A dictionary that should correspond to an environment
+      providing the values of the globals.
+
+  Returns:
+    A pair consisting of (1) module object and (2) module name (a
+    string).  Returns (None, None) if the module could not be
+    identified.
+  """
+  # The use of .items() (instead of .iteritems()) is NOT a mistake: if
+  # a parallel thread imports a module while we iterate over
+  # .iteritems() (not nice, but possible), we get a RuntimeError ...
+  # Hence, we use the slightly slower but safer .items().
+ for name, module in sys.modules.items(): + if getattr(module, '__dict__', None) is globals_dict: + if name == '__main__': + # Pick a more informative name for the main module. + name = sys.argv[0] + return (module, name) + return (None, None) + + +def _GetMainModule(): + """Returns the name of the module from which execution started.""" + for depth in range(1, sys.getrecursionlimit()): + try: + globals_of_main = sys._getframe(depth).f_globals + except ValueError: + return _GetModuleObjectAndName(globals_of_main)[1] + raise AssertionError("No module was found") + + +class FlagValues: + """Registry of 'Flag' objects. + + A 'FlagValues' can then scan command line arguments, passing flag + arguments through to the 'Flag' objects that it owns. It also + provides easy access to the flag values. Typically only one + 'FlagValues' object is needed by an application: gflags.FLAGS + + This class is heavily overloaded: + + 'Flag' objects are registered via __setitem__: + FLAGS['longname'] = x # register a new flag + + The .value attribute of the registered 'Flag' objects can be accessed + as attributes of this 'FlagValues' object, through __getattr__. Both + the long and short name of the original 'Flag' objects can be used to + access its value: + FLAGS.longname # parsed flag value + FLAGS.x # parsed flag value (short name) + + Command line arguments are scanned and passed to the registered 'Flag' + objects through the __call__ method. Unparsed arguments, including + argv[0] (e.g. the program name) are returned. + argv = FLAGS(sys.argv) # scan command line arguments + + The original registered Flag objects can be retrieved through the use + of the dictionary-like operator, __getitem__: + x = FLAGS['longname'] # access the registered Flag object + + The str() operator of a 'FlagValues' object provides help for all of + the registered 'Flag' objects. + """ + + def __init__(self): + # Since everything in this class is so heavily overloaded, the only + # way of defining and using fields is to access __dict__ directly. + + # Dictionary: flag name (string) -> Flag object. + self.__dict__['__flags'] = {} + # Dictionary: module name (string) -> list of Flag objects that are defined + # by that module. + self.__dict__['__flags_by_module'] = {} + # Dictionary: module name (string) -> list of Flag objects that are + # key for that module. + self.__dict__['__key_flags_by_module'] = {} + + # Set if we should use new style gnu_getopt rather than getopt when parsing + # the args. Only possible with Python 2.3+ + self.UseGnuGetOpt(False) + + def UseGnuGetOpt(self, use_gnu_getopt=True): + self.__dict__['__use_gnu_getopt'] = use_gnu_getopt + + def IsGnuGetOpt(self): + return self.__dict__['__use_gnu_getopt'] + + def FlagDict(self): + return self.__dict__['__flags'] + + def FlagsByModuleDict(self): + """Returns the dictionary of module_name -> list of defined flags. + + Returns: + A dictionary. Its keys are module names (strings). Its values + are lists of Flag objects. + """ + return self.__dict__['__flags_by_module'] + + def KeyFlagsByModuleDict(self): + """Returns the dictionary of module_name -> list of key flags. + + Returns: + A dictionary. Its keys are module names (strings). Its values + are lists of Flag objects. + """ + return self.__dict__['__key_flags_by_module'] + + def _RegisterFlagByModule(self, module_name, flag): + """Records the module that defines a specific flag. + + We keep track of which flag is defined by which module so that we + can later sort the flags by module. 
+ + Args: + module_name: A string, the name of a Python module. + flag: A Flag object, a flag that is key to the module. + """ + flags_by_module = self.FlagsByModuleDict() + flags_by_module.setdefault(module_name, []).append(flag) + + def _RegisterKeyFlagForModule(self, module_name, flag): + """Specifies that a flag is a key flag for a module. + + Args: + module_name: A string, the name of a Python module. + flag: A Flag object, a flag that is key to the module. + """ + key_flags_by_module = self.KeyFlagsByModuleDict() + # The list of key flags for the module named module_name. + key_flags = key_flags_by_module.setdefault(module_name, []) + # Add flag, but avoid duplicates. + if flag not in key_flags: + key_flags.append(flag) + + def _GetFlagsDefinedByModule(self, module): + """Returns the list of flags defined by a module. + + Args: + module: A module object or a module name (a string). + + Returns: + A new list of Flag objects. Caller may update this list as he + wishes: none of those changes will affect the internals of this + FlagValue object. + """ + if not isinstance(module, str): + module = module.__name__ + + return list(self.FlagsByModuleDict().get(module, [])) + + def _GetKeyFlagsForModule(self, module): + """Returns the list of key flags for a module. + + Args: + module: A module object or a module name (a string) + + Returns: + A new list of Flag objects. Caller may update this list as he + wishes: none of those changes will affect the internals of this + FlagValue object. + """ + if not isinstance(module, str): + module = module.__name__ + + # Any flag is a key flag for the module that defined it. NOTE: + # key_flags is a fresh list: we can update it without affecting the + # internals of this FlagValues object. + key_flags = self._GetFlagsDefinedByModule(module) + + # Take into account flags explicitly declared as key for a module. + for flag in self.KeyFlagsByModuleDict().get(module, []): + if flag not in key_flags: + key_flags.append(flag) + return key_flags + + def AppendFlagValues(self, flag_values): + """Appends flags registered in another FlagValues instance. + + Args: + flag_values: registry to copy from + """ + for flag_name, flag in flag_values.FlagDict().iteritems(): + # Each flags with shortname appears here twice (once under its + # normal name, and again with its short name). To prevent + # problems (DuplicateFlagError) with double flag registration, we + # perform a check to make sure that the entry we're looking at is + # for its normal name. + if flag_name == flag.name: + self[flag_name] = flag + + def RemoveFlagValues(self, flag_values): + """Remove flags that were previously appended from another FlagValues. + + Args: + flag_values: registry containing flags to remove. + """ + for flag_name in flag_values.FlagDict(): + self.__delattr__(flag_name) + + def __setitem__(self, name, flag): + """Registers a new flag variable.""" + fl = self.FlagDict() + if not isinstance(flag, Flag): + raise IllegalFlagValue(flag) + if not isinstance(name, type("")): + raise FlagsError("Flag name must be a string") + if len(name) == 0: + raise FlagsError("Flag name cannot be empty") + # If running under pychecker, duplicate keys are likely to be + # defined. Disable check for duplicate keys when pycheck'ing. 
+    if (fl.has_key(name) and not flag.allow_override and
+        not fl[name].allow_override and not _RUNNING_PYCHECKER):
+      raise DuplicateFlagError(name, self)
+    short_name = flag.short_name
+    if short_name is not None:
+      if (fl.has_key(short_name) and not flag.allow_override and
+          not fl[short_name].allow_override and not _RUNNING_PYCHECKER):
+        raise DuplicateFlagError(short_name, self)
+      fl[short_name] = flag
+    fl[name] = flag
+    global _exported_flags
+    _exported_flags[name] = flag
+
+  def __getitem__(self, name):
+    """Retrieves the Flag object for the flag --name."""
+    return self.FlagDict()[name]
+
+  def __getattr__(self, name):
+    """Retrieves the 'value' attribute of the flag --name."""
+    fl = self.FlagDict()
+    if not fl.has_key(name):
+      raise AttributeError(name)
+    return fl[name].value
+
+  def __setattr__(self, name, value):
+    """Sets the 'value' attribute of the flag --name."""
+    fl = self.FlagDict()
+    fl[name].value = value
+    return value
+
+  def _FlagIsRegistered(self, flag_obj):
+    """Checks whether a Flag object is registered under some name.
+
+    Note: this is non-trivial: in addition to its normal name, a flag
+    may have a short name too.  In self.FlagDict(), both the normal and
+    the short name are mapped to the same flag object.  E.g., calling
+    only "del FLAGS.short_name" is not unregistering the corresponding
+    Flag object (it is still registered under the longer name).
+
+    Args:
+      flag_obj: A Flag object.
+
+    Returns:
+      A boolean: True iff flag_obj is registered under some name.
+    """
+    flag_dict = self.FlagDict()
+    # Check whether flag_obj is registered under its long name.
+    name = flag_obj.name
+    if flag_dict.get(name, None) == flag_obj:
+      return True
+    # Check whether flag_obj is registered under its short name.
+    short_name = flag_obj.short_name
+    if (short_name is not None and
+        flag_dict.get(short_name, None) == flag_obj):
+      return True
+    # The flag cannot be registered under any other name, so we do not
+    # need to do a full search through the values of self.FlagDict().
+    return False
+
+  def __delattr__(self, flag_name):
+    """Deletes a previously-defined flag from a flag object.
+
+    This method makes sure we can delete a flag by using
+
+      del flag_values_object.<flag_name>
+
+    E.g.,
+
+      flags.DEFINE_integer('foo', 1, 'Integer flag.')
+      del flags.FLAGS.foo
+
+    Args:
+      flag_name: A string, the name of the flag to be deleted.
+
+    Raises:
+      AttributeError: When there is no registered flag named flag_name.
+    """
+    fl = self.FlagDict()
+    if flag_name not in fl:
+      raise AttributeError(flag_name)
+
+    flag_obj = fl[flag_name]
+    del fl[flag_name]
+
+    if not self._FlagIsRegistered(flag_obj):
+      # If the Flag object indicated by flag_name is no longer
+      # registered (please see the docstring of _FlagIsRegistered), then
+      # we delete the occurrences of the flag object in all our internal
+      # dictionaries.
+      self.__RemoveFlagFromDictByModule(self.FlagsByModuleDict(), flag_obj)
+      self.__RemoveFlagFromDictByModule(self.KeyFlagsByModuleDict(), flag_obj)
+
+  def __RemoveFlagFromDictByModule(self, flags_by_module_dict, flag_obj):
+    """Removes a flag object from a module -> list of flags dictionary.
+
+    Args:
+      flags_by_module_dict: A dictionary that maps module names to lists of
+        flags.
+      flag_obj: A flag object.
+    """
+    for unused_module, flags_in_module in flags_by_module_dict.iteritems():
+      # while (as opposed to if) takes care of multiple occurrences of a
+      # flag in the list for the same module.
+ while flag_obj in flags_in_module: + flags_in_module.remove(flag_obj) + + def SetDefault(self, name, value): + """Changes the default value of the named flag object.""" + fl = self.FlagDict() + if not fl.has_key(name): + raise AttributeError(name) + fl[name].SetDefault(value) + + def __contains__(self, name): + """Returns True if name is a value (flag) in the dict.""" + return name in self.FlagDict() + + has_key = __contains__ # a synonym for __contains__() + + def __iter__(self): + return self.FlagDict().iterkeys() + + def __call__(self, argv): + """Parses flags from argv; stores parsed flags into this FlagValues object. + + All unparsed arguments are returned. Flags are parsed using the GNU + Program Argument Syntax Conventions, using getopt: + + http://www.gnu.org/software/libc/manual/html_mono/libc.html#Getopt + + Args: + argv: argument list. Can be of any type that may be converted to a list. + + Returns: + The list of arguments not parsed as options, including argv[0] + + Raises: + FlagsError: on any parsing error + """ + # Support any sequence type that can be converted to a list + argv = list(argv) + + shortopts = "" + longopts = [] + + fl = self.FlagDict() + + # This pre parses the argv list for --flagfile=<> options. + argv = argv[:1] + self.ReadFlagsFromFiles(argv[1:], force_gnu=False) + + # Correct the argv to support the google style of passing boolean + # parameters. Boolean parameters may be passed by using --mybool, + # --nomybool, --mybool=(true|false|1|0). getopt does not support + # having options that may or may not have a parameter. We replace + # instances of the short form --mybool and --nomybool with their + # full forms: --mybool=(true|false). + original_argv = list(argv) # list() makes a copy + shortest_matches = None + for name, flag in fl.items(): + if not flag.boolean: + continue + if shortest_matches is None: + # Determine the smallest allowable prefix for all flag names + shortest_matches = self.ShortestUniquePrefixes(fl) + no_name = 'no' + name + prefix = shortest_matches[name] + no_prefix = shortest_matches[no_name] + + # Replace all occurences of this boolean with extended forms + for arg_idx in range(1, len(argv)): + arg = argv[arg_idx] + if arg.find('=') >= 0: continue + if arg.startswith('--'+prefix) and ('--'+name).startswith(arg): + argv[arg_idx] = ('--%s=true' % name) + elif arg.startswith('--'+no_prefix) and ('--'+no_name).startswith(arg): + argv[arg_idx] = ('--%s=false' % name) + + # Loop over all of the flags, building up the lists of short options + # and long options that will be passed to getopt. Short options are + # specified as a string of letters, each letter followed by a colon + # if it takes an argument. Long options are stored in an array of + # strings. Each string ends with an '=' if it takes an argument. + for name, flag in fl.items(): + longopts.append(name + "=") + if len(name) == 1: # one-letter option: allow short flag type also + shortopts += name + if not flag.boolean: + shortopts += ":" + + longopts.append('undefok=') + undefok_flags = [] + + # In case --undefok is specified, loop to pick up unrecognized + # options one by one. 
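+    # For example (hypothetical command line):
+    #   ./myscript.py --undefok=dry_run --dry_run --name=x
+    # leaves --name for getopt to parse and drops the unrecognized
+    # --dry_run instead of raising UnrecognizedFlagError.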
+ unrecognized_opts = [] + args = argv[1:] + while True: + try: + if self.__dict__['__use_gnu_getopt']: + optlist, unparsed_args = getopt.gnu_getopt(args, shortopts, longopts) + else: + optlist, unparsed_args = getopt.getopt(args, shortopts, longopts) + break + except getopt.GetoptError, e: + if not e.opt or e.opt in fl: + # Not an unrecognized option, reraise the exception as a FlagsError + raise FlagsError(e) + # Handle an unrecognized option. + unrecognized_opts.append(e.opt) + # Remove offender from args and try again + for arg_index in range(len(args)): + if ((args[arg_index] == '--' + e.opt) or + (args[arg_index] == '-' + e.opt) or + args[arg_index].startswith('--' + e.opt + '=')): + args = args[0:arg_index] + args[arg_index+1:] + break + else: + # We should have found the option, so we don't expect to get + # here. We could assert, but raising the original exception + # might work better. + raise FlagsError(e) + + for name, arg in optlist: + if name == '--undefok': + flag_names = arg.split(',') + undefok_flags.extend(flag_names) + # For boolean flags, if --undefok=boolflag is specified, then we should + # also accept --noboolflag, in addition to --boolflag. + # Since we don't know the type of the undefok'd flag, this will affect + # non-boolean flags as well. + # NOTE: You shouldn't use --undefok=noboolflag, because then we will + # accept --nonoboolflag here. We are choosing not to do the conversion + # from noboolflag -> boolflag because of the ambiguity that flag names + # can start with 'no'. + undefok_flags.extend('no' + name for name in flag_names) + continue + if name.startswith('--'): + # long option + name = name[2:] + short_option = 0 + else: + # short option + name = name[1:] + short_option = 1 + if fl.has_key(name): + flag = fl[name] + if flag.boolean and short_option: arg = 1 + flag.Parse(arg) + + # If there were unrecognized options, raise an exception unless + # the options were named via --undefok. + for opt in unrecognized_opts: + if opt not in undefok_flags: + raise UnrecognizedFlagError(opt) + + if unparsed_args: + if self.__dict__['__use_gnu_getopt']: + # if using gnu_getopt just return the program name + remainder of argv. + return argv[:1] + unparsed_args + else: + # unparsed_args becomes the first non-flag detected by getopt to + # the end of argv. Because argv may have been modified above, + # return original_argv for this region. + return argv[:1] + original_argv[-len(unparsed_args):] + else: + return argv[:1] + + def Reset(self): + """Resets the values to the point before FLAGS(argv) was called.""" + for f in self.FlagDict().values(): + f.Unparse() + + def RegisteredFlags(self): + """Returns: a list of the names and short names of all registered flags.""" + return self.FlagDict().keys() + + def FlagValuesDict(self): + """Returns: a dictionary that maps flag names to flag values.""" + flag_values = {} + + for flag_name in self.RegisteredFlags(): + flag = self.FlagDict()[flag_name] + flag_values[flag_name] = flag.value + + return flag_values + + def __str__(self): + """Generates a help string for all known flags.""" + return self.GetHelp() + + def GetHelp(self, prefix=''): + """Generates a help string for all known flags.""" + helplist = [] + + flags_by_module = self.FlagsByModuleDict() + if flags_by_module: + + modules = flags_by_module.keys() + modules.sort() + + # Print the help for the main module first, if possible. 
+ main_module = _GetMainModule() + if main_module in modules: + modules.remove(main_module) + modules = [main_module] + modules + + for module in modules: + self.__RenderOurModuleFlags(module, helplist) + + self.__RenderModuleFlags('gflags', + _SPECIAL_FLAGS.FlagDict().values(), + helplist) + + else: + # Just print one long list of flags. + self.__RenderFlagList( + self.FlagDict().values() + _SPECIAL_FLAGS.FlagDict().values(), + helplist, prefix) + + return '\n'.join(helplist) + + def __RenderModuleFlags(self, module, flags, output_lines, prefix=""): + """Generates a help string for a given module.""" + if not isinstance(module, str): + module = module.__name__ + output_lines.append('\n%s%s:' % (prefix, module)) + self.__RenderFlagList(flags, output_lines, prefix + " ") + + def __RenderOurModuleFlags(self, module, output_lines, prefix=""): + """Generates a help string for a given module.""" + flags = self._GetFlagsDefinedByModule(module) + if flags: + self.__RenderModuleFlags(module, flags, output_lines, prefix) + + def __RenderOurModuleKeyFlags(self, module, output_lines, prefix=""): + """Generates a help string for the key flags of a given module. + + Args: + module: A module object or a module name (a string). + output_lines: A list of strings. The generated help message + lines will be appended to this list. + prefix: A string that is prepended to each generated help line. + """ + key_flags = self._GetKeyFlagsForModule(module) + if key_flags: + self.__RenderModuleFlags(module, key_flags, output_lines, prefix) + + def ModuleHelp(self, module): + """Describe the key flags of a module. + + Args: + module: A module object or a module name (a string). + + Returns: + string describing the key flags of a module. + """ + helplist = [] + self.__RenderOurModuleKeyFlags(module, helplist) + return '\n'.join(helplist) + + def MainModuleHelp(self): + """Describe the key flags of the main module. + + Returns: + string describing the key flags of a module. + """ + return self.ModuleHelp(_GetMainModule()) + + def __RenderFlagList(self, flaglist, output_lines, prefix=" "): + fl = self.FlagDict() + special_fl = _SPECIAL_FLAGS.FlagDict() + flaglist = [(flag.name, flag) for flag in flaglist] + flaglist.sort() + flagset = {} + for (name, flag) in flaglist: + # It's possible this flag got deleted or overridden since being + # registered in the per-module flaglist. Check now against the + # canonical source of current flag information, the FlagDict. + if fl.get(name, None) != flag and special_fl.get(name, None) != flag: + # a different flag is using this name now + continue + # only print help once + if flagset.has_key(flag): continue + flagset[flag] = 1 + flaghelp = "" + if flag.short_name: flaghelp += "-%s," % flag.short_name + if flag.boolean: + flaghelp += "--[no]%s" % flag.name + ":" + else: + flaghelp += "--%s" % flag.name + ":" + flaghelp += " " + if flag.help: + flaghelp += flag.help + flaghelp = TextWrap(flaghelp, indent=prefix+" ", + firstline_indent=prefix) + if flag.default_as_str: + flaghelp += "\n" + flaghelp += TextWrap("(default: %s)" % flag.default_as_str, + indent=prefix+" ") + if flag.parser.syntactic_help: + flaghelp += "\n" + flaghelp += TextWrap("(%s)" % flag.parser.syntactic_help, + indent=prefix+" ") + output_lines.append(flaghelp) + + def get(self, name, default): + """Returns the value of a flag (if not None) or a default value. + + Args: + name: A string, the name of a flag. + default: Default value to use if the flag value is None. 
+ """ + + value = self.__getattr__(name) + if value is not None: # Can't do if not value, b/c value might be '0' or "" + return value + else: + return default + + def ShortestUniquePrefixes(self, fl): + """Returns: dictionary; maps flag names to their shortest unique prefix.""" + # Sort the list of flag names + sorted_flags = [] + for name, flag in fl.items(): + sorted_flags.append(name) + if flag.boolean: + sorted_flags.append('no%s' % name) + sorted_flags.sort() + + # For each name in the sorted list, determine the shortest unique + # prefix by comparing itself to the next name and to the previous + # name (the latter check uses cached info from the previous loop). + shortest_matches = {} + prev_idx = 0 + for flag_idx in range(len(sorted_flags)): + curr = sorted_flags[flag_idx] + if flag_idx == (len(sorted_flags) - 1): + next = None + else: + next = sorted_flags[flag_idx+1] + next_len = len(next) + for curr_idx in range(len(curr)): + if (next is None + or curr_idx >= next_len + or curr[curr_idx] != next[curr_idx]): + # curr longer than next or no more chars in common + shortest_matches[curr] = curr[:max(prev_idx, curr_idx) + 1] + prev_idx = curr_idx + break + else: + # curr shorter than (or equal to) next + shortest_matches[curr] = curr + prev_idx = curr_idx + 1 # next will need at least one more char + return shortest_matches + + def __IsFlagFileDirective(self, flag_string): + """Checks whether flag_string contain a --flagfile= directive.""" + if isinstance(flag_string, type("")): + if flag_string.startswith('--flagfile='): + return 1 + elif flag_string == '--flagfile': + return 1 + elif flag_string.startswith('-flagfile='): + return 1 + elif flag_string == '-flagfile': + return 1 + else: + return 0 + return 0 + + def ExtractFilename(self, flagfile_str): + """Returns filename from a flagfile_str of form -[-]flagfile=filename. + + The cases of --flagfile foo and -flagfile foo shouldn't be hitting + this function, as they are dealt with in the level above this + function. + """ + if flagfile_str.startswith('--flagfile='): + return os.path.expanduser((flagfile_str[(len('--flagfile=')):]).strip()) + elif flagfile_str.startswith('-flagfile='): + return os.path.expanduser((flagfile_str[(len('-flagfile=')):]).strip()) + else: + raise FlagsError('Hit illegal --flagfile type: %s' % flagfile_str) + + def __GetFlagFileLines(self, filename, parsed_file_list): + """Returns the useful (!=comments, etc) lines from a file with flags. + + Args: + filename: A string, the name of the flag file. + parsed_file_list: A list of the names of the files we have + already read. MUTATED BY THIS FUNCTION. + + Returns: + List of strings. See the note below. + + NOTE(springer): This function checks for a nested --flagfile= + tag and handles the lower file recursively. It returns a list of + all the lines that _could_ contain command flags. This is + EVERYTHING except whitespace lines and comments (lines starting + with '#' or '//'). + """ + line_list = [] # All line from flagfile. + flag_line_list = [] # Subset of lines w/o comments, blanks, flagfile= tags. + try: + file_obj = open(filename, 'r') + except IOError, e_msg: + print e_msg + print 'ERROR:: Unable to open flagfile: %s' % (filename) + return flag_line_list + + line_list = file_obj.readlines() + file_obj.close() + parsed_file_list.append(filename) + + # This is where we check each line in the file we just read. + for line in line_list: + if line.isspace(): + pass + # Checks for comment (a line that starts with '#'). 
+      elif line.startswith('#') or line.startswith('//'):
+        pass
+      # Checks for a nested "--flagfile=" flag in the current file.
+      # If we find one, recursively parse down into that file.
+      elif self.__IsFlagFileDirective(line):
+        sub_filename = self.ExtractFilename(line)
+        # We do a little safety check for reparsing a file we've already done.
+        if not sub_filename in parsed_file_list:
+          included_flags = self.__GetFlagFileLines(sub_filename,
+                                                   parsed_file_list)
+          flag_line_list.extend(included_flags)
+        else:  # Case of hitting a circularly included file.
+          print >>sys.stderr, ('Warning: Hit circular flagfile dependency: %s'
+                               % sub_filename)
+      else:
+        # Any line that's not a comment or a nested flagfile should get
+        # copied into 2nd position.  This leaves earlier arguments
+        # further back in the list, thus giving them higher priority.
+        flag_line_list.append(line.strip())
+    return flag_line_list
+
+  def ReadFlagsFromFiles(self, argv, force_gnu=True):
+    """Processes command line args, but also allows args to be read from file.
+
+    Args:
+      argv: A list of strings, usually sys.argv[1:], which may contain one or
+        more flagfile directives of the form --flagfile="./filename".
+        Note that the name of the program (sys.argv[0]) should be omitted.
+      force_gnu: If False, --flagfile parsing obeys normal flag semantics.
+        If True, --flagfile parsing instead follows gnu_getopt semantics.
+        *** WARNING *** force_gnu=False may become the future default!
+
+    Returns:
+      A new list which has the original list combined with what we read
+      from any flagfile(s).
+
+    References: Global gflags.FLAGS instance.
+
+    This function should be called before the normal FLAGS(argv) call.
+    This function scans the input list for a flag that looks like:
+    --flagfile=<somefile>.  Then it opens <somefile>, reads all valid key
+    and value pairs and inserts them into the input list between the
+    first item of the list and any subsequent items in the list.
+
+    Note that your application's flags are still defined the usual way
+    using gflags DEFINE_flag() type functions.
+
+    Notes (assuming we're getting a commandline of some sort as our input):
+    --> Flags from the command line argv _should_ always take precedence!
+    --> A further "--flagfile=<file>" CAN be nested in a flagfile.
+        It will be processed after the parent flag file is done.
+    --> For duplicate flags, the first one we hit should "win".
+    --> In a flagfile, a line beginning with # or // is a comment.
+    --> Entirely blank lines _should_ be ignored.
+    """
+    parsed_file_list = []
+    rest_of_args = argv
+    new_argv = []
+    while rest_of_args:
+      current_arg = rest_of_args[0]
+      rest_of_args = rest_of_args[1:]
+      if self.__IsFlagFileDirective(current_arg):
+        # This handles the case of -(-)flagfile foo.  In this case the
+        # next arg really is part of this one.
+        if current_arg == '--flagfile' or current_arg == '-flagfile':
+          if not rest_of_args:
+            raise IllegalFlagValue('--flagfile with no argument')
+          flag_filename = os.path.expanduser(rest_of_args[0])
+          rest_of_args = rest_of_args[1:]
+        else:
+          # This handles the case of (-)-flagfile=foo.
+          flag_filename = self.ExtractFilename(current_arg)
+        new_argv[0:0] = self.__GetFlagFileLines(flag_filename, parsed_file_list)
+      else:
+        new_argv.append(current_arg)
+        # Stop parsing after '--', like getopt and gnu_getopt.
+        if current_arg == '--':
+          break
+        # Stop parsing after a non-flag, like getopt.
+        if not current_arg.startswith('-'):
+          if not force_gnu and not self.__dict__['__use_gnu_getopt']:
+            break
+
+    if rest_of_args:
+      new_argv.extend(rest_of_args)
+
+    return new_argv
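+
+  # Illustration (hypothetical): if /tmp/base.cfg contains the two lines
+  #   --name=Alice
+  #   # a comment
+  # then ReadFlagsFromFiles(['--flagfile=/tmp/base.cfg', '--debug'])
+  # returns ['--name=Alice', '--debug'], which is what __call__ above
+  # feeds to getopt via its pre-parsing step.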
+
+  def FlagsIntoString(self):
+    """Returns a string with the flags assignments from this FlagValues object.
+
+    This function ignores flags whose value is None.  Each flag
+    assignment is separated by a newline.
+
+    NOTE: MUST mirror the behavior of the C++ function
+    CommandlineFlagsIntoString from google3/base/commandlineflags.cc.
+    """
+    s = ''
+    for flag in self.FlagDict().values():
+      if flag.value is not None:
+        s += flag.Serialize() + '\n'
+    return s
+
+  def AppendFlagsIntoFile(self, filename):
+    """Appends all flags assignments from this FlagInfo object to a file.
+
+    Output will be in the format of a flagfile.
+
+    NOTE: MUST mirror the behavior of the C++ version of
+    AppendFlagsIntoFile from google3/base/commandlineflags.cc.
+    """
+    out_file = open(filename, 'a')
+    out_file.write(self.FlagsIntoString())
+    out_file.close()
+
+  def WriteHelpInXMLFormat(self, outfile=None):
+    """Outputs flag documentation in XML format.
+
+    NOTE: We use element names that are consistent with those used by
+    the C++ command-line flag library, from
+    google3/base/commandlineflags_reporting.cc.  We also use a few new
+    elements (e.g., <key>), but we do not interfere / overlap with
+    existing XML elements used by the C++ library.  Please maintain this
+    consistency.
+
+    Args:
+      outfile: File object we write to.  Default None means sys.stdout.
+    """
+    outfile = outfile or sys.stdout
+
+    outfile.write('<?xml version="1.0"?>\n')
+    outfile.write('<AllFlags>\n')
+    indent = '  '
+    _WriteSimpleXMLElement(outfile, 'program', os.path.basename(sys.argv[0]),
+                           indent)
+
+    usage_doc = sys.modules['__main__'].__doc__
+    if not usage_doc:
+      usage_doc = '\nUSAGE: %s [flags]\n' % sys.argv[0]
+    else:
+      usage_doc = usage_doc.replace('%s', sys.argv[0])
+    _WriteSimpleXMLElement(outfile, 'usage', usage_doc, indent)
+
+    # Get list of key flags for the main module.
+    key_flags = self._GetKeyFlagsForModule(_GetMainModule())
+
+    # Sort flags by declaring module name and next by flag name.
+    flags_by_module = self.FlagsByModuleDict()
+    all_module_names = list(flags_by_module.keys())
+    all_module_names.sort()
+    for module_name in all_module_names:
+      flag_list = [(f.name, f) for f in flags_by_module[module_name]]
+      flag_list.sort()
+      for unused_flag_name, flag in flag_list:
+        is_key = flag in key_flags
+        flag.WriteInfoInXMLFormat(outfile, module_name,
+                                  is_key=is_key, indent=indent)
+
+    outfile.write('</AllFlags>\n')
+    outfile.flush()
+# end of FlagValues definition
+
+
+# The global FlagValues instance
+FLAGS = FlagValues()
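+
+
+# Illustration (hypothetical output): with a single key flag --name
+# defined by the main module, FLAGS.WriteHelpInXMLFormat() emits roughly:
+#
+#   <?xml version="1.0"?>
+#   <AllFlags>
+#     <program>myscript.py</program>
+#     <usage>...</usage>
+#     <flag>
+#       <key>yes</key>
+#       <file>myscript.py</file>
+#       <name>name</name>
+#       ...
+#     </flag>
+#   </AllFlags>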
+ """ + value_str = str(value) + if isinstance(value, bool): + # Display boolean values as the C++ flag library does: no caps. + value_str = value_str.lower() + outfile.write('%s<%s>%s\n' % + (indent, name, _MakeXMLSafe(value_str), name)) + + +class Flag: + """Information about a command-line flag. + + 'Flag' objects define the following fields: + .name - the name for this flag + .default - the default value for this flag + .default_as_str - default value as repr'd string, e.g., "'true'" (or None) + .value - the most recent parsed value of this flag; set by Parse() + .help - a help string or None if no help is available + .short_name - the single letter alias for this flag (or None) + .boolean - if 'true', this flag does not accept arguments + .present - true if this flag was parsed from command line flags. + .parser - an ArgumentParser object + .serializer - an ArgumentSerializer object + .allow_override - the flag may be redefined without raising an error + + The only public method of a 'Flag' object is Parse(), but it is + typically only called by a 'FlagValues' object. The Parse() method is + a thin wrapper around the 'ArgumentParser' Parse() method. The parsed + value is saved in .value, and the .present attribute is updated. If + this flag was already present, a FlagsError is raised. + + Parse() is also called during __init__ to parse the default value and + initialize the .value attribute. This enables other python modules to + safely use flags even if the __main__ module neglects to parse the + command line arguments. The .present attribute is cleared after + __init__ parsing. If the default value is set to None, then the + __init__ parsing step is skipped and the .value attribute is + initialized to None. + + Note: The default value is also presented to the user in the help + string, so it is important that it be a legal value for this flag. + """ + + def __init__(self, parser, serializer, name, default, help_string, + short_name=None, boolean=0, allow_override=0): + self.name = name + + if not help_string: + help_string = '(no help available)' + + self.help = help_string + self.short_name = short_name + self.boolean = boolean + self.present = 0 + self.parser = parser + self.serializer = serializer + self.allow_override = allow_override + self.value = None + + self.SetDefault(default) + + def __GetParsedValueAsString(self, value): + if value is None: + return None + if self.serializer: + return repr(self.serializer.Serialize(value)) + if self.boolean: + if value: + return repr('true') + else: + return repr('false') + return repr(str(value)) + + def Parse(self, argument): + try: + self.value = self.parser.Parse(argument) + except ValueError, e: # recast ValueError as IllegalFlagValue + raise IllegalFlagValue("flag --%s=%s: %s" % (self.name, argument, e)) + self.present += 1 + + def Unparse(self): + if self.default is None: + self.value = None + else: + self.Parse(self.default) + self.present = 0 + + def Serialize(self): + if self.value is None: + return '' + if self.boolean: + if self.value: + return "--%s" % self.name + else: + return "--no%s" % self.name + else: + if not self.serializer: + raise FlagsError("Serializer not present for flag %s" % self.name) + return "--%s=%s" % (self.name, self.serializer.Serialize(self.value)) + + def SetDefault(self, value): + """Changes the default value (and current value too) for this Flag.""" + # We can't allow a None override because it may end up not being + # passed to C++ code when we're overriding C++ flags. 
+    # So we cowardly bail out until someone fixes the semantics of
+    # trying to pass None to a C++ flag.  See swig_flags.Init() for
+    # details on this behavior.
+    if value is None and self.allow_override:
+      raise DuplicateFlagCannotPropagateNoneToSwig(self.name)
+
+    self.default = value
+    self.Unparse()
+    self.default_as_str = self.__GetParsedValueAsString(self.value)
+
+  def Type(self):
+    """Returns: a string that describes the type of this Flag."""
+    # NOTE: we use strings, and not the types.*Type constants because
+    # our flags can have more exotic types, e.g., 'comma separated list
+    # of strings', 'whitespace separated list of strings', etc.
+    return self.parser.Type()
+
+  def WriteInfoInXMLFormat(self, outfile, module_name, is_key=False,
+                           indent=''):
+    """Writes common info about this flag, in XML format.
+
+    This is information that is relevant to all flags (e.g., name,
+    meaning, etc.).  If you defined a flag that has some other pieces of
+    info, then please override _WriteCustomInfoInXMLFormat.
+
+    Please do NOT override this method.
+
+    Args:
+      outfile: File object we write to.
+      module_name: A string, the name of the module that defines this flag.
+      is_key: A boolean, True iff this flag is key for the main module.
+      indent: A string that is prepended to each generated line.
+    """
+    outfile.write(indent + '<flag>\n')
+    inner_indent = indent + '  '
+    if is_key:
+      _WriteSimpleXMLElement(outfile, 'key', 'yes', inner_indent)
+    _WriteSimpleXMLElement(outfile, 'file', module_name, inner_indent)
+    # Print flag features that are relevant for all flags.
+    _WriteSimpleXMLElement(outfile, 'name', self.name, inner_indent)
+    if self.short_name:
+      _WriteSimpleXMLElement(outfile, 'short_name', self.short_name,
+                             inner_indent)
+    if self.help:
+      _WriteSimpleXMLElement(outfile, 'meaning', self.help, inner_indent)
+    # The default flag value can either be represented as a string like on
+    # the command line, or as a Python object.  We serialize this value in
+    # the latter case in order to remain consistent.
+    if self.serializer and not isinstance(self.default, str):
+      default_serialized = self.serializer.Serialize(self.default)
+    else:
+      default_serialized = self.default
+    _WriteSimpleXMLElement(outfile, 'default', default_serialized,
+                           inner_indent)
+    _WriteSimpleXMLElement(outfile, 'current', self.value, inner_indent)
+    _WriteSimpleXMLElement(outfile, 'type', self.Type(), inner_indent)
+    # Print extra flag features this flag may have.
+    self._WriteCustomInfoInXMLFormat(outfile, inner_indent)
+    outfile.write(indent + '</flag>\n')
+
+  def _WriteCustomInfoInXMLFormat(self, outfile, indent):
+    """Writes extra info about this flag, in XML format.
+
+    "Extra" means "not already printed by WriteInfoInXMLFormat above."
+
+    Args:
+      outfile: File object we write to.
+      indent: A string that is prepended to each generated line.
+    """
+    # Usually, the parser knows the extra details about the flag, so
+    # we just forward the call to it.
+    self.parser.WriteCustomInfoInXMLFormat(outfile, indent)
+# End of Flag definition
+
+
+class ArgumentParser:
+  """Base class used to parse and convert arguments.
+
+  The Parse() method checks to make sure that the string argument is a
+  legal value and converts it to a native type.  If the value cannot be
+  converted, it should raise a 'ValueError' exception with a human
+  readable explanation of why the value is illegal.
+
+  Subclasses should also define a syntactic_help string which may be
+  presented to the user to describe the form of the legal values.
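+
+  A minimal subclass sketch (hypothetical, not part of this module):
+
+    class EvenIntParser(ArgumentParser):
+      syntactic_help = "an even integer"
+      def Parse(self, argument):
+        val = int(argument)
+        if val % 2:
+          raise ValueError("%s is not %s" % (argument, self.syntactic_help))
+        return val
+      def Type(self):
+        return 'even int'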
+ """ + syntactic_help = "" + + def Parse(self, argument): + """Default implementation: always returns its argument unmodified.""" + return argument + + def Type(self): + return 'string' + + def WriteCustomInfoInXMLFormat(self, outfile, indent): + pass + + +class ArgumentSerializer: + """Base class for generating string representations of a flag value.""" + + def Serialize(self, value): + return str(value) + + +class ListSerializer(ArgumentSerializer): + + def __init__(self, list_sep): + self.list_sep = list_sep + + def Serialize(self, value): + return self.list_sep.join([str(x) for x in value]) + + +# The DEFINE functions are explained in mode details in the module doc string. + + +def DEFINE(parser, name, default, help, flag_values=FLAGS, serializer=None, + **args): + """Registers a generic Flag object. + + NOTE: in the docstrings of all DEFINE* functions, "registers" is short + for "creates a new flag and registers it". + + Auxiliary function: clients should use the specialized DEFINE_ + function instead. + + Args: + parser: ArgumentParser that is used to parse the flag arguments. + name: A string, the flag name. + default: The default value of the flag. + help: A help string. + flag_values: FlagValues object the flag will be registered with. + serializer: ArgumentSerializer that serializes the flag value. + args: Dictionary with extra keyword args that are passes to the + Flag __init__. + """ + DEFINE_flag(Flag(parser, serializer, name, default, help, **args), + flag_values) + + +def DEFINE_flag(flag, flag_values=FLAGS): + """Registers a 'Flag' object with a 'FlagValues' object. + + By default, the global FLAGS 'FlagValue' object is used. + + Typical users will use one of the more specialized DEFINE_xxx + functions, such as DEFINE_string or DEFINE_integer. But developers + who need to create Flag objects themselves should use this function + to register their flags. + """ + # copying the reference to flag_values prevents pychecker warnings + fv = flag_values + fv[flag.name] = flag + # Tell flag_values who's defining the flag. + if isinstance(flag_values, FlagValues): + # Regarding the above isinstance test: some users pass funny + # values of flag_values (e.g., {}) in order to avoid the flag + # registration (in the past, there used to be a flag_values == + # FLAGS test here) and redefine flags with the same name (e.g., + # debug). To avoid breaking their code, we perform the + # registration only if flag_values is a real FlagValues object. + flag_values._RegisterFlagByModule(_GetCallingModule(), flag) + + +def _InternalDeclareKeyFlags(flag_names, + flag_values=FLAGS, key_flag_values=None): + """Declares a flag as key for the calling module. + + Internal function. User code should call DECLARE_key_flag or + ADOPT_module_key_flags instead. + + Args: + flag_names: A list of strings that are names of already-registered + Flag objects. + flag_values: A FlagValues object that the flags listed in + flag_names have registered with (the value of the flag_values + argument from the DEFINE_* calls that defined those flags). + This should almost never need to be overridden. + key_flag_values: A FlagValues object that (among possibly many + other things) keeps track of the key flags for each module. + Default None means "same as flag_values". This should almost + never need to be overridden. + + Raises: + UnrecognizedFlagError: when we refer to a flag that was not + defined yet. 
+ """ + key_flag_values = key_flag_values or flag_values + + module = _GetCallingModule() + + for flag_name in flag_names: + if flag_name not in flag_values: + raise UnrecognizedFlagError(flag_name) + flag = flag_values.FlagDict()[flag_name] + key_flag_values._RegisterKeyFlagForModule(module, flag) + + +def DECLARE_key_flag(flag_name, flag_values=FLAGS): + """Declares one flag as key to the current module. + + Key flags are flags that are deemed really important for a module. + They are important when listing help messages; e.g., if the + --helpshort command-line flag is used, then only the key flags of the + main module are listed (instead of all flags, as in the case of + --help). + + Sample usage: + + flags.DECLARED_key_flag('flag_1') + + Args: + flag_name: A string, the name of an already declared flag. + (Redeclaring flags as key, including flags implicitly key + because they were declared in this module, is a no-op.) + flag_values: A FlagValues object. This should almost never + need to be overridden. + """ + if flag_name in _SPECIAL_FLAGS: + # Take care of the special flags, e.g., --flagfile, --undefok. + # These flags are defined in _SPECIAL_FLAGS, and are treated + # specially during flag parsing, taking precedence over the + # user-defined flags. + _InternalDeclareKeyFlags([flag_name], + flag_values=_SPECIAL_FLAGS, + key_flag_values=flag_values) + return + _InternalDeclareKeyFlags([flag_name], flag_values=flag_values) + + +def ADOPT_module_key_flags(module, flag_values=FLAGS): + """Declares that all flags key to a module are key to the current module. + + Args: + module: A module object. + flag_values: A FlagValues object. This should almost never need + to be overridden. + + Raises: + FlagsError: When given an argument that is a module name (a + string), instead of a module object. + """ + # NOTE(salcianu): an even better test would be if not + # isinstance(module, types.ModuleType) but I didn't want to import + # types for such a tiny use. + if isinstance(module, str): + raise FlagsError('Received module name %s; expected a module object.' + % module) + _InternalDeclareKeyFlags( + [f.name for f in flag_values._GetKeyFlagsForModule(module.__name__)], + flag_values=flag_values) + # If module is this flag module, take _SPECIAL_FLAGS into account. + if module == _GetThisModuleObjectAndName()[0]: + _InternalDeclareKeyFlags( + # As we associate flags with _GetCallingModule(), the special + # flags defined in this module are incorrectly registered with + # a different module. So, we can't use _GetKeyFlagsForModule. + # Instead, we take all flags from _SPECIAL_FLAGS (a private + # FlagValues, where no other module should register flags). + [f.name for f in _SPECIAL_FLAGS.FlagDict().values()], + flag_values=_SPECIAL_FLAGS, + key_flag_values=flag_values) + + +# +# STRING FLAGS +# + + +def DEFINE_string(name, default, help, flag_values=FLAGS, **args): + """Registers a flag whose value can be any string.""" + parser = ArgumentParser() + serializer = ArgumentSerializer() + DEFINE(parser, name, default, help, flag_values, serializer, **args) + + +# +# BOOLEAN FLAGS +# +# and the special HELP flags. 
+
+class BooleanParser(ArgumentParser):
+  """Parser of boolean values."""
+
+  def Convert(self, argument):
+    """Converts the argument to a boolean; raises ValueError on errors."""
+    if type(argument) == str:
+      if argument.lower() in ['true', 't', '1']:
+        return True
+      elif argument.lower() in ['false', 'f', '0']:
+        return False
+
+    bool_argument = bool(argument)
+    if argument == bool_argument:
+      # The argument is a valid boolean (True, False, 0, or 1), and not just
+      # something that always converts to bool (list, string, int, etc.).
+      return bool_argument
+
+    raise ValueError('Non-boolean argument to boolean flag', argument)
+
+  def Parse(self, argument):
+    val = self.Convert(argument)
+    return val
+
+  def Type(self):
+    return 'bool'
+
+
+class BooleanFlag(Flag):
+  """Basic boolean flag.
+
+  Boolean flags do not take any arguments, and their value is either
+  True (1) or False (0).  The false value is specified on the command
+  line by prepending the word 'no' to either the long or the short flag
+  name.
+
+  For example, if a Boolean flag was created whose long name was
+  'update' and whose short name was 'x', then this flag could be
+  explicitly unset through either --noupdate or --nox.
+  """
+
+  def __init__(self, name, default, help, short_name=None, **args):
+    p = BooleanParser()
+    Flag.__init__(self, p, None, name, default, help, short_name, 1, **args)
+    if not self.help: self.help = "a boolean value"
+
+
+def DEFINE_boolean(name, default, help, flag_values=FLAGS, **args):
+  """Registers a boolean flag.
+
+  Such a boolean flag does not take an argument.  If a user wants to
+  specify a false value explicitly, the long option beginning with 'no'
+  must be used: i.e. --noflag
+
+  This flag will have a value of None, True or False.  None is possible
+  if default=None and the user does not specify the flag on the command
+  line.
+  """
+  DEFINE_flag(BooleanFlag(name, default, help, **args), flag_values)
+
+
+# Match C++ API to unconfuse C++ people.
+DEFINE_bool = DEFINE_boolean
+
+
+class HelpFlag(BooleanFlag):
+  """
+  HelpFlag is a special boolean flag that prints usage information and
+  raises a SystemExit exception if it is ever found in the command
+  line arguments.  Note this is called with allow_override=1, so other
+  apps can define their own --help flag, replacing this one, if they want.
+  """
+  def __init__(self):
+    BooleanFlag.__init__(self, "help", 0, "show this help",
+                         short_name="?", allow_override=1)
+  def Parse(self, arg):
+    if arg:
+      doc = sys.modules["__main__"].__doc__
+      flags = str(FLAGS)
+      print doc or ("\nUSAGE: %s [flags]\n" % sys.argv[0])
+      if flags:
+        print "flags:"
+        print flags
+      sys.exit(1)
+
+
+class HelpXMLFlag(BooleanFlag):
+  """Similar to HelpFlag, but generates output in XML format."""
+
+  def __init__(self):
+    BooleanFlag.__init__(self, 'helpxml', False,
+                         'like --help, but generates XML output',
+                         allow_override=1)
+
+  def Parse(self, arg):
+    if arg:
+      FLAGS.WriteHelpInXMLFormat(sys.stdout)
+      sys.exit(1)
+
+
+class HelpshortFlag(BooleanFlag):
+  """
+  HelpshortFlag is a special boolean flag that prints usage
+  information for the "main" module, and raises a SystemExit exception
+  if it is ever found in the command line arguments.  Note this is
+  called with allow_override=1, so other apps can define their own
+  --helpshort flag, replacing this one, if they want.
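+
+  For example, invoking a program with --helpshort prints the main
+  module's docstring (or a default usage line) and the help for the
+  main module's flags, then exits via SystemExit; see Parse() below.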
+ """ + def __init__(self): + BooleanFlag.__init__(self, "helpshort", 0, + "show usage only for this module", allow_override=1) + def Parse(self, arg): + if arg: + doc = sys.modules["__main__"].__doc__ + flags = FLAGS.MainModuleHelp() + print doc or ("\nUSAGE: %s [flags]\n" % sys.argv[0]) + if flags: + print "flags:" + print flags + sys.exit(1) + +# +# Numeric parser - base class for Integer and Float parsers +# + + +class NumericParser(ArgumentParser): + """Parser of numeric values. + + Parsed value may be bounded to a given upper and lower bound. + """ + + def Parse(self, argument): + val = self.Convert(argument) + if ((self.lower_bound is not None and val < self.lower_bound) or + (self.upper_bound is not None and val > self.upper_bound)): + raise ValueError("%s is not %s" % (val, self.syntactic_help)) + return val + + def WriteCustomInfoInXMLFormat(self, outfile, indent): + if self.lower_bound is not None: + _WriteSimpleXMLElement(outfile, 'lower_bound', self.lower_bound, indent) + if self.upper_bound is not None: + _WriteSimpleXMLElement(outfile, 'upper_bound', self.upper_bound, indent) + + def Convert(self, argument): + """Default implementation: always returns its argument unmodified.""" + return argument + +# End of Numeric Parser + +# +# FLOAT FLAGS +# + +class FloatParser(NumericParser): + """Parser of floating point values. + + Parsed value may be bounded to a given upper and lower bound. + """ + number_article = "a" + number_name = "number" + syntactic_help = " ".join((number_article, number_name)) + + def __init__(self, lower_bound=None, upper_bound=None): + self.lower_bound = lower_bound + self.upper_bound = upper_bound + sh = self.syntactic_help + if lower_bound is not None and upper_bound is not None: + sh = ("%s in the range [%s, %s]" % (sh, lower_bound, upper_bound)) + elif lower_bound == 0: + sh = "a non-negative %s" % self.number_name + elif upper_bound == 0: + sh = "a non-positive %s" % self.number_name + elif upper_bound is not None: + sh = "%s <= %s" % (self.number_name, upper_bound) + elif lower_bound is not None: + sh = "%s >= %s" % (self.number_name, lower_bound) + self.syntactic_help = sh + + def Convert(self, argument): + """Converts argument to a float; raises ValueError on errors.""" + return float(argument) + + def Type(self): + return 'float' +# End of FloatParser + + +def DEFINE_float(name, default, help, lower_bound=None, upper_bound=None, + flag_values=FLAGS, **args): + """Registers a flag whose value must be a float. + + If lower_bound or upper_bound are set, then this flag must be + within the given range. + """ + parser = FloatParser(lower_bound, upper_bound) + serializer = ArgumentSerializer() + DEFINE(parser, name, default, help, flag_values, serializer, **args) + + +# +# INTEGER FLAGS +# + + +class IntegerParser(NumericParser): + """Parser of an integer value. + + Parsed value may be bounded to a given upper and lower bound. 
+ """ + number_article = "an" + number_name = "integer" + syntactic_help = " ".join((number_article, number_name)) + + def __init__(self, lower_bound=None, upper_bound=None): + self.lower_bound = lower_bound + self.upper_bound = upper_bound + sh = self.syntactic_help + if lower_bound is not None and upper_bound is not None: + sh = ("%s in the range [%s, %s]" % (sh, lower_bound, upper_bound)) + elif lower_bound == 1: + sh = "a positive %s" % self.number_name + elif upper_bound == -1: + sh = "a negative %s" % self.number_name + elif lower_bound == 0: + sh = "a non-negative %s" % self.number_name + elif upper_bound == 0: + sh = "a non-positive %s" % self.number_name + elif upper_bound is not None: + sh = "%s <= %s" % (self.number_name, upper_bound) + elif lower_bound is not None: + sh = "%s >= %s" % (self.number_name, lower_bound) + self.syntactic_help = sh + + def Convert(self, argument): + __pychecker__ = 'no-returnvalues' + if type(argument) == str: + base = 10 + if len(argument) > 2 and argument[0] == "0" and argument[1] == "x": + base = 16 + try: + return int(argument, base) + # ValueError is thrown when argument is a string, and overflows an int. + except ValueError: + return long(argument, base) + else: + try: + return int(argument) + # OverflowError is thrown when argument is numeric, and overflows an int. + except OverflowError: + return long(argument) + + def Type(self): + return 'int' + + +def DEFINE_integer(name, default, help, lower_bound=None, upper_bound=None, + flag_values=FLAGS, **args): + """Registers a flag whose value must be an integer. + + If lower_bound, or upper_bound are set, then this flag must be + within the given range. + """ + parser = IntegerParser(lower_bound, upper_bound) + serializer = ArgumentSerializer() + DEFINE(parser, name, default, help, flag_values, serializer, **args) + + +# +# ENUM FLAGS +# + + +class EnumParser(ArgumentParser): + """Parser of a string enum value (a string value from a given set). + + If enum_values (see below) is not specified, any string is allowed. + """ + + def __init__(self, enum_values=None): + self.enum_values = enum_values + + def Parse(self, argument): + if self.enum_values and argument not in self.enum_values: + raise ValueError("value should be one of <%s>" % + "|".join(self.enum_values)) + return argument + + def Type(self): + return 'string enum' + + +class EnumFlag(Flag): + """Basic enum flag; its value can be any string from list of enum_values.""" + + def __init__(self, name, default, help, enum_values=None, + short_name=None, **args): + enum_values = enum_values or [] + p = EnumParser(enum_values) + g = ArgumentSerializer() + Flag.__init__(self, p, g, name, default, help, short_name, **args) + if not self.help: self.help = "an enum string" + self.help = "<%s>: %s" % ("|".join(enum_values), self.help) + + def _WriteCustomInfoInXMLFormat(self, outfile, indent): + for enum_value in self.parser.enum_values: + _WriteSimpleXMLElement(outfile, 'enum_value', enum_value, indent) + + +def DEFINE_enum(name, default, enum_values, help, flag_values=FLAGS, + **args): + """Registers a flag whose value can be any string from enum_values.""" + DEFINE_flag(EnumFlag(name, default, help, enum_values, ** args), + flag_values) + + +# +# LIST FLAGS +# + + +class BaseListParser(ArgumentParser): + """Base class for a parser of lists of strings. 
+
+  To extend, inherit from this class; from the subclass __init__, call
+
+    BaseListParser.__init__(self, token, name)
+
+  where token is a character used to tokenize, and name is a description
+  of the separator.
+  """
+
+  def __init__(self, token=None, name=None):
+    assert name
+    self._token = token
+    self._name = name
+    self.syntactic_help = "a %s separated list" % self._name
+
+  def Parse(self, argument):
+    if isinstance(argument, list):
+      return argument
+    elif argument == '':
+      return []
+    else:
+      return [s.strip() for s in argument.split(self._token)]
+
+  def Type(self):
+    return '%s separated list of strings' % self._name
+
+
+class ListParser(BaseListParser):
+  """Parser for a comma-separated list of strings."""
+
+  def __init__(self):
+    BaseListParser.__init__(self, ',', 'comma')
+
+  def WriteCustomInfoInXMLFormat(self, outfile, indent):
+    BaseListParser.WriteCustomInfoInXMLFormat(self, outfile, indent)
+    _WriteSimpleXMLElement(outfile, 'list_separator', repr(','), indent)
+
+
+class WhitespaceSeparatedListParser(BaseListParser):
+  """Parser for a whitespace-separated list of strings."""
+
+  def __init__(self):
+    BaseListParser.__init__(self, None, 'whitespace')
+
+  def WriteCustomInfoInXMLFormat(self, outfile, indent):
+    BaseListParser.WriteCustomInfoInXMLFormat(self, outfile, indent)
+    separators = list(string.whitespace)
+    separators.sort()
+    for ws_char in separators:
+      _WriteSimpleXMLElement(outfile, 'list_separator', repr(ws_char), indent)
+
+
+def DEFINE_list(name, default, help, flag_values=FLAGS, **args):
+  """Registers a flag whose value is a comma-separated list of strings."""
+  parser = ListParser()
+  serializer = ListSerializer(',')
+  DEFINE(parser, name, default, help, flag_values, serializer, **args)
+
+
+def DEFINE_spaceseplist(name, default, help, flag_values=FLAGS, **args):
+  """Registers a flag whose value is a whitespace-separated list of strings.
+
+  Any whitespace can be used as a separator.
+  """
+  parser = WhitespaceSeparatedListParser()
+  serializer = ListSerializer(' ')
+  DEFINE(parser, name, default, help, flag_values, serializer, **args)
+
+
+#
+# MULTI FLAGS
+#
+
+
+class MultiFlag(Flag):
+  """A flag that can appear multiple times on the command-line.
+
+  The value of such a flag is a list that contains the individual values
+  from all the appearances of that flag on the command-line.
+
+  See the __doc__ for Flag for most behavior of this class.  Only
+  differences in behavior are described here:
+
+  * The default value may be either a single value or a list of values.
+    A single value is interpreted as the [value] singleton list.
+
+  * The value of the flag is always a list, even if the option was
+    only supplied once, and even if the default value is a single
+    value.
+  """
+
+  def __init__(self, *args, **kwargs):
+    Flag.__init__(self, *args, **kwargs)
+    self.help += ';\n    repeat this option to specify a list of values'
+
+  def Parse(self, arguments):
+    """Parses one or more arguments with the installed parser.
+
+    Args:
+      arguments: a single argument or a list of arguments (typically a
+        list of default values); a single argument is converted
+        internally into a list containing one item.
+    """
+    if not isinstance(arguments, list):
+      # Default value may be a list of values.  Most other arguments
+      # will not be, so convert them into a single-item list to make
+      # processing simpler below.
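+      # (Illustrative: a default of 'x' is handled as ['x'], so the
+      # loop below treats defaults and command-line values uniformly.)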
+      arguments = [arguments]
+
+    if self.present:
+      # keep a backup reference to list of previously supplied option values
+      values = self.value
+    else:
+      # "erase" the defaults with an empty list
+      values = []
+
+    for item in arguments:
+      # have Flag superclass parse argument, overwriting self.value reference
+      Flag.Parse(self, item)  # also increments self.present
+      values.append(self.value)
+
+    # put list of option values back in the 'value' attribute
+    self.value = values
+
+  def Serialize(self):
+    if not self.serializer:
+      raise FlagsError("Serializer not present for flag %s" % self.name)
+    if self.value is None:
+      return ''
+
+    s = ''
+
+    multi_value = self.value
+
+    for self.value in multi_value:
+      if s: s += ' '
+      s += Flag.Serialize(self)
+
+    self.value = multi_value
+
+    return s
+
+  def Type(self):
+    return 'multi ' + self.parser.Type()
+
+
+def DEFINE_multi(parser, serializer, name, default, help, flag_values=FLAGS,
+                 **args):
+  """Registers a generic MultiFlag that parses its args with a given parser.
+
+  Auxiliary function.  Normal users should NOT use it directly.
+
+  Developers who need to create their own 'Parser' classes for options
+  which can appear multiple times can call this module function to
+  register their flags.
+  """
+  DEFINE_flag(MultiFlag(parser, serializer, name, default, help, **args),
+              flag_values)
+
+
+def DEFINE_multistring(name, default, help, flag_values=FLAGS, **args):
+  """Registers a flag whose value can be a list of any strings.
+
+  Use the flag on the command line multiple times to place multiple
+  string values into the list.  The 'default' may be a single string
+  (which will be converted into a single-element list) or a list of
+  strings.
+  """
+  parser = ArgumentParser()
+  serializer = ArgumentSerializer()
+  DEFINE_multi(parser, serializer, name, default, help, flag_values, **args)
+
+
+def DEFINE_multi_int(name, default, help, lower_bound=None, upper_bound=None,
+                     flag_values=FLAGS, **args):
+  """Registers a flag whose value can be a list of arbitrary integers.
+
+  Use the flag on the command line multiple times to place multiple
+  integer values into the list.  The 'default' may be a single integer
+  (which will be converted into a single-element list) or a list of
+  integers.
+  """
+  parser = IntegerParser(lower_bound, upper_bound)
+  serializer = ArgumentSerializer()
+  DEFINE_multi(parser, serializer, name, default, help, flag_values, **args)
+
+
+# Now register the flags that we want to exist in all applications.
+# These are all defined with allow_override=1, so user-apps can use
+# these flagnames for their own purposes, if they want.
+DEFINE_flag(HelpFlag())
+DEFINE_flag(HelpshortFlag())
+DEFINE_flag(HelpXMLFlag())
+
+# Define special flags here so that help may be generated for them.
+# NOTE: Please do NOT use _SPECIAL_FLAGS from outside this module.
+_SPECIAL_FLAGS = FlagValues()
+
+
+DEFINE_string(
+    'flagfile', "",
+    "Insert flag definitions from the given file into the command line.",
+    _SPECIAL_FLAGS)
+
+DEFINE_string(
+    'undefok', "",
+    "comma-separated list of flag names that it is okay to specify "
+    "on the command line even if the program does not define a flag "
+    "with that name.  IMPORTANT: flags in this list that have "
+    "arguments MUST use the --flag=value format.", _SPECIAL_FLAGS)
diff --git a/tools/closure_linter/setup.cfg b/tools/closure_linter/setup.cfg
new file mode 100644
index 00000000000..861a9f55426
--- /dev/null
+++ b/tools/closure_linter/setup.cfg
@@ -0,0 +1,5 @@
+[egg_info]
+tag_build = 
+tag_date = 0
+tag_svn_revision = 0
+
diff --git a/tools/closure_linter/setup.py b/tools/closure_linter/setup.py
new file mode 100755
index 00000000000..1d1764f2c9d
--- /dev/null
+++ b/tools/closure_linter/setup.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+#
+# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+try:
+  from setuptools import setup
+except ImportError:
+  from distutils.core import setup
+
+setup(name='closure_linter',
+      version='2.2.6',
+      description='Closure Linter',
+      license='Apache',
+      author='The Closure Linter Authors',
+      author_email='opensource@google.com',
+      url='http://code.google.com/p/closure-linter',
+      install_requires=['python-gflags'],
+      package_dir={'closure_linter': 'closure_linter'},
+      packages=['closure_linter', 'closure_linter.common'],
+      entry_points = {
+        'console_scripts': [
+          'gjslint = closure_linter.gjslint:main',
+          'fixjsstyle = closure_linter.fixjsstyle:main'
+        ]
+      }
+)
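+
+# Illustrative note (assuming a standard setuptools environment):
+# running "python setup.py install" makes the console_scripts above
+# available as the "gjslint" and "fixjsstyle" commands.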