From 38d4a750d30906a817e207bf5ed181cf8005c4f3 Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Fri, 29 Nov 2024 17:06:49 +0800
Subject: [PATCH 01/87] Configuration dependency detection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 script/local/LocalCheckSE.py                  |  611 +++++
 script/local/parser/__init__.py               |    0
 script/local/parser/functions.py              |   34 +
 script/local/parser/lex.py                    |  866 ++++++
 script/local/parser/myLexer.py                |  129 +
 script/local/parser/myYACC.py                 |  162 ++
 script/local/parser/parser.out                | 1355 ++++++++++
 script/local/parser/parsetab.py               |   61 +
 script/local/parser/utils.py                  |  254 ++
 script/local/parser/variables.py              |   34 +
 script/local/parser/yacc.py                   | 2403 +++++++++++++++++
 .../local/parser/\346\226\207\346\263\225.md" |   31 +
 script/local/rules/rules_multi_node.csv       |   59 +
 script/local/rules/rules_single_node.csv      |  108 +
 14 files changed, 6107 insertions(+)
 create mode 100644 script/local/parser/__init__.py
 create mode 100644 script/local/parser/functions.py
 create mode 100644 script/local/parser/lex.py
 create mode 100644 script/local/parser/myLexer.py
 create mode 100644 script/local/parser/myYACC.py
 create mode 100644 script/local/parser/parser.out
 create mode 100644 script/local/parser/parsetab.py
 create mode 100644 script/local/parser/utils.py
 create mode 100644 script/local/parser/variables.py
 create mode 100644 script/local/parser/yacc.py
 create mode 100644 "script/local/parser/\346\226\207\346\263\225.md"
 create mode 100644 script/local/rules/rules_multi_node.csv
 create mode 100644 script/local/rules/rules_single_node.csv

diff --git a/script/local/LocalCheckSE.py b/script/local/LocalCheckSE.py
index b2aeea22..89108034 100644
--- a/script/local/LocalCheckSE.py
+++ b/script/local/LocalCheckSE.py
@@ -23,6 +23,8 @@ import sys
 import getopt
 import subprocess
 import re
+import csv
+import json
 
 localDirPath = os.path.dirname(os.path.realpath(__file__))
 
@@ -36,6 +38,11 @@ from domain_utils.cluster_file.version_info import VersionInfo
 from base_utils.os.net_util import NetUtil
 from domain_utils.domain_common.cluster_constants import ClusterConstants
 from datetime import datetime, timedelta
+from local.parser.myYACC import MyYACC
+from local.parser.myLexer import MyLexer
+from local.parser.utils import set_settings
+from local.parser.utils import set_dependency_settings
+from local.parser.utils import show_dependency_info
 
 sys.path.insert(0, localDirPath + "/../../lib")
 import pwd
@@ -2519,7 +2526,155 @@ def collectAllowSystemTableMods():
     return data
 
 
+def execute_query(sql_query):
+    """
+    function : Execute a SQL query and return the first value of the first row
+    input : String (a query that yields a single scalar)
+    output : String
+    """
+    data = AllowSystemTableMods()
+    data.db = []
+    getDatabaseInfo(data, sql_query)
+    return data.db[0]
+
+
 #############################################################################
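execute_query funnels every probe through getDatabaseInfo and returns only the first value of the first row, so each caller must issue a query that yields a single scalar. A minimal usage sketch (the casts mirror the callers below; it assumes a reachable local instance):

    max_connections = int(execute_query("show max_connections;"))
    reserved = int(execute_query(
        "select setting from pg_settings where name = 'sysadmin_reserved_connections';"))
    reserved_ratio = reserved * 100 / max_connections  # share of slots kept for the superuser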
+
+
+def checkConnectionStatus(isSetting):
+    """
+    function : checkConnectionStatus
+    input : Bool
+    output : NA
+    """
+    max_connections = int(execute_query("""show max_connections;"""))
+    super_connections = int(
+        execute_query("""select setting from pg_settings where name = 'sysadmin_reserved_connections'; """))
+    current_connections = int(execute_query("""select count(1) from pg_stat_activity;"""))
+    current_connections_percent = current_connections * 100 / max_connections
+    superuser_reserved_connections_ratio = super_connections * 100 / max_connections
+    track_activities_status = execute_query("""show track_activities;""")
+    connection_age_average = float(
+        execute_query("""select extract(epoch from avg(now()-backend_start)) as age from pg_stat_activity;"""))
+    pre_auth_delay = trans(execute_query("""show pre_auth_delay;"""))
+    post_auth_delay = trans(execute_query("""show post_auth_delay;"""))
+    if super_connections == 0:
+        g_logger.log(
+            " Warning reason: No connection slot is reserved for the superuser. In case of connection saturation you will not be able to connect to investigate or kill connections")
+    if superuser_reserved_connections_ratio > 20:
+        percentage_formatted = format_percent(superuser_reserved_connections_ratio)
+        message = f" Warning reason: {percentage_formatted} of connections are reserved for the superuser. This is excessive and may limit connections for other users."
+        g_logger.log(message)
+    # Check the 90% threshold first; a value above 90 also exceeds 70.
+    if current_connections_percent > 90:
+        g_logger.log(
+            " Warning reason: You are using more than 90% of the connection slots. Increase max_connections to avoid saturation of connection slots")
+    elif current_connections_percent > 70:
+        g_logger.log(
+            " Warning reason: You are using more than 70% of the connection slots. Increase max_connections to avoid saturation of connection slots")
+    if track_activities_status == 'off':
+        g_logger.log(" Warning reason: average connection age is not available when track_activities is off")
+    else:
+        if connection_age_average < 1 * 60:
+            g_logger.log(
+                " Warning reason: The average connection age is less than 1 minute. Use a connection pooler to limit new connections/second")
+    if pre_auth_delay > 0:
+        message = f" Warning reason: pre_auth_delay={pre_auth_delay}: This is a developer feature for debugging and adds a connection delay of {pre_auth_delay} seconds."
+        g_logger.log(message)
+    if post_auth_delay > 0:
+        message = f" Warning reason: post_auth_delay={post_auth_delay}: This is a developer feature for debugging and adds a connection delay of {post_auth_delay} seconds."
+        g_logger.log(message)
+
+
+def format_percent(value):
+    return "%.2f%%" % value
+
+
+def checkMemoryUsageSituation():
+    """
+    function : checkMemoryUsageSituation
+    input : NA
+    output : NA
+    """
+    maintenance_work_mem = convertMemoryStrToNum(execute_query("""show maintenance_work_mem;"""))
+    # pg_database_size() reports bytes; convert to KB to match convertMemoryStrToNum
+    all_databases_size = int(execute_query("""select sum(pg_database_size(datname)) from pg_database;""")) // 1024
+    shared_buffers = convertMemoryStrToNum(execute_query("""show shared_buffers"""))
+    effective_cache_size = convertMemoryStrToNum(execute_query("""show effective_cache_size"""))
+    if maintenance_work_mem <= 64 * 1024:
+        message = " Warning reason: maintenance_work_mem is less than or equal to its default value. Increase it to reduce the duration of maintenance tasks"
+        g_logger.log(message)
+    shared_buffers_usage = all_databases_size / shared_buffers
+    if shared_buffers_usage < 0.7:
+        message = " Warning reason: shared_buffers is too big for the total size of the databases, uselessly using memory"
+        g_logger.log(message)
+    if effective_cache_size < shared_buffers:
+        message = " Warning reason: effective_cache_size < shared_buffers. This is inadequate, as the effective_cache_size value must be (shared buffers) + (size in bytes of the kernel's storage buffercache that will be used for openGauss data files)"
+        g_logger.log(message)
+    buffercache_declared_size = effective_cache_size - shared_buffers
+    if buffercache_declared_size < 4 * 1024 * 1024:  # 4GB expressed in KB
+        message = " Warning reason: The declared buffercache size (effective_cache_size - shared_buffers) is less than 4GB. The effective_cache_size value is probably inadequate. It must be (shared buffers) + (size in bytes of the kernel's storage buffercache that will be used for openGauss data files)"
+        g_logger.log(message)
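checkMemoryUsageSituation compares everything in KB; the convertMemoryStrToNum helper that follows normalizes the memory GUC strings printed by show. A quick sanity sketch of its expected behaviour (assuming a two-letter unit suffix, as gsql prints it):

    # All results are in KB.
    assert convertMemoryStrToNum("128MB") == 128 * 1024
    assert convertMemoryStrToNum("4GB") == 4 * 1024 * 1024
    assert convertMemoryStrToNum("16KB") == 16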
+
+
+def convertMemoryStrToNum(mem_str):
+    """Convert a memory GUC string such as '128MB' to a value in KB."""
+    units = {'KB': 1, 'MB': 1024, 'GB': 1024 ** 2, 'TB': 1024 ** 3}
+    num, unit = mem_str[:-2], mem_str[-2:]
+    return int(float(num) * units[unit])
+
+
+def checkSharedBuffersHitRate():
+    """
+    function : checkSharedBuffersHitRate
+    input : NA
+    output : NA
+    """
+    shared_buffers_hit_rate = float(execute_query(
+        """select sum(idx_blks_hit)*100/(sum(idx_blks_read)+sum(idx_blks_hit)+1) from pg_statio_all_tables;"""))
+    if shared_buffers_hit_rate > 99.99:
+        message = " Warning reason: The hit rate is very high. If this openGauss instance was recently used as it usually is and was not stopped since, then you may reduce shared_buffers"
+        g_logger.log(message)
+    if shared_buffers_hit_rate < 90:
+        message = " Warning reason: The hit rate is too low. Increase shared_buffers to increase the hit rate"
+        g_logger.log(message)
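checkLogSituation below first normalizes log_min_duration_statement to milliseconds before comparing it with thresholds. The same conversion, isolated as a standalone sketch (guc_to_ms is a hypothetical helper, not part of this patch; suffixes assumed to be ms, s or min):

    def guc_to_ms(value):
        # '250ms' -> 250, '2s' -> 2000, '1min' -> 60000, '-1' -> -1
        for suffix, factor in (('ms', 1), ('min', 60 * 1000), ('s', 1000)):
            if value.endswith(suffix):
                return int(value[:-len(suffix)]) * factor
        return int(value)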
+
+
+def checkLogSituation():
+    """
+    function : checkLogSituation
+    input : NA
+    output : NA
+    """
+    log_statement = execute_query("""show log_statement""")
+    log_hostname = execute_query("""show log_hostname""")
+    log_min_duration_statement = execute_query("""show log_min_duration_statement""")
+    # Normalize to milliseconds; gsql may print the value with a unit suffix.
+    if 'ms' in log_min_duration_statement:
+        log_min_duration_statement = int(log_min_duration_statement.replace('ms', ''))
+    elif 'min' in log_min_duration_statement:
+        log_min_duration_statement = int(log_min_duration_statement.replace('min', '')) * 60 * 1000
+    elif 's' in log_min_duration_statement:
+        log_min_duration_statement = int(log_min_duration_statement.replace('s', '')) * 1000
+    else:
+        log_min_duration_statement = int(log_min_duration_statement)
+    if log_min_duration_statement == -1:
+        message = " Warning reason: Logging of long queries is deactivated. It will be more difficult to optimize query performance"
+        g_logger.log(message)
+    elif log_min_duration_statement < 1000:
+        message = " Warning reason: Any request running less than 1 second will be written to the log. It may be storage-intensive (I/O and space)"
+        g_logger.log(message)
+    if log_hostname == 'on':
+        message = " Warning reason: log_hostname is on: this will decrease connection performance (because openGauss has to do DNS lookups)"
+        g_logger.log(message)
+    if log_statement == 'all' or log_statement == 'mod':
+        message = " Warning reason: log_statement=all or mod is very storage-intensive and only useful for debugging"
+        g_logger.log(message)
+
+
+def checkUsers():
+    expiring_soon_users = execute_query("""select usename from pg_user where valuntil < now() + interval '7 days'""")
+    i_am_super = execute_query("""select usename from pg_shadow where passwd='md5'||md5(usename||usename)""")
+    if len(expiring_soon_users) > 0:
+        message = " Warning reason: Some user accounts will expire in less than 7 days"
+        g_logger.log(message)
+    if len(i_am_super) > 0:
+        message = " Warning reason: Some users have a password identical to their user name. This is an insecure user password"
+        g_logger.log(message)
+
+
 def checkConnection(isSetting=False):
     """
     function : Check Connection
@@ -2537,6 +2692,7 @@
     checkHostnossl()
     checkHostAddressno0()
     checkSSLConnection(isSetting)
+    checkConnectionStatus(isSetting)
 
 
 def checkMonitorIP(isSetting):
@@ -4575,6 +4731,8 @@ def checkRuntimeEnvironmentConfiguration(isSetting=False):
     checkUmask(isSetting)
     checkHidepid()
     checkNtpd()
+    checkMemoryUsageSituation()
+    checkSharedBuffersHitRate()
 
 
 def checkUmask(isSetting):
@@ -4665,6 +4823,22 @@ def checkOtherConfigurations(isSetting=False):
     """
     checkBackslashQuote(isSetting)
     checkAllowSystemTableMods(isSetting)
+    checkRunningTime(isSetting)
+    checkUsers()
+    checkPhaseCommit()
+    checkAutovacuum()
+    checkPoint()
+    checkStorage()
+    checkWal()
+    checkPlanner()
+    checkIndexes()
+    checkProcedures()
+    checkOvercommit()
+    checkArchive()
+    checkBgwriter()
+    checkHugepages()
+    checkIoSchedule(ssd=0)
+    checkDependencies()
 
 
 def checkBackslashQuote(isSetting):
@@ -4696,6 +4870,443 @@ def checkAllowSystemTableMods(isSetting):
         setAllowSystemTableMods(data)
 
 
+def checkRunningTime(isSetting):
+    """
+    function : checkRunningTime
+    input : Bool
+    output : NA
+    """
+    res = execute_query("""select extract(epoch from now()-pg_postmaster_start_time());""")
+    day_s = 60 * 60 * 24
+    uptime = float(res)
+    if uptime < day_s:
+        g_logger.log(
+            " Warning reason: Uptime is less than 1 day. This report may be inaccurate")
+
+
+def checkPhaseCommit():
+    """
+    function : checkPhaseCommit
+    input : NA
+    output : NA
+    """
+    cur_version = execute_query("""SELECT opengauss_version();""")
+    if isLaterVersion(min_ver='1.0', cur_ver=cur_version):
+        prepared_xact_count = int(execute_query("""select count(1) from pg_prepared_xacts;"""))
+        if prepared_xact_count != 0:
+            message = " Warning reason: two-phase commit prepared transactions exist. If they stay for too long they may lock objects for too long"
+            g_logger.log(message)
+        prepared_xact_lock_count = int(execute_query(
+            """select count(1) from pg_locks where transactionid in (select transaction from pg_prepared_xacts);"""))
+        if prepared_xact_lock_count > 0:
+            message = " Warning reason: Two-phase commit transactions hold " + str(
+                prepared_xact_lock_count) + " locks!"
+            g_logger.log(message)
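checkPhaseCommit gates its probes on isLaterVersion, defined next, which compares only the major.minor prefix of the two version strings. Its expected semantics, assuming dotted numeric versions:

    assert isLaterVersion('1.0', '2.3') is True    # newer major wins
    assert isLaterVersion('2.4', '2.3') is False   # same major, older minor
    assert isLaterVersion('1.0', '1.0') is True    # an equal version counts as "later"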
+
+
+def isLaterVersion(min_ver, cur_ver):
+    """Return True if cur_ver (major.minor) is at least min_ver."""
+    min_major, min_minor = int(min_ver.split('.')[0]), int(min_ver.split('.')[1])
+    cur_major, cur_minor = int(cur_ver.split('.')[0]), int(cur_ver.split('.')[1])
+    if cur_major > min_major:
+        return True
+    if cur_major == min_major:
+        return cur_minor >= min_minor
+    return False
+
+
+def checkAutovacuum():
+    """
+    function : checkAutovacuum
+    input : NA
+    output : NA
+    """
+    autovacuum = execute_query("""show autovacuum;""")
+    if autovacuum != 'on':
+        message = " Warning reason: autovacuum is not activated. This is bad unless you know what you are doing"
+        g_logger.log(message)
+    vacuum_cost_delay = execute_query("""show vacuum_cost_delay;""")
+    vacuum_cost_delay = int(vacuum_cost_delay.rstrip('ms'))
+    if vacuum_cost_delay >= 20:
+        message = " Warning reason: vacuum_cost_delay is generally set relatively small, with a common setting of 10 or 20 milliseconds; reduce vacuum_cost_delay"
+        g_logger.log(message)
+
+
+def checkPoint():
+    """
+    function : checkPoint
+    input : NA
+    output : NA
+    """
+    checkpoint_completion_target = float(execute_query("""show checkpoint_completion_target;"""))
+    checkpoint_timeout = trans(execute_query("""show checkpoint_timeout;"""))
+    checkpoint_warning = trans(execute_query("""show checkpoint_warning;"""))
+    checkpoint_dirty_writing_time_window = checkpoint_timeout * checkpoint_completion_target
+    if checkpoint_warning == 0:
+        message = " Warning reason: checkpoint_warning value is 0. This is rarely adequate"
+        g_logger.log(message)
+    if checkpoint_completion_target == 0:
+        message = " Warning reason: checkpoint_completion_target value is 0. This is absurd"
+        g_logger.log(message)
+    else:
+        if checkpoint_completion_target < 0.5:
+            message = " Warning reason: checkpoint_completion_target is lower than its default value (0.5)"
+            g_logger.log(message)
+        elif 0.5 <= checkpoint_completion_target <= 0.7:
+            message = " Warning reason: checkpoint_completion_target is low"
+            g_logger.log(message)
+        elif 0.9 < checkpoint_completion_target < 1:
+            message = " Warning reason: checkpoint_completion_target is too near to 1"
+            g_logger.log(message)
+        elif checkpoint_completion_target >= 1:
+            message = " Warning reason: checkpoint_completion_target is too high"
+            g_logger.log(message)
+    if checkpoint_dirty_writing_time_window < 10:
+        message = " Warning reason: the dirty-page writing window (checkpoint_timeout * checkpoint_completion_target) is probably too low"
+        g_logger.log(message)
+
+
+def trans(data):
+    """Convert a time GUC string such as '30s' or '5min' to seconds."""
+    value = int(re.sub(r'\D', '', data))
+    if 'ms' in data:
+        return value // 1000
+    if 'min' in data:
+        return value * 60
+    if 'h' in data:
+        return value * 3600
+    return value
+
+
+def checkWal():
+    """
+    function : checkWal
+    input : NA
+    output : NA
+    """
+    wal = execute_query("""show wal_level;""")
+    if wal == 'minimal':
+        message = " Warning reason: The 'minimal' wal_level does not allow PITR backup and recovery"
+        g_logger.log(message)
+
+
+def checkPlanner():
+    """
+    function : checkPlanner
+    input : NA
+    output : NA
+    """
+    ModifiedCosts = execute_query("""select name from pg_settings where name like '%cost%' and setting<>boot_val;""")
+    DisabledPlanFunctions = execute_query(
+        """select name, setting from pg_settings where name like 'enable_%' and setting='off' ;""")
+    if len(ModifiedCosts) != 0:
+        message = " Warning reason: Some I/O cost settings are not set to their default value. This may lead the planner to create suboptimal plans"
+        g_logger.log(message)
+    if len(DisabledPlanFunctions) != 0:
+        message = " Warning reason: Some plan features are disabled: " + str(DisabledPlanFunctions)
+        g_logger.log(message)
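For reference, the dirty-writing window computed in checkPoint is checkpoint_timeout * checkpoint_completion_target, in seconds. With illustrative values (not read from any instance):

    window = trans('5min') * 0.9   # 300 s * 0.9
    assert window == 270.0         # comfortably above the 10 s floor checked above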
+
+
+def checkIndexes():
+    """
+    function : checkIndexes
+    input : NA
+    output : NA
+    """
+    invalid_indexes = execute_query(
+        """SELECT concat(n.nspname, '.', c.relname) as index FROM pg_catalog.pg_class c,pg_catalog.pg_namespace n,pg_catalog.pg_index i WHERE i.indisvalid = false AND i.indexrelid = c.oid AND c.relnamespace = n.oid;""")
+    if len(invalid_indexes) > 0:
+        message = " Warning reason: Invalid indexes exist in the database. Please check/reindex any invalid index"
+        g_logger.log(message)
+
+
+def checkProcedures():
+    """
+    function : checkProcedures
+    input : NA
+    output : NA
+    """
+    default_cost_procs = execute_query(
+        """select n.nspname||'.'||p.proname from pg_catalog.pg_proc p left join pg_catalog.pg_namespace n on n.oid = p.pronamespace where pg_catalog.pg_function_is_visible(p.oid) and n.nspname not in ('pg_catalog','information_schema','sys') and p.prorows<>1000 and p.procost<>10 and p.proname not like 'uuid_%' and p.proname != 'pg_stat_statements_reset'""")
+    if len(default_cost_procs) > 0:
+        message = " Warning reason: Some user procedures do not have custom cost and rows settings"
+        g_logger.log(message)
+
+
+def checkOvercommit():
+    """
+    function : checkOvercommit
+    input : NA
+    output : NA
+    """
+    cmd_memory = "cat /proc/sys/vm/overcommit_memory"
+    cmd_ratio = "cat /proc/sys/vm/overcommit_ratio"
+    overcommit_memory = getCmdRes(cmd_memory)
+    overcommit_ratio = getCmdRes(cmd_ratio)
+    if not (is_pure_digit(overcommit_memory) and is_pure_digit(overcommit_ratio)):
+        return
+    os_name = getCmdRes("uname -s")
+    if os_name.lower() != 'darwin' and int(overcommit_memory) != 2:
+        message = " Warning reason: Memory overcommitment is allowed on the system. This may lead the OOM killer to kill at least one openGauss process, DANGER!"
+        g_logger.log(message)
+    if int(overcommit_ratio) <= 50:
+        message = (
+            " Warning reason: vm.overcommit_ratio is too low. You will not be able to use more than ({}/100) * RAM + SWAP for applications.".format(
+                overcommit_ratio))
+        g_logger.log(message)
+    elif int(overcommit_ratio) > 90:
+        message = " Warning reason: vm.overcommit_ratio is too high; you need to keep some memory free"
+        g_logger.log(message)
+
+
+def is_pure_digit(s):
+    return s.isdigit()
+
+
+def getCmdRes(command):
+    try:
+        result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                                universal_newlines=True)
+        return result.stdout.replace('\n', '')
+    except Exception:
+        return ''
+
+
+def checkStorage():
+    fsync = execute_query("""show fsync""")
+    wal_sync_method = execute_query("""show wal_sync_method""")
+    synchronize_seqscans = execute_query("""show synchronize_seqscans""")
+    os_name = getCmdRes("uname -s")
+    if fsync != 'on':
+        message = " Warning reason: fsync is off. You may lose data after a crash, DANGER!"
+        g_logger.log(message)
+    if os_name.lower() == 'darwin' and wal_sync_method != 'fsync_writethrough':
+        message = (
+            " Warning reason: wal_sync_method is {}. Settings other than fsync_writethrough may lead to loss of data after a crash, DANGER!".format(
+                wal_sync_method))
+        g_logger.log(message)
+
+    if synchronize_seqscans != 'on':
+        message = " Warning reason: synchronize_seqscans is off"
+        g_logger.log(message)
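checkOvercommit above reads the kernel knobs by shelling out to cat; the same probe can be done without a shell. A hedged alternative sketch (read_sysctl is hypothetical and not part of this patch; the paths are standard Linux procfs):

    def read_sysctl(name):
        # e.g. read_sysctl('vm.overcommit_memory') -> '2' on a tuned host
        with open('/proc/sys/' + name.replace('.', '/')) as f:
            return f.read().strip()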
+
+
+def checkArchive():
+    archive_timeout = trans(execute_query("""show archive_timeout"""))
+    if archive_timeout < 60:
+        g_logger.log(
+            " Warning reason: Setting archive_timeout to a very small value will occupy a huge amount of archive storage space. It is recommended to set archive_timeout to at least 60 seconds")
+
+
+def checkBgwriter():
+    bgwriter_lru_multiplier = float(execute_query("""show bgwriter_lru_multiplier"""))
+    if bgwriter_lru_multiplier < 1:
+        g_logger.log(
+            " Warning reason: bgwriter_lru_multiplier is less than 1, so backend processes will often have to write dirty buffers themselves; increase bgwriter_lru_multiplier to reduce that extra I/O overhead")
+
+
+def checkHugepages():
+    huge_pages = execute_query("""show enable_huge_pages""")
+    os_name = getCmdRes("uname -s")
+    if os_name.lower() not in ('linux', 'freebsd'):
+        g_logger.log(" Warning reason: No Huge Pages on this OS")
+    else:
+        nr_hugepages = getCmdRes("cat /proc/sys/vm/nr_hugepages")
+        if not nr_hugepages.isdigit() or int(nr_hugepages) == 0:
+            g_logger.log(" Warning reason: No Huge Pages available on the system")
+        else:
+            if huge_pages == 'on':
+                g_logger.log(
+                    " Warning reason: enable_huge_pages=on, therefore openGauss needs Huge Pages and will not start if the kernel doesn't provide them")
+            else:
+                os_huge = subprocess.run("grep ^Huge /proc/meminfo", shell=True, stdout=subprocess.PIPE,
+                                         stderr=subprocess.PIPE, universal_newlines=True).stdout
+                os_huge_list = os_huge.replace(' ', '').split('\n')
+                os_info = {}
+                # Walk the split lines and store each key/value pair in a dict
+                for item in os_huge_list:
+                    if item != '':
+                        key, value = item.split(':')
+                        os_info[key.strip()] = int(re.search(r'\d+', value.strip()).group())
+                pg_pid = execute_query("""SELECT pg_backend_pid();""")
+                peak = getCmdRes("grep ^VmPeak /proc/" + pg_pid + "/status | awk '{ print $2 }'").strip()
+                if peak.isdigit():
+                    suggesthugepages = int(peak) / int(os_info['Hugepagesize'])
+                    if os_info['HugePages_Total'] < int(suggesthugepages + 0.5):
+                        message = " Warning reason: set vm.nr_hugepages=%d" % int(
+                            suggesthugepages + 0.5) + " in /etc/sysctl.conf and invoke sysctl -p /etc/sysctl.conf to reload it. This will allocate Huge Pages (it may require a system reboot)"
+                        g_logger.log(message)
+                    if os_info['Hugepagesize'] == 2048:
+                        g_logger.log(" Warning reason: Change Huge Pages size from 2MB to 1GB if the machine is dedicated to openGauss")
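The sizing rule in checkHugepages rounds VmPeak / Hugepagesize to the nearest whole page. With illustrative numbers (not measured on any host):

    peak_kb, hugepage_kb = 8388608, 2048         # 8 GB peak, 2 MB pages
    suggested = int(peak_kb / hugepage_kb + 0.5)
    assert suggested == 4096                     # candidate value for vm.nr_hugepages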
+
+
+def checkIoSchedule(ssd=0):
+    active_schedulers = {}
+    os_info = {'name': getCmdRes("uname -s")}
+    rotational_storage = 0
+    if os_info['name'].lower() == 'darwin':
+        g_logger.log(" Warning reason: No I/O scheduler information on MacOS")
+    else:
+        storage_units_list = subprocess.run("ls /sys/block/", shell=True, stdout=subprocess.PIPE,
+                                            stderr=subprocess.PIPE, universal_newlines=True)
+        if storage_units_list.returncode != 0:
+            g_logger.log(" Warning reason: Unable to explore storage unit(s) system attributes")
+        else:
+            for unit in storage_units_list.stdout.split('\n'):
+                if unit in ('.', '..', ''):
+                    continue
+                if unit.startswith('sr'):
+                    continue
+                # Scheduler
+                unit_schedulers = subprocess.run("cat /sys/block/%s/queue/scheduler" % unit, shell=True,
+                                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                                                 universal_newlines=True)
+                if unit_schedulers.returncode != 0:
+                    g_logger.log(
+                        " Warning reason: Unable to identify the scheduler used for the storage unit %s" % unit)
+                else:
+                    unit_schedulers = str(unit_schedulers.stdout.strip())
+                    if unit_schedulers == 'none':
+                        continue
+                    for scheduler in unit_schedulers.split():
+                        match = re.match(r'^\[([a-z-]+)\]$', scheduler)
+                        if match:
+                            active_schedulers[match.group(1)] = active_schedulers.get(match.group(1), 0) + 1
+
+                # Detect SSD or rotational disks
+                unit_is_rotational = 1  # Default
+                if ssd:
+                    unit_is_rotational = 0
+                else:
+                    unit_is_rotational = subprocess.run("cat /sys/block/%s/queue/rotational" % unit, shell=True,
+                                                        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                                                        universal_newlines=True)
+                    if unit_is_rotational.returncode != 0:
+                        g_logger.log(
+                            " Warning reason: Unable to identify if the storage unit %s is rotational" % unit)
+                        unit_is_rotational = 0
+                    else:
+                        unit_is_rotational = unit_is_rotational.stdout.strip()
+                rotational_storage += int(unit_is_rotational)
+
+    hypervisor = None
+    if os_info['name'].lower() != 'darwin':
+        systemd = subprocess.run("systemd-detect-virt --vm", shell=True, stdout=subprocess.PIPE,
+                                 stderr=subprocess.PIPE, universal_newlines=True)
+        if systemd.returncode == 0:
+            systemd = systemd.stdout.strip()
+            if re.match(r'\S+', systemd):
+                hypervisor = systemd
+        else:
+            dmesg = subprocess.run("dmesg", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                                   universal_newlines=True)
+            for line in dmesg.stdout.split('\n'):
+                if re.search('vmware', line, re.IGNORECASE):
+                    hypervisor = 'VMware'
+                    break
+                elif re.search('kvm', line, re.IGNORECASE):
+                    hypervisor = 'KVM'
+                    break
+                elif re.search('xen', line, re.IGNORECASE):
+                    hypervisor = 'XEN'
+                    break
+                elif re.search('vbox', line, re.IGNORECASE):
+                    hypervisor = 'VirtualBox'
+                    break
+                elif re.search('hyper-v', line, re.IGNORECASE):
+                    hypervisor = 'Hyper-V'
+                    break
+
+    if hypervisor is not None and rotational_storage > 0:
+        g_logger.log(
+            " Warning reason: If openGauss runs in a virtual machine, the underlying physical storage type cannot be known. Use the --ssd arg if the VM only uses SSD storage")
+
+    if hypervisor is not None and 'cfq' in active_schedulers:
+        g_logger.log(
+            " Warning reason: The CFQ scheduler is inadequate on a virtual machine (because the hypervisor and/or underlying kernel is already in charge of the I/O scheduling)")
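checkDependencies below feeds each rule expression from the CSV files to the PLY-based lexer and parser added later in this patch. A minimal standalone run, assuming the identifiers used by the rule have already been registered via set_settings (the rule text itself is illustrative, not taken from the shipped CSVs):

    from local.parser.myLexer import MyLexer
    from local.parser.myYACC import MyYACC

    lexer = MyLexer()
    lexer.build()
    parser = MyYACC()
    parser.build()
    # Hypothetical rule: raise an alert when both GUCs are on at once.
    parser.yacc.parse('enable_thread_pool == "on" && use_workload_manager == "on"'
                      ' -> Alert(1, "conflicting settings")')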
+
+
+def checkDependencies():
+    sql_query = """
+    SELECT json_agg(json_build_object(
+        'name', name,
+        'setting', setting,
+        'unit', unit,
+        'category', category,
+        'short_desc', short_desc,
+        'extra_desc', extra_desc,
+        'context', context,
+        'vartype', vartype,
+        'source', source,
+        'min_val', min_val,
+        'max_val', max_val,
+        'enumvals', enumvals,
+        'boot_val', boot_val,
+        'reset_val', reset_val,
+        'sourcefile', sourcefile,
+        'sourceline', sourceline
+    )) FROM pg_settings;
+    """
+    port = int(getValueFromFile('port'))
+    cmd = f"gsql -d postgres -p '{port}' -r -c \"{sql_query}\""
+    result = subprocess.run(
+        ['gsql', '-d', 'postgres', '-p', str(port), '-c', sql_query, '-t', '-A'],
+        capture_output=True,
+        text=True
+    )
+    if result.returncode != 0:
+        raise Exception((ErrorCode.GAUSS_505["GAUSS_50502"] % "ConnectionConfiguration") +
+                        ("The cmd is : %s" % cmd))
+
+    ret = json.loads(result.stdout.strip())
+    settings = {}
+    for row in ret:
+        settings[row['name']] = row
+    set_settings(settings)
+    set_dependency_settings(settings)
+    current_path = os.path.dirname(os.path.realpath(__file__))
+    # Determine whether this is a single node or one node of a cluster
+    local_role_value = get_local_role_value(os.environ['PGDATA'])
+    rule_files = determine_rule_files(local_role_value)
+    m = MyLexer()
+    m.build()
+    y = MyYACC()
+    y.build()
+    dependency_parser = y.yacc
+    for rule_file in rule_files:
+        with open(os.path.join(current_path, rule_file), mode='r', newline='', encoding='utf-8') as csvfile:
+            reader = csv.reader(csvfile)
+            dependencies = []
+            for row in reader:
+                dependencies.append(row[1])
+            dependencies = list(set(dependencies))
+            for dependency in dependencies:
+                try:
+                    dependency_parser.parse(dependency)
+                except Exception:
+                    continue
+    show_dependency_info()
+
+
+def get_local_role_value(conf_path):
+    cmd = "gs_ctl query -D %s" % conf_path
+    try:
+        # Execute the command and capture its output
+        result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE,
+                                stderr=subprocess.PIPE, universal_newlines=True)
+        # Extract the value of local_role with a regular expression
+        match = re.search(r'local_role\s+:\s+(\w+)', result.stdout)
+        if match:
+            return match.group(1)
+        else:
+            raise ValueError("local_role not found")
+    except subprocess.CalledProcessError as e:
+        raise RuntimeError(f"Command execution failed: {e.stderr.strip()}") from e
+    except ValueError as e:
+        raise RuntimeError(str(e)) from e
+
+
+def determine_rule_files(local_role_value):
+    if local_role_value == 'Normal':
+        return ['rules/rules_single_node.csv']
+    else:
+        return ['rules/rules_multi_node.csv']
+
 #############################################################################
 def setOtherConfigurations(isSetting=True):
     """
diff --git a/script/local/parser/__init__.py b/script/local/parser/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/script/local/parser/functions.py b/script/local/parser/functions.py
new file mode 100644
index 00000000..350a877e
--- /dev/null
+++ b/script/local/parser/functions.py
@@ -0,0 +1,34 @@
+import sys
+import os
+localDirPath = os.path.dirname(os.path.realpath(__file__))
+
+sys.path.append(sys.path[0] + "/../")
+
+from local.parser.utils import add_dependency_info
+
+
+def Alert(level, s):
add_dependency_info(level, 'Check Alert', s) + +def NotEffect(level, s): + add_dependency_info(level, 'Check NoEffect', s) + +def Overwrite(level, s): + add_dependency_info(level, 'Check Overwrite', s) + +def Function(level, s): + add_dependency_info(level, 'Check Function', s) + +def Performance(level, s): + add_dependency_info(level, 'Check Performance', s) + +function_dict = { + "Alert" : Alert, + "NotEffect" : NotEffect, + "Overwrite" : Overwrite, + "Function" : Function, + "Performance" : Performance, +} + +def get_function(name): + return function_dict[name] \ No newline at end of file diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py new file mode 100644 index 00000000..129f3237 --- /dev/null +++ b/script/local/parser/lex.py @@ -0,0 +1,866 @@ +import re +import sys +import types +import copy +import os +import inspect + +# This tuple contains acceptable string types +StringTypes = (str, bytes) + +# This regular expression is used to match valid token names +_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') + +# Exception thrown when invalid token encountered and no default error +# handler is defined. +class LexError(Exception): + def __init__(self, message, s): + self.args = (message,) + self.text = s + +# Token class. This class is used to represent the tokens produced. +class LexToken(object): + def __repr__(self): + return f'LexToken({self.type},{self.value!r},{self.lineno},{self.lexpos})' + +# This object is a stand-in for a logging object created by the +# logging module. + +class Logger(object): + def __init__(self, f): + self.f = f + + def critical(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') + + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') + + info = critical + debug = critical + +# ----------------------------------------------------------------------------- +# === Lexing Engine === +# +# The following Lexer class implements the lexer runtime. There are only +# a few public methods and attributes: +# +# input() - Store a new string in the lexer +# token() - Get the next token +# clone() - Clone the lexer +# +# lineno - Current line number +# lexpos - Current position in the input string +# ----------------------------------------------------------------------------- + +class Lexer: + def __init__(self): + self.lexre = None # Master regular expression. 
This is a list of + # tuples (re, findex) where re is a compiled + # regular expression and findex is a list + # mapping regex group numbers to rules + self.lexretext = None # Current regular expression strings + self.lexstatere = {} # Dictionary mapping lexer states to master regexs + self.lexstateretext = {} # Dictionary mapping lexer states to regex strings + self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names + self.lexstate = 'INITIAL' # Current lexer state + self.lexstatestack = [] # Stack of lexer states + self.lexstateinfo = None # State information + self.lexstateignore = {} # Dictionary of ignored characters for each state + self.lexstateerrorf = {} # Dictionary of error functions for each state + self.lexstateeoff = {} # Dictionary of eof functions for each state + self.lexreflags = 0 # Optional re compile flags + self.lexdata = None # Actual input data (as a string) + self.lexpos = 0 # Current position in input text + self.lexlen = 0 # Length of the input text + self.lexerrorf = None # Error rule (if any) + self.lexeoff = None # EOF rule (if any) + self.lextokens = None # List of valid tokens + self.lexignore = '' # Ignored characters + self.lexliterals = '' # Literal characters that can be passed through + self.lexmodule = None # Module + self.lineno = 1 # Current line number + + def clone(self, object=None): + c = copy.copy(self) + + # If the object parameter has been supplied, it means we are attaching the + # lexer to a new object. In this case, we have to rebind all methods in + # the lexstatere and lexstateerrorf tables. + + if object: + newtab = {} + for key, ritem in self.lexstatere.items(): + newre = [] + for cre, findex in ritem: + newfindex = [] + for f in findex: + if not f or not f[0]: + newfindex.append(f) + continue + newfindex.append((getattr(object, f[0].__name__), f[1])) + newre.append((cre, newfindex)) + newtab[key] = newre + c.lexstatere = newtab + c.lexstateerrorf = {} + for key, ef in self.lexstateerrorf.items(): + c.lexstateerrorf[key] = getattr(object, ef.__name__) + c.lexmodule = object + return c + + # ------------------------------------------------------------ + # input() - Push a new string into the lexer + # ------------------------------------------------------------ + def input(self, s): + self.lexdata = s + self.lexpos = 0 + self.lexlen = len(s) + + # ------------------------------------------------------------ + # begin() - Changes the lexing state + # ------------------------------------------------------------ + def begin(self, state): + if state not in self.lexstatere: + raise ValueError(f'Undefined state {state!r}') + self.lexre = self.lexstatere[state] + self.lexretext = self.lexstateretext[state] + self.lexignore = self.lexstateignore.get(state, '') + self.lexerrorf = self.lexstateerrorf.get(state, None) + self.lexeoff = self.lexstateeoff.get(state, None) + self.lexstate = state + + # ------------------------------------------------------------ + # push_state() - Changes the lexing state and saves old on stack + # ------------------------------------------------------------ + def push_state(self, state): + self.lexstatestack.append(self.lexstate) + self.begin(state) + + # ------------------------------------------------------------ + # pop_state() - Restores the previous state + # ------------------------------------------------------------ + def pop_state(self): + self.begin(self.lexstatestack.pop()) + + # ------------------------------------------------------------ + # current_state() - Returns the current lexing 
state + # ------------------------------------------------------------ + def current_state(self): + return self.lexstate + + # ------------------------------------------------------------ + # skip() - Skip ahead n characters + # ------------------------------------------------------------ + def skip(self, n): + self.lexpos += n + + # ------------------------------------------------------------ + # token() - Return the next token from the Lexer + # + # Note: This function has been carefully implemented to be as fast + # as possible. Don't make changes unless you really know what + # you are doing + # ------------------------------------------------------------ + def token(self): + # Make local copies of frequently referenced attributes + lexpos = self.lexpos + lexlen = self.lexlen + lexignore = self.lexignore + lexdata = self.lexdata + + while lexpos < lexlen: + # This code provides some short-circuit code for whitespace, tabs, and other ignored characters + if lexdata[lexpos] in lexignore: + lexpos += 1 + continue + + # Look for a regular expression match + for lexre, lexindexfunc in self.lexre: + m = lexre.match(lexdata, lexpos) + if not m: + continue + + # Create a token for return + tok = LexToken() + tok.value = m.group() + tok.lineno = self.lineno + tok.lexpos = lexpos + + i = m.lastindex + func, tok.type = lexindexfunc[i] + + if not func: + # If no token type was set, it's an ignored token + if tok.type: + self.lexpos = m.end() + return tok + else: + lexpos = m.end() + break + + lexpos = m.end() + + # If token is processed by a function, call it + + tok.lexer = self # Set additional attributes useful in token rules + self.lexmatch = m + self.lexpos = lexpos + newtok = func(tok) + del tok.lexer + del self.lexmatch + + # Every function must return a token, if nothing, we just move to next token + if not newtok: + lexpos = self.lexpos # This is here in case user has updated lexpos. + lexignore = self.lexignore # This is here in case there was a state change + break + return newtok + else: + # No match, see if in literals + if lexdata[lexpos] in self.lexliterals: + tok = LexToken() + tok.value = lexdata[lexpos] + tok.lineno = self.lineno + tok.type = tok.value + tok.lexpos = lexpos + self.lexpos = lexpos + 1 + return tok + + # No match. Call t_error() if defined. + if self.lexerrorf: + tok = LexToken() + tok.value = self.lexdata[lexpos:] + tok.lineno = self.lineno + tok.type = 'error' + tok.lexer = self + tok.lexpos = lexpos + self.lexpos = lexpos + newtok = self.lexerrorf(tok) + if lexpos == self.lexpos: + # Error method didn't change text position at all. This is an error. + raise LexError(f"Scanning error. 
Illegal character {lexdata[lexpos]!r}", + lexdata[lexpos:]) + lexpos = self.lexpos + if not newtok: + continue + return newtok + + self.lexpos = lexpos + raise LexError(f"Illegal character {lexdata[lexpos]!r} at index {lexpos}", + lexdata[lexpos:]) + + if self.lexeoff: + tok = LexToken() + tok.type = 'eof' + tok.value = '' + tok.lineno = self.lineno + tok.lexpos = lexpos + tok.lexer = self + self.lexpos = lexpos + newtok = self.lexeoff(tok) + return newtok + + self.lexpos = lexpos + 1 + if self.lexdata is None: + raise RuntimeError('No input string given with input()') + return None + + # Iterator interface + def __iter__(self): + return self + + def __next__(self): + t = self.token() + if t is None: + raise StopIteration + return t + +# ----------------------------------------------------------------------------- +# ==== Lex Builder === +# +# The functions and classes below are used to collect lexing information +# and build a Lexer object from it. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# _get_regex(func) +# +# Returns the regular expression assigned to a function either as a doc string +# or as a .regex attribute attached by the @TOKEN decorator. +# ----------------------------------------------------------------------------- +def _get_regex(func): + return getattr(func, 'regex', func.__doc__) + +# ----------------------------------------------------------------------------- +# get_caller_module_dict() +# +# This function returns a dictionary containing all of the symbols defined within +# a caller further down the call stack. This is used to get the environment +# associated with the yacc() call if none was provided. +# ----------------------------------------------------------------------------- +def get_caller_module_dict(levels): + f = sys._getframe(levels) + return { **f.f_globals, **f.f_locals } + +# ----------------------------------------------------------------------------- +# _form_master_re() +# +# This function takes a list of all of the regex components and attempts to +# form the master regular expression. Given limitations in the Python re +# module, it may be necessary to break the master regex into separate expressions. 
+# ----------------------------------------------------------------------------- +def _form_master_re(relist, reflags, ldict, toknames): + if not relist: + return [], [], [] + regex = '|'.join(relist) + try: + lexre = re.compile(regex, reflags) + + # Build the index to function map for the matching engine + lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) + lexindexnames = lexindexfunc[:] + + for f, i in lexre.groupindex.items(): + handle = ldict.get(f, None) + if type(handle) in (types.FunctionType, types.MethodType): + lexindexfunc[i] = (handle, toknames[f]) + lexindexnames[i] = f + elif handle is not None: + lexindexnames[i] = f + if f.find('ignore_') > 0: + lexindexfunc[i] = (None, None) + else: + lexindexfunc[i] = (None, toknames[f]) + + return [(lexre, lexindexfunc)], [regex], [lexindexnames] + except Exception: + m = (len(relist) // 2) + 1 + llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) + rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) + return (llist+rlist), (lre+rre), (lnames+rnames) + +# ----------------------------------------------------------------------------- +# def _statetoken(s,names) +# +# Given a declaration name s of the form "t_" and a dictionary whose keys are +# state names, this function returns a tuple (states,tokenname) where states +# is a tuple of state names and tokenname is the name of the token. For example, +# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') +# ----------------------------------------------------------------------------- +def _statetoken(s, names): + parts = s.split('_') + for i, part in enumerate(parts[1:], 1): + if part not in names and part != 'ANY': + break + + if i > 1: + states = tuple(parts[1:i]) + else: + states = ('INITIAL',) + + if 'ANY' in states: + states = tuple(names) + + tokenname = '_'.join(parts[i:]) + return (states, tokenname) + + +# ----------------------------------------------------------------------------- +# LexerReflect() +# +# This class represents information needed to build a lexer as extracted from a +# user's input file. 
+# ----------------------------------------------------------------------------- +class LexerReflect(object): + def __init__(self, ldict, log=None, reflags=0): + self.ldict = ldict + self.error_func = None + self.tokens = [] + self.reflags = reflags + self.stateinfo = {'INITIAL': 'inclusive'} + self.modules = set() + self.error = False + self.log = Logger(sys.stderr) if log is None else log + + # Get all of the basic information + def get_all(self): + self.get_tokens() + self.get_literals() + self.get_states() + self.get_rules() + + # Validate all of the information + def validate_all(self): + self.validate_tokens() + self.validate_literals() + self.validate_rules() + return self.error + + # Get the tokens map + def get_tokens(self): + tokens = self.ldict.get('tokens', None) + if not tokens: + self.log.error('No token list is defined') + self.error = True + return + + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True + return + + if not tokens: + self.log.error('tokens is empty') + self.error = True + return + + self.tokens = tokens + + # Validate the tokens + def validate_tokens(self): + terminals = {} + for n in self.tokens: + if not _is_identifier.match(n): + self.log.error(f"Bad token name {n!r}") + self.error = True + if n in terminals: + self.log.warning(f"Token {n!r} multiply defined") + terminals[n] = 1 + + # Get the literals specifier + def get_literals(self): + self.literals = self.ldict.get('literals', '') + if not self.literals: + self.literals = '' + + # Validate literals + def validate_literals(self): + try: + for c in self.literals: + if not isinstance(c, StringTypes) or len(c) > 1: + self.log.error(f'Invalid literal {c!r}. Must be a single character') + self.error = True + + except TypeError: + self.log.error('Invalid literals specification. literals must be a sequence of characters') + self.error = True + + def get_states(self): + self.states = self.ldict.get('states', None) + # Build statemap + if self.states: + if not isinstance(self.states, (tuple, list)): + self.log.error('states must be defined as a tuple or list') + self.error = True + else: + for s in self.states: + if not isinstance(s, tuple) or len(s) != 2: + self.log.error("Invalid state specifier %r. 
Must be a tuple (statename,'exclusive|inclusive')", s) + self.error = True + continue + name, statetype = s + if not isinstance(name, StringTypes): + self.log.error('State name %r must be a string', name) + self.error = True + continue + if not (statetype == 'inclusive' or statetype == 'exclusive'): + self.log.error("State type for state %r must be 'inclusive' or 'exclusive'", name) + self.error = True + continue + if name in self.stateinfo: + self.log.error("State %r already defined", name) + self.error = True + continue + self.stateinfo[name] = statetype + + # Get all of the symbols with a t_ prefix and sort them into various + # categories (functions, strings, error functions, and ignore characters) + + def get_rules(self): + tsymbols = [f for f in self.ldict if f[:2] == 't_'] + + # Now build up a list of functions and a list of strings + self.toknames = {} # Mapping of symbols to token names + self.funcsym = {} # Symbols defined as functions + self.strsym = {} # Symbols defined as strings + self.ignore = {} # Ignore strings by state + self.errorf = {} # Error functions by state + self.eoff = {} # EOF functions by state + + for s in self.stateinfo: + self.funcsym[s] = [] + self.strsym[s] = [] + + if len(tsymbols) == 0: + self.log.error('No rules of the form t_rulename are defined') + self.error = True + return + + for f in tsymbols: + t = self.ldict[f] + states, tokname = _statetoken(f, self.stateinfo) + self.toknames[f] = tokname + + if hasattr(t, '__call__'): + if tokname == 'error': + for s in states: + self.errorf[s] = t + elif tokname == 'eof': + for s in states: + self.eoff[s] = t + elif tokname == 'ignore': + line = t.__code__.co_firstlineno + file = t.__code__.co_filename + self.log.error("%s:%d: Rule %r must be defined as a string", file, line, t.__name__) + self.error = True + else: + for s in states: + self.funcsym[s].append((f, t)) + elif isinstance(t, StringTypes): + if tokname == 'ignore': + for s in states: + self.ignore[s] = t + if '\\' in t: + self.log.warning("%s contains a literal backslash '\\'", f) + + elif tokname == 'error': + self.log.error("Rule %r must be defined as a function", f) + self.error = True + else: + for s in states: + self.strsym[s].append((f, t)) + else: + self.log.error('%s not defined as a function or string', f) + self.error = True + + # Sort the functions by line number + for f in self.funcsym.values(): + f.sort(key=lambda x: x[1].__code__.co_firstlineno) + + # Sort the strings by regular expression length + for s in self.strsym.values(): + s.sort(key=lambda x: len(x[1]), reverse=True) + + # Validate all of the t_rules collected + def validate_rules(self): + for state in self.stateinfo: + # Validate all rules defined by functions + + for fname, f in self.funcsym[state]: + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) + + tokname = self.toknames[fname] + if isinstance(f, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + nargs = f.__code__.co_argcount + if nargs > reqargs: + self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) + self.error = True + continue + + if nargs < reqargs: + self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) + self.error = True + continue + + if not _get_regex(f): + self.log.error("%s:%d: No regular expression defined for rule %r", file, line, f.__name__) + self.error = True + continue + + try: + c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) + if c.match(''): + 
self.log.error("%s:%d: Regular expression for rule %r matches empty string", file, line, f.__name__) + self.error = True + except re.error as e: + self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) + if '#' in _get_regex(f): + self.log.error("%s:%d. Make sure '#' in rule %r is escaped with '\\#'", file, line, f.__name__) + self.error = True + + # Validate all rules defined by strings + for name, r in self.strsym[state]: + tokname = self.toknames[name] + if tokname == 'error': + self.log.error("Rule %r must be defined as a function", name) + self.error = True + continue + + if tokname not in self.tokens and tokname.find('ignore_') < 0: + self.log.error("Rule %r defined for an unspecified token %s", name, tokname) + self.error = True + continue + + try: + c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) + if (c.match('')): + self.log.error("Regular expression for rule %r matches empty string", name) + self.error = True + except re.error as e: + self.log.error("Invalid regular expression for rule %r. %s", name, e) + if '#' in r: + self.log.error("Make sure '#' in rule %r is escaped with '\\#'", name) + self.error = True + + if not self.funcsym[state] and not self.strsym[state]: + self.log.error("No rules defined for state %r", state) + self.error = True + + # Validate the error function + efunc = self.errorf.get(state, None) + if efunc: + f = efunc + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) + + if isinstance(f, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + nargs = f.__code__.co_argcount + if nargs > reqargs: + self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) + self.error = True + + if nargs < reqargs: + self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) + self.error = True + + for module in self.modules: + self.validate_module(module) + + # ----------------------------------------------------------------------------- + # validate_module() + # + # This checks to see if there are duplicated t_rulename() functions or strings + # in the parser input file. This is done using a simple regular expression + # match on each line in the source code of the given module. + # ----------------------------------------------------------------------------- + + def validate_module(self, module): + try: + lines, linen = inspect.getsourcelines(module) + except IOError: + return + + fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') + sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') + + counthash = {} + linen += 1 + for line in lines: + m = fre.match(line) + if not m: + m = sre.match(line) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + filename = inspect.getsourcefile(module) + self.log.error('%s:%d: Rule %s redefined. 
Previously defined on line %d', filename, linen, name, prev) + self.error = True + linen += 1 + +# ----------------------------------------------------------------------------- +# lex(module) +# +# Build all of the regular expression rules from definitions in the supplied module +# ----------------------------------------------------------------------------- +def lex(*, module=None, object=None, debug=False, + reflags=int(re.VERBOSE), debuglog=None, errorlog=None): + + global lexer + + ldict = None + stateinfo = {'INITIAL': 'inclusive'} + lexobj = Lexer() + global token, input + + if errorlog is None: + errorlog = Logger(sys.stderr) + + if debug: + if debuglog is None: + debuglog = Logger(sys.stderr) + + # Get the module dictionary used for the lexer + if object: + module = object + + # Get the module dictionary used for the parser + if module: + _items = [(k, getattr(module, k)) for k in dir(module)] + ldict = dict(_items) + # If no __file__ attribute is available, try to obtain it from the __module__ instead + if '__file__' not in ldict: + ldict['__file__'] = sys.modules[ldict['__module__']].__file__ + else: + ldict = get_caller_module_dict(2) + + # Collect parser information from the dictionary + linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) + linfo.get_all() + if linfo.validate_all(): + raise SyntaxError("Can't build lexer") + + # Dump some basic debugging information + if debug: + debuglog.info('lex: tokens = %r', linfo.tokens) + debuglog.info('lex: literals = %r', linfo.literals) + debuglog.info('lex: states = %r', linfo.stateinfo) + + # Build a dictionary of valid token names + lexobj.lextokens = set() + for n in linfo.tokens: + lexobj.lextokens.add(n) + + # Get literals specification + if isinstance(linfo.literals, (list, tuple)): + lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) + else: + lexobj.lexliterals = linfo.literals + + lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) + + # Get the stateinfo dictionary + stateinfo = linfo.stateinfo + + regexs = {} + # Build the master regular expressions + for state in stateinfo: + regex_list = [] + + # Add rules defined by functions first + for fname, f in linfo.funcsym[state]: + regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) + if debug: + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) + + # Now add all of the simple rules + for name, r in linfo.strsym[state]: + regex_list.append('(?P<%s>%s)' % (name, r)) + if debug: + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) + + regexs[state] = regex_list + + # Build the master regular expressions + + if debug: + debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') + + for state in regexs: + lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) + lexobj.lexstatere[state] = lexre + lexobj.lexstateretext[state] = re_text + lexobj.lexstaterenames[state] = re_names + if debug: + for i, text in enumerate(re_text): + debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) + + # For inclusive states, we need to add the regular expressions from the INITIAL state + for state, stype in stateinfo.items(): + if state != 'INITIAL' and stype == 'inclusive': + lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) + lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) + lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) + + lexobj.lexstateinfo = stateinfo + lexobj.lexre = 
lexobj.lexstatere['INITIAL'] + lexobj.lexretext = lexobj.lexstateretext['INITIAL'] + lexobj.lexreflags = reflags + + # Set up ignore variables + lexobj.lexstateignore = linfo.ignore + lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') + + # Set up error functions + lexobj.lexstateerrorf = linfo.errorf + lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) + if not lexobj.lexerrorf: + errorlog.warning('No t_error rule is defined') + + # Set up eof functions + lexobj.lexstateeoff = linfo.eoff + lexobj.lexeoff = linfo.eoff.get('INITIAL', None) + + # Check state information for ignore and error rules + for s, stype in stateinfo.items(): + if stype == 'exclusive': + if s not in linfo.errorf: + errorlog.warning("No error rule is defined for exclusive state %r", s) + if s not in linfo.ignore and lexobj.lexignore: + errorlog.warning("No ignore rule is defined for exclusive state %r", s) + elif stype == 'inclusive': + if s not in linfo.errorf: + linfo.errorf[s] = linfo.errorf.get('INITIAL', None) + if s not in linfo.ignore: + linfo.ignore[s] = linfo.ignore.get('INITIAL', '') + + # Create global versions of the token() and input() functions + token = lexobj.token + input = lexobj.input + lexer = lexobj + + return lexobj + +# ----------------------------------------------------------------------------- +# runmain() +# +# This runs the lexer as a main program +# ----------------------------------------------------------------------------- + +def runmain(lexer=None, data=None): + if not data: + try: + filename = sys.argv[1] + with open(filename) as f: + data = f.read() + except IndexError: + sys.stdout.write('Reading from standard input (type EOF to end):\n') + data = sys.stdin.read() + + if lexer: + _input = lexer.input + else: + _input = input + _input(data) + if lexer: + _token = lexer.token + else: + _token = token + + while True: + tok = _token() + if not tok: + break + sys.stdout.write(f'({tok.type},{tok.value!r},{tok.lineno},{tok.lexpos})\n') + +# ----------------------------------------------------------------------------- +# @TOKEN(regex) +# +# This decorator function can be used to set the regex expression on a function +# when its docstring might need to be set in an alternative way +# ----------------------------------------------------------------------------- + +def TOKEN(r): + def set_regex(f): + if hasattr(r, '__call__'): + f.regex = _get_regex(r) + else: + f.regex = r + return f + return set_regex diff --git a/script/local/parser/myLexer.py b/script/local/parser/myLexer.py new file mode 100644 index 00000000..3208c042 --- /dev/null +++ b/script/local/parser/myLexer.py @@ -0,0 +1,129 @@ +# import ply.lex as lex +import os +import sys +localDirPath = os.path.dirname(os.path.realpath(__file__)) + +sys.path.append(sys.path[0] + "/../") +from local.parser.lex import * +from decimal import Decimal + +reserved = { + 'NULL' : 'NULL', + 'true' : 'TRUE', + 'false': 'FALSE', +} + +tokens = [ + 'NUMBER', + 'STRING', + 'ID', + 'AND', + 'OR', + 'EQUAL', + 'NEQUAL', + 'GT', + 'GE', + 'LT', + 'LE', + 'PLUS', + 'MINUS', + 'TIMES', + 'DIVIDE', + 'COMMA', + 'LPAREN', + 'RPAREN', + 'THEN', + 'NOT', + 'MOD', +] + list(reserved.values()) + +token_dict = { + 'AND' : '&&', + 'OR' : '||', + 'NEQUAL' : '!=', + 'NOT' : '!', + 'EQUAL' : '==', + 'GT' : '>', + 'GE' : '>=', + 'LT' : '<', + 'LE' : '<=', + 'PLUS' : '+', + 'MINUS' : '-', + 'TIMES' : '*', + 'DIVIDE' : '/', + 'COMMA' : ',', + 'LPAREN' : '(', + 'RPAREN' : ')', + 'THEN' : '->', + 'NULL' : 'NULL', + 'TRUE' : 'true', + 'FALSE' : 'false', + 'MOD' : 
'%', +} + +class MyLexer(): + + tokens = tokens + + reserved = reserved + + t_THEN = r'->' + t_AND = r'&&' + t_OR = r'\|\|' + t_EQUAL = r'==' + t_NEQUAL = r'!=' + t_NOT = r'!' + t_GE = r'>=' + t_GT = r'>' + t_LE = r'<=' + t_LT = r'<' + t_PLUS = r'\+' + t_MINUS = r'-' + t_TIMES = r'\*' + t_DIVIDE = r'/' + t_COMMA = r',' + t_LPAREN = r'\(' + t_RPAREN = r'\)' + t_MOD = r'\%' + + def t_NUMBER(self, t): + r'-?[0-9]+(\.[0-9]+)?' + t.value = Decimal(t.value) + return t + + def t_STRING(self, t): + r'"[^"]*"' + t.value = t.value[1:-1] + return t + + def t_ID(self, t): + r'[a-zA-Z_][a-zA-Z_0-9]*' + t.type = self.reserved.get(t.value,'ID') + return t + + # Define a rule so we can track line numbers + def t_newline(self,t): + r'\n+' + t.lexer.lineno += len(t.value) + + # A string containing ignored characters (spaces and tabs) + t_ignore = ' \t' + + # Error handling rule + def t_error(self,t): + #print("Illegal character '%s'" % t.value[0]) + raise Exception('Illegal character "%s"' % t.value[0]) + t.lexer.skip(1) + + # Build the lexer + def build(self,**kwargs): + self.lexer = lex(module=self, **kwargs) + + # Test it output + def test(self,data): + self.lexer.input(data) + while True: + tok = self.lexer.token() + if not tok: + break + print(tok) diff --git a/script/local/parser/myYACC.py b/script/local/parser/myYACC.py new file mode 100644 index 00000000..31b3e6cd --- /dev/null +++ b/script/local/parser/myYACC.py @@ -0,0 +1,162 @@ +# import ply.yacc as yacc +import os +import sys +localDirPath = os.path.dirname(os.path.realpath(__file__)) + +sys.path.append(sys.path[0] + "/../") +from local.parser.lex import * +from local.parser.yacc import * +from local.parser.myLexer import tokens +from local.parser.variables import * +from local.parser.functions import * +from local.parser.myLexer import token_dict + +def execFn(fn): + fn[0](*fn[1]) + +class MyYACC(): + + tokens = tokens + + def p_conditions_relation_function(p): + '''sentence : conditions THEN function + ''' + if p[1]: + execFn(p[3]) + + def p_conditions_or(p): + 'conditions : conditions OR and_conditions' + p[0] = p[1] or p[3] + + def p_conditions_and_conditions(p): + 'conditions : and_conditions' + p[0] = p[1] + + def p_and_conditions_and(p): + ''' + and_conditions : and_conditions AND not_conditions + ''' + p[0] = p[1] and p[3] + + def p_and_conditions_cdt(p): + 'and_conditions : not_conditions' + p[0] = p[1] + + def p_not_cdt(p): + 'not_conditions : NOT cdt' + p[0] = not p[2] + + def p_not_conditions_cdt(p): + 'not_conditions : cdt' + p[0] = p[1] + + def p_cdt_ops(p): + ''' + cdt : expr EQUAL expr + | expr NEQUAL expr + | expr GE expr + | expr GT expr + | expr LE expr + | expr LT expr + ''' + if p[2] == token_dict['EQUAL']: + p[0] = (p[1] == p[3]) + if p[2] == token_dict['NEQUAL']: + p[0] = (p[1] != p[3]) + if p[2] == token_dict['GE']: + p[0] = (p[1] >= p[3]) + if p[2] == token_dict['GT']: + p[0] = (p[1] > p[3]) + if p[2] == token_dict['LE']: + p[0] = (p[1] <= p[3]) + if p[2] == token_dict['LT']: + p[0] = (p[1] < p[3]) + + # def p_cdt_expr(p): + # 'cdt : expr' + # p[0] = p[1] + + def p_cdt_parens(p): + 'cdt : LPAREN conditions RPAREN' + p[0] = p[2] + + def p_expr_plus_minus(p): + ''' + expr : expr PLUS term + | expr MINUS term + ''' + if p[2] == token_dict['PLUS']: + p[0] = p[1] + p[3] + if p[2] == token_dict['MINUS']: + p[0] = p[1] - p[3] + + def p_expr_term(p): + 'expr : term' + p[0] = p[1] + + def p_term_times_divide_mod(p): + ''' + term : term TIMES factor + | term DIVIDE factor + | term MOD factor + ''' + if p[2] == token_dict['TIMES']: 
+ p[0] = p[1] * p[3]
+ elif p[2] == token_dict['DIVIDE']:
+ p[0] = p[1] / p[3]
+ elif p[2] == token_dict['MOD']:
+ p[0] = p[1] % p[3]
+
+ def p_term_factor(p):
+ 'term : factor'
+ p[0] = p[1]
+
+ def p_factor_assign_simple(p):
+ '''
+ factor : NUMBER
+ | STRING
+ '''
+ p[0] = p[1]
+
+ def p_factor_id(p):
+ 'factor : ID'
+ p[0] = get_variable(p[1])
+
+ def p_factor_null(p):
+ 'factor : NULL'
+ p[0] = None
+
+ def p_factor_bool(p):
+ '''
+ factor : TRUE
+ | FALSE
+ '''
+ if p[1] == token_dict['TRUE']:
+ p[0] = True
+ elif p[1] == token_dict['FALSE']:
+ p[0] = False
+
+ def p_factor_paren(p):
+ 'factor : LPAREN expr RPAREN'
+ p[0] = p[2]
+
+ def p_function(p):
+ 'function : ID LPAREN variables RPAREN'
+ # Defer the call: package (callable, args) so execFn runs it only if the
+ # conditions on the left of THEN evaluated to true.
+ p[0] = (get_function(p[1]), p[3])
+
+ def p_variables_comma(p):
+ '''
+ variables : variables COMMA expr
+ '''
+ p[1].append(p[3])
+ p[0] = p[1]
+
+ def p_variables_factor(p):
+ 'variables : expr'
+ p[0] = [p[1]]
+
+ # Error rule for syntax errors
+ def p_error(p):
+ raise Exception('Syntax error in input!')
+
+ def build(self):
+ self.yacc = yacc(module=MyYACC)
diff --git a/script/local/parser/parser.out b/script/local/parser/parser.out
new file mode 100644
index 00000000..e470295d
--- /dev/null
+++ b/script/local/parser/parser.out
@@ -0,0 +1,1355 @@
+Created by PLY version 3.11 (http://www.dabeaz.com/ply)
+
+Grammar
+
+Rule 0 S' -> sentence
+Rule 1 sentence -> conditions THEN function
+Rule 2 conditions -> conditions OR and_conditions
+Rule 3 conditions -> and_conditions
+Rule 4 and_conditions -> and_conditions AND not_conditions
+Rule 5 and_conditions -> not_conditions
+Rule 6 not_conditions -> NOT cdt
+Rule 7 not_conditions -> cdt
+Rule 8 cdt -> expr EQUAL expr
+Rule 9 cdt -> expr NEQUAL expr
+Rule 10 cdt -> expr GE expr
+Rule 11 cdt -> expr GT expr
+Rule 12 cdt -> expr LE expr
+Rule 13 cdt -> expr LT expr
+Rule 14 cdt -> LPAREN conditions RPAREN
+Rule 15 expr -> expr PLUS term
+Rule 16 expr -> expr MINUS term
+Rule 17 expr -> term
+Rule 18 term -> term TIMES factor
+Rule 19 term -> term DIVIDE factor
+Rule 20 term -> term MOD factor
+Rule 21 term -> factor
+Rule 22 factor -> NUMBER
+Rule 23 factor -> STRING
+Rule 24 factor -> ID
+Rule 25 factor -> NULL
+Rule 26 factor -> TRUE
+Rule 27 factor -> FALSE
+Rule 28 factor -> LPAREN expr RPAREN
+Rule 29 function -> ID LPAREN variables RPAREN
+Rule 30 variables -> variables COMMA expr
+Rule 31 variables -> expr
+
+Terminals, with rules where they appear
+
+AND : 4
+COMMA : 30
+DIVIDE : 19
+EQUAL : 8
+FALSE : 27
+GE : 10
+GT : 11
+ID : 24 29
+LE : 12
+LPAREN : 14 28 29
+LT : 13
+MINUS : 16
+MOD : 20
+NEQUAL : 9
+NOT : 6
+NULL : 25
+NUMBER : 22
+OR : 2
+PLUS : 15
+RPAREN : 14 28 29
+STRING : 23
+THEN : 1
+TIMES : 18
+TRUE : 26
+error :
+
+Nonterminals, with rules where they appear
+
+and_conditions : 2 3 4
+cdt : 6 7
+conditions : 1 2 14
+expr : 8 8 9 9 10 10 11 11 12 12 13 13 15 16 28 30 31
+factor : 18 19 20 21
+function : 1
+not_conditions : 4 5
+sentence : 0
+term : 15 16 17 18 19 20
+variables : 29 30
+
+Parsing method: LALR
+
+state 0
+
+ (0) S' -> . sentence
+ (1) sentence -> . conditions THEN function
+ (2) conditions -> . conditions OR and_conditions
+ (3) conditions -> . and_conditions
+ (4) and_conditions -> . and_conditions AND not_conditions
+ (5) and_conditions -> . not_conditions
+ (6) not_conditions -> . NOT cdt
+ (7) not_conditions -> . cdt
+ (8) cdt -> . expr EQUAL expr
+ (9) cdt -> . expr NEQUAL expr
+ (10) cdt -> . expr GE expr
+ (11) cdt -> . expr GT expr
+ (12) cdt -> . expr LE expr
+ (13) cdt -> . 
expr LT expr + (14) cdt -> . LPAREN conditions RPAREN + (15) expr -> . expr PLUS term + (16) expr -> . expr MINUS term + (17) expr -> . term + (18) term -> . term TIMES factor + (19) term -> . term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + + NOT shift and go to state 5 + LPAREN shift and go to state 8 + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + + sentence shift and go to state 1 + conditions shift and go to state 2 + and_conditions shift and go to state 3 + not_conditions shift and go to state 4 + cdt shift and go to state 6 + expr shift and go to state 7 + term shift and go to state 9 + factor shift and go to state 10 + +state 1 + + (0) S' -> sentence . + + + +state 2 + + (1) sentence -> conditions . THEN function + (2) conditions -> conditions . OR and_conditions + + THEN shift and go to state 17 + OR shift and go to state 18 + + +state 3 + + (3) conditions -> and_conditions . + (4) and_conditions -> and_conditions . AND not_conditions + + THEN reduce using rule 3 (conditions -> and_conditions .) + OR reduce using rule 3 (conditions -> and_conditions .) + RPAREN reduce using rule 3 (conditions -> and_conditions .) + AND shift and go to state 19 + + +state 4 + + (5) and_conditions -> not_conditions . + + AND reduce using rule 5 (and_conditions -> not_conditions .) + THEN reduce using rule 5 (and_conditions -> not_conditions .) + OR reduce using rule 5 (and_conditions -> not_conditions .) + RPAREN reduce using rule 5 (and_conditions -> not_conditions .) + + +state 5 + + (6) not_conditions -> NOT . cdt + (8) cdt -> . expr EQUAL expr + (9) cdt -> . expr NEQUAL expr + (10) cdt -> . expr GE expr + (11) cdt -> . expr GT expr + (12) cdt -> . expr LE expr + (13) cdt -> . expr LT expr + (14) cdt -> . LPAREN conditions RPAREN + (15) expr -> . expr PLUS term + (16) expr -> . expr MINUS term + (17) expr -> . term + (18) term -> . term TIMES factor + (19) term -> . term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + + LPAREN shift and go to state 8 + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + + cdt shift and go to state 20 + expr shift and go to state 7 + term shift and go to state 9 + factor shift and go to state 10 + +state 6 + + (7) not_conditions -> cdt . + + AND reduce using rule 7 (not_conditions -> cdt .) + THEN reduce using rule 7 (not_conditions -> cdt .) + OR reduce using rule 7 (not_conditions -> cdt .) + RPAREN reduce using rule 7 (not_conditions -> cdt .) + + +state 7 + + (8) cdt -> expr . EQUAL expr + (9) cdt -> expr . NEQUAL expr + (10) cdt -> expr . GE expr + (11) cdt -> expr . GT expr + (12) cdt -> expr . LE expr + (13) cdt -> expr . LT expr + (15) expr -> expr . PLUS term + (16) expr -> expr . 
MINUS term + + EQUAL shift and go to state 21 + NEQUAL shift and go to state 22 + GE shift and go to state 23 + GT shift and go to state 24 + LE shift and go to state 25 + LT shift and go to state 26 + PLUS shift and go to state 27 + MINUS shift and go to state 28 + + +state 8 + + (14) cdt -> LPAREN . conditions RPAREN + (28) factor -> LPAREN . expr RPAREN + (2) conditions -> . conditions OR and_conditions + (3) conditions -> . and_conditions + (15) expr -> . expr PLUS term + (16) expr -> . expr MINUS term + (17) expr -> . term + (4) and_conditions -> . and_conditions AND not_conditions + (5) and_conditions -> . not_conditions + (18) term -> . term TIMES factor + (19) term -> . term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (6) not_conditions -> . NOT cdt + (7) not_conditions -> . cdt + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + (8) cdt -> . expr EQUAL expr + (9) cdt -> . expr NEQUAL expr + (10) cdt -> . expr GE expr + (11) cdt -> . expr GT expr + (12) cdt -> . expr LE expr + (13) cdt -> . expr LT expr + (14) cdt -> . LPAREN conditions RPAREN + + NOT shift and go to state 5 + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + LPAREN shift and go to state 29 + + conditions shift and go to state 30 + expr shift and go to state 31 + and_conditions shift and go to state 3 + term shift and go to state 9 + not_conditions shift and go to state 4 + factor shift and go to state 10 + cdt shift and go to state 6 + +state 9 + + (17) expr -> term . + (18) term -> term . TIMES factor + (19) term -> term . DIVIDE factor + (20) term -> term . MOD factor + + EQUAL reduce using rule 17 (expr -> term .) + NEQUAL reduce using rule 17 (expr -> term .) + GE reduce using rule 17 (expr -> term .) + GT reduce using rule 17 (expr -> term .) + LE reduce using rule 17 (expr -> term .) + LT reduce using rule 17 (expr -> term .) + PLUS reduce using rule 17 (expr -> term .) + MINUS reduce using rule 17 (expr -> term .) + RPAREN reduce using rule 17 (expr -> term .) + AND reduce using rule 17 (expr -> term .) + THEN reduce using rule 17 (expr -> term .) + OR reduce using rule 17 (expr -> term .) + COMMA reduce using rule 17 (expr -> term .) + TIMES shift and go to state 32 + DIVIDE shift and go to state 33 + MOD shift and go to state 34 + + +state 10 + + (21) term -> factor . + + TIMES reduce using rule 21 (term -> factor .) + DIVIDE reduce using rule 21 (term -> factor .) + MOD reduce using rule 21 (term -> factor .) + EQUAL reduce using rule 21 (term -> factor .) + NEQUAL reduce using rule 21 (term -> factor .) + GE reduce using rule 21 (term -> factor .) + GT reduce using rule 21 (term -> factor .) + LE reduce using rule 21 (term -> factor .) + LT reduce using rule 21 (term -> factor .) + PLUS reduce using rule 21 (term -> factor .) + MINUS reduce using rule 21 (term -> factor .) + RPAREN reduce using rule 21 (term -> factor .) + AND reduce using rule 21 (term -> factor .) + THEN reduce using rule 21 (term -> factor .) + OR reduce using rule 21 (term -> factor .) + COMMA reduce using rule 21 (term -> factor .) + + +state 11 + + (22) factor -> NUMBER . + + TIMES reduce using rule 22 (factor -> NUMBER .) + DIVIDE reduce using rule 22 (factor -> NUMBER .) + MOD reduce using rule 22 (factor -> NUMBER .) 
+ EQUAL reduce using rule 22 (factor -> NUMBER .) + NEQUAL reduce using rule 22 (factor -> NUMBER .) + GE reduce using rule 22 (factor -> NUMBER .) + GT reduce using rule 22 (factor -> NUMBER .) + LE reduce using rule 22 (factor -> NUMBER .) + LT reduce using rule 22 (factor -> NUMBER .) + PLUS reduce using rule 22 (factor -> NUMBER .) + MINUS reduce using rule 22 (factor -> NUMBER .) + RPAREN reduce using rule 22 (factor -> NUMBER .) + AND reduce using rule 22 (factor -> NUMBER .) + THEN reduce using rule 22 (factor -> NUMBER .) + OR reduce using rule 22 (factor -> NUMBER .) + COMMA reduce using rule 22 (factor -> NUMBER .) + + +state 12 + + (23) factor -> STRING . + + TIMES reduce using rule 23 (factor -> STRING .) + DIVIDE reduce using rule 23 (factor -> STRING .) + MOD reduce using rule 23 (factor -> STRING .) + EQUAL reduce using rule 23 (factor -> STRING .) + NEQUAL reduce using rule 23 (factor -> STRING .) + GE reduce using rule 23 (factor -> STRING .) + GT reduce using rule 23 (factor -> STRING .) + LE reduce using rule 23 (factor -> STRING .) + LT reduce using rule 23 (factor -> STRING .) + PLUS reduce using rule 23 (factor -> STRING .) + MINUS reduce using rule 23 (factor -> STRING .) + RPAREN reduce using rule 23 (factor -> STRING .) + AND reduce using rule 23 (factor -> STRING .) + THEN reduce using rule 23 (factor -> STRING .) + OR reduce using rule 23 (factor -> STRING .) + COMMA reduce using rule 23 (factor -> STRING .) + + +state 13 + + (24) factor -> ID . + + TIMES reduce using rule 24 (factor -> ID .) + DIVIDE reduce using rule 24 (factor -> ID .) + MOD reduce using rule 24 (factor -> ID .) + EQUAL reduce using rule 24 (factor -> ID .) + NEQUAL reduce using rule 24 (factor -> ID .) + GE reduce using rule 24 (factor -> ID .) + GT reduce using rule 24 (factor -> ID .) + LE reduce using rule 24 (factor -> ID .) + LT reduce using rule 24 (factor -> ID .) + PLUS reduce using rule 24 (factor -> ID .) + MINUS reduce using rule 24 (factor -> ID .) + RPAREN reduce using rule 24 (factor -> ID .) + AND reduce using rule 24 (factor -> ID .) + THEN reduce using rule 24 (factor -> ID .) + OR reduce using rule 24 (factor -> ID .) + COMMA reduce using rule 24 (factor -> ID .) + + +state 14 + + (25) factor -> NULL . + + TIMES reduce using rule 25 (factor -> NULL .) + DIVIDE reduce using rule 25 (factor -> NULL .) + MOD reduce using rule 25 (factor -> NULL .) + EQUAL reduce using rule 25 (factor -> NULL .) + NEQUAL reduce using rule 25 (factor -> NULL .) + GE reduce using rule 25 (factor -> NULL .) + GT reduce using rule 25 (factor -> NULL .) + LE reduce using rule 25 (factor -> NULL .) + LT reduce using rule 25 (factor -> NULL .) + PLUS reduce using rule 25 (factor -> NULL .) + MINUS reduce using rule 25 (factor -> NULL .) + RPAREN reduce using rule 25 (factor -> NULL .) + AND reduce using rule 25 (factor -> NULL .) + THEN reduce using rule 25 (factor -> NULL .) + OR reduce using rule 25 (factor -> NULL .) + COMMA reduce using rule 25 (factor -> NULL .) + + +state 15 + + (26) factor -> TRUE . + + TIMES reduce using rule 26 (factor -> TRUE .) + DIVIDE reduce using rule 26 (factor -> TRUE .) + MOD reduce using rule 26 (factor -> TRUE .) + EQUAL reduce using rule 26 (factor -> TRUE .) + NEQUAL reduce using rule 26 (factor -> TRUE .) + GE reduce using rule 26 (factor -> TRUE .) + GT reduce using rule 26 (factor -> TRUE .) + LE reduce using rule 26 (factor -> TRUE .) + LT reduce using rule 26 (factor -> TRUE .) + PLUS reduce using rule 26 (factor -> TRUE .) 
+ MINUS reduce using rule 26 (factor -> TRUE .) + RPAREN reduce using rule 26 (factor -> TRUE .) + AND reduce using rule 26 (factor -> TRUE .) + THEN reduce using rule 26 (factor -> TRUE .) + OR reduce using rule 26 (factor -> TRUE .) + COMMA reduce using rule 26 (factor -> TRUE .) + + +state 16 + + (27) factor -> FALSE . + + TIMES reduce using rule 27 (factor -> FALSE .) + DIVIDE reduce using rule 27 (factor -> FALSE .) + MOD reduce using rule 27 (factor -> FALSE .) + EQUAL reduce using rule 27 (factor -> FALSE .) + NEQUAL reduce using rule 27 (factor -> FALSE .) + GE reduce using rule 27 (factor -> FALSE .) + GT reduce using rule 27 (factor -> FALSE .) + LE reduce using rule 27 (factor -> FALSE .) + LT reduce using rule 27 (factor -> FALSE .) + PLUS reduce using rule 27 (factor -> FALSE .) + MINUS reduce using rule 27 (factor -> FALSE .) + RPAREN reduce using rule 27 (factor -> FALSE .) + AND reduce using rule 27 (factor -> FALSE .) + THEN reduce using rule 27 (factor -> FALSE .) + OR reduce using rule 27 (factor -> FALSE .) + COMMA reduce using rule 27 (factor -> FALSE .) + + +state 17 + + (1) sentence -> conditions THEN . function + (29) function -> . ID LPAREN variables RPAREN + + ID shift and go to state 36 + + function shift and go to state 35 + +state 18 + + (2) conditions -> conditions OR . and_conditions + (4) and_conditions -> . and_conditions AND not_conditions + (5) and_conditions -> . not_conditions + (6) not_conditions -> . NOT cdt + (7) not_conditions -> . cdt + (8) cdt -> . expr EQUAL expr + (9) cdt -> . expr NEQUAL expr + (10) cdt -> . expr GE expr + (11) cdt -> . expr GT expr + (12) cdt -> . expr LE expr + (13) cdt -> . expr LT expr + (14) cdt -> . LPAREN conditions RPAREN + (15) expr -> . expr PLUS term + (16) expr -> . expr MINUS term + (17) expr -> . term + (18) term -> . term TIMES factor + (19) term -> . term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + + NOT shift and go to state 5 + LPAREN shift and go to state 8 + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + + and_conditions shift and go to state 37 + not_conditions shift and go to state 4 + cdt shift and go to state 6 + expr shift and go to state 7 + term shift and go to state 9 + factor shift and go to state 10 + +state 19 + + (4) and_conditions -> and_conditions AND . not_conditions + (6) not_conditions -> . NOT cdt + (7) not_conditions -> . cdt + (8) cdt -> . expr EQUAL expr + (9) cdt -> . expr NEQUAL expr + (10) cdt -> . expr GE expr + (11) cdt -> . expr GT expr + (12) cdt -> . expr LE expr + (13) cdt -> . expr LT expr + (14) cdt -> . LPAREN conditions RPAREN + (15) expr -> . expr PLUS term + (16) expr -> . expr MINUS term + (17) expr -> . term + (18) term -> . term TIMES factor + (19) term -> . term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . 
LPAREN expr RPAREN + + NOT shift and go to state 5 + LPAREN shift and go to state 8 + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + + not_conditions shift and go to state 38 + cdt shift and go to state 6 + expr shift and go to state 7 + term shift and go to state 9 + factor shift and go to state 10 + +state 20 + + (6) not_conditions -> NOT cdt . + + AND reduce using rule 6 (not_conditions -> NOT cdt .) + THEN reduce using rule 6 (not_conditions -> NOT cdt .) + OR reduce using rule 6 (not_conditions -> NOT cdt .) + RPAREN reduce using rule 6 (not_conditions -> NOT cdt .) + + +state 21 + + (8) cdt -> expr EQUAL . expr + (15) expr -> . expr PLUS term + (16) expr -> . expr MINUS term + (17) expr -> . term + (18) term -> . term TIMES factor + (19) term -> . term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + LPAREN shift and go to state 40 + + expr shift and go to state 39 + term shift and go to state 9 + factor shift and go to state 10 + +state 22 + + (9) cdt -> expr NEQUAL . expr + (15) expr -> . expr PLUS term + (16) expr -> . expr MINUS term + (17) expr -> . term + (18) term -> . term TIMES factor + (19) term -> . term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + LPAREN shift and go to state 40 + + expr shift and go to state 41 + term shift and go to state 9 + factor shift and go to state 10 + +state 23 + + (10) cdt -> expr GE . expr + (15) expr -> . expr PLUS term + (16) expr -> . expr MINUS term + (17) expr -> . term + (18) term -> . term TIMES factor + (19) term -> . term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + LPAREN shift and go to state 40 + + expr shift and go to state 42 + term shift and go to state 9 + factor shift and go to state 10 + +state 24 + + (11) cdt -> expr GT . expr + (15) expr -> . expr PLUS term + (16) expr -> . expr MINUS term + (17) expr -> . term + (18) term -> . term TIMES factor + (19) term -> . term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . 
LPAREN expr RPAREN + + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + LPAREN shift and go to state 40 + + expr shift and go to state 43 + term shift and go to state 9 + factor shift and go to state 10 + +state 25 + + (12) cdt -> expr LE . expr + (15) expr -> . expr PLUS term + (16) expr -> . expr MINUS term + (17) expr -> . term + (18) term -> . term TIMES factor + (19) term -> . term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + LPAREN shift and go to state 40 + + expr shift and go to state 44 + term shift and go to state 9 + factor shift and go to state 10 + +state 26 + + (13) cdt -> expr LT . expr + (15) expr -> . expr PLUS term + (16) expr -> . expr MINUS term + (17) expr -> . term + (18) term -> . term TIMES factor + (19) term -> . term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + LPAREN shift and go to state 40 + + expr shift and go to state 45 + term shift and go to state 9 + factor shift and go to state 10 + +state 27 + + (15) expr -> expr PLUS . term + (18) term -> . term TIMES factor + (19) term -> . term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + LPAREN shift and go to state 40 + + term shift and go to state 46 + factor shift and go to state 10 + +state 28 + + (16) expr -> expr MINUS . term + (18) term -> . term TIMES factor + (19) term -> . term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + LPAREN shift and go to state 40 + + term shift and go to state 47 + factor shift and go to state 10 + +state 29 + + (28) factor -> LPAREN . expr RPAREN + (14) cdt -> LPAREN . conditions RPAREN + (15) expr -> . expr PLUS term + (16) expr -> . expr MINUS term + (17) expr -> . term + (2) conditions -> . conditions OR and_conditions + (3) conditions -> . and_conditions + (18) term -> . term TIMES factor + (19) term -> . 
term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (4) and_conditions -> . and_conditions AND not_conditions + (5) and_conditions -> . not_conditions + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + (6) not_conditions -> . NOT cdt + (7) not_conditions -> . cdt + (8) cdt -> . expr EQUAL expr + (9) cdt -> . expr NEQUAL expr + (10) cdt -> . expr GE expr + (11) cdt -> . expr GT expr + (12) cdt -> . expr LE expr + (13) cdt -> . expr LT expr + (14) cdt -> . LPAREN conditions RPAREN + + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + LPAREN shift and go to state 29 + NOT shift and go to state 5 + + expr shift and go to state 31 + conditions shift and go to state 30 + term shift and go to state 9 + and_conditions shift and go to state 3 + factor shift and go to state 10 + not_conditions shift and go to state 4 + cdt shift and go to state 6 + +state 30 + + (14) cdt -> LPAREN conditions . RPAREN + (2) conditions -> conditions . OR and_conditions + + RPAREN shift and go to state 48 + OR shift and go to state 18 + + +state 31 + + (28) factor -> LPAREN expr . RPAREN + (15) expr -> expr . PLUS term + (16) expr -> expr . MINUS term + (8) cdt -> expr . EQUAL expr + (9) cdt -> expr . NEQUAL expr + (10) cdt -> expr . GE expr + (11) cdt -> expr . GT expr + (12) cdt -> expr . LE expr + (13) cdt -> expr . LT expr + + RPAREN shift and go to state 49 + PLUS shift and go to state 27 + MINUS shift and go to state 28 + EQUAL shift and go to state 21 + NEQUAL shift and go to state 22 + GE shift and go to state 23 + GT shift and go to state 24 + LE shift and go to state 25 + LT shift and go to state 26 + + +state 32 + + (18) term -> term TIMES . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + LPAREN shift and go to state 40 + + factor shift and go to state 50 + +state 33 + + (19) term -> term DIVIDE . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + LPAREN shift and go to state 40 + + factor shift and go to state 51 + +state 34 + + (20) term -> term MOD . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + LPAREN shift and go to state 40 + + factor shift and go to state 52 + +state 35 + + (1) sentence -> conditions THEN function . + + $end reduce using rule 1 (sentence -> conditions THEN function .) 
+ + +state 36 + + (29) function -> ID . LPAREN variables RPAREN + + LPAREN shift and go to state 53 + + +state 37 + + (2) conditions -> conditions OR and_conditions . + (4) and_conditions -> and_conditions . AND not_conditions + + THEN reduce using rule 2 (conditions -> conditions OR and_conditions .) + OR reduce using rule 2 (conditions -> conditions OR and_conditions .) + RPAREN reduce using rule 2 (conditions -> conditions OR and_conditions .) + AND shift and go to state 19 + + +state 38 + + (4) and_conditions -> and_conditions AND not_conditions . + + AND reduce using rule 4 (and_conditions -> and_conditions AND not_conditions .) + THEN reduce using rule 4 (and_conditions -> and_conditions AND not_conditions .) + OR reduce using rule 4 (and_conditions -> and_conditions AND not_conditions .) + RPAREN reduce using rule 4 (and_conditions -> and_conditions AND not_conditions .) + + +state 39 + + (8) cdt -> expr EQUAL expr . + (15) expr -> expr . PLUS term + (16) expr -> expr . MINUS term + + AND reduce using rule 8 (cdt -> expr EQUAL expr .) + THEN reduce using rule 8 (cdt -> expr EQUAL expr .) + OR reduce using rule 8 (cdt -> expr EQUAL expr .) + RPAREN reduce using rule 8 (cdt -> expr EQUAL expr .) + PLUS shift and go to state 27 + MINUS shift and go to state 28 + + +state 40 + + (28) factor -> LPAREN . expr RPAREN + (15) expr -> . expr PLUS term + (16) expr -> . expr MINUS term + (17) expr -> . term + (18) term -> . term TIMES factor + (19) term -> . term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + LPAREN shift and go to state 40 + + expr shift and go to state 54 + term shift and go to state 9 + factor shift and go to state 10 + +state 41 + + (9) cdt -> expr NEQUAL expr . + (15) expr -> expr . PLUS term + (16) expr -> expr . MINUS term + + AND reduce using rule 9 (cdt -> expr NEQUAL expr .) + THEN reduce using rule 9 (cdt -> expr NEQUAL expr .) + OR reduce using rule 9 (cdt -> expr NEQUAL expr .) + RPAREN reduce using rule 9 (cdt -> expr NEQUAL expr .) + PLUS shift and go to state 27 + MINUS shift and go to state 28 + + +state 42 + + (10) cdt -> expr GE expr . + (15) expr -> expr . PLUS term + (16) expr -> expr . MINUS term + + AND reduce using rule 10 (cdt -> expr GE expr .) + THEN reduce using rule 10 (cdt -> expr GE expr .) + OR reduce using rule 10 (cdt -> expr GE expr .) + RPAREN reduce using rule 10 (cdt -> expr GE expr .) + PLUS shift and go to state 27 + MINUS shift and go to state 28 + + +state 43 + + (11) cdt -> expr GT expr . + (15) expr -> expr . PLUS term + (16) expr -> expr . MINUS term + + AND reduce using rule 11 (cdt -> expr GT expr .) + THEN reduce using rule 11 (cdt -> expr GT expr .) + OR reduce using rule 11 (cdt -> expr GT expr .) + RPAREN reduce using rule 11 (cdt -> expr GT expr .) + PLUS shift and go to state 27 + MINUS shift and go to state 28 + + +state 44 + + (12) cdt -> expr LE expr . + (15) expr -> expr . PLUS term + (16) expr -> expr . MINUS term + + AND reduce using rule 12 (cdt -> expr LE expr .) + THEN reduce using rule 12 (cdt -> expr LE expr .) + OR reduce using rule 12 (cdt -> expr LE expr .) + RPAREN reduce using rule 12 (cdt -> expr LE expr .) 
+ PLUS shift and go to state 27 + MINUS shift and go to state 28 + + +state 45 + + (13) cdt -> expr LT expr . + (15) expr -> expr . PLUS term + (16) expr -> expr . MINUS term + + AND reduce using rule 13 (cdt -> expr LT expr .) + THEN reduce using rule 13 (cdt -> expr LT expr .) + OR reduce using rule 13 (cdt -> expr LT expr .) + RPAREN reduce using rule 13 (cdt -> expr LT expr .) + PLUS shift and go to state 27 + MINUS shift and go to state 28 + + +state 46 + + (15) expr -> expr PLUS term . + (18) term -> term . TIMES factor + (19) term -> term . DIVIDE factor + (20) term -> term . MOD factor + + EQUAL reduce using rule 15 (expr -> expr PLUS term .) + NEQUAL reduce using rule 15 (expr -> expr PLUS term .) + GE reduce using rule 15 (expr -> expr PLUS term .) + GT reduce using rule 15 (expr -> expr PLUS term .) + LE reduce using rule 15 (expr -> expr PLUS term .) + LT reduce using rule 15 (expr -> expr PLUS term .) + PLUS reduce using rule 15 (expr -> expr PLUS term .) + MINUS reduce using rule 15 (expr -> expr PLUS term .) + RPAREN reduce using rule 15 (expr -> expr PLUS term .) + AND reduce using rule 15 (expr -> expr PLUS term .) + THEN reduce using rule 15 (expr -> expr PLUS term .) + OR reduce using rule 15 (expr -> expr PLUS term .) + COMMA reduce using rule 15 (expr -> expr PLUS term .) + TIMES shift and go to state 32 + DIVIDE shift and go to state 33 + MOD shift and go to state 34 + + +state 47 + + (16) expr -> expr MINUS term . + (18) term -> term . TIMES factor + (19) term -> term . DIVIDE factor + (20) term -> term . MOD factor + + EQUAL reduce using rule 16 (expr -> expr MINUS term .) + NEQUAL reduce using rule 16 (expr -> expr MINUS term .) + GE reduce using rule 16 (expr -> expr MINUS term .) + GT reduce using rule 16 (expr -> expr MINUS term .) + LE reduce using rule 16 (expr -> expr MINUS term .) + LT reduce using rule 16 (expr -> expr MINUS term .) + PLUS reduce using rule 16 (expr -> expr MINUS term .) + MINUS reduce using rule 16 (expr -> expr MINUS term .) + RPAREN reduce using rule 16 (expr -> expr MINUS term .) + AND reduce using rule 16 (expr -> expr MINUS term .) + THEN reduce using rule 16 (expr -> expr MINUS term .) + OR reduce using rule 16 (expr -> expr MINUS term .) + COMMA reduce using rule 16 (expr -> expr MINUS term .) + TIMES shift and go to state 32 + DIVIDE shift and go to state 33 + MOD shift and go to state 34 + + +state 48 + + (14) cdt -> LPAREN conditions RPAREN . + + AND reduce using rule 14 (cdt -> LPAREN conditions RPAREN .) + THEN reduce using rule 14 (cdt -> LPAREN conditions RPAREN .) + OR reduce using rule 14 (cdt -> LPAREN conditions RPAREN .) + RPAREN reduce using rule 14 (cdt -> LPAREN conditions RPAREN .) + + +state 49 + + (28) factor -> LPAREN expr RPAREN . + + TIMES reduce using rule 28 (factor -> LPAREN expr RPAREN .) + DIVIDE reduce using rule 28 (factor -> LPAREN expr RPAREN .) + MOD reduce using rule 28 (factor -> LPAREN expr RPAREN .) + EQUAL reduce using rule 28 (factor -> LPAREN expr RPAREN .) + NEQUAL reduce using rule 28 (factor -> LPAREN expr RPAREN .) + GE reduce using rule 28 (factor -> LPAREN expr RPAREN .) + GT reduce using rule 28 (factor -> LPAREN expr RPAREN .) + LE reduce using rule 28 (factor -> LPAREN expr RPAREN .) + LT reduce using rule 28 (factor -> LPAREN expr RPAREN .) + PLUS reduce using rule 28 (factor -> LPAREN expr RPAREN .) + MINUS reduce using rule 28 (factor -> LPAREN expr RPAREN .) + RPAREN reduce using rule 28 (factor -> LPAREN expr RPAREN .) + AND reduce using rule 28 (factor -> LPAREN expr RPAREN .) 
+ THEN reduce using rule 28 (factor -> LPAREN expr RPAREN .) + OR reduce using rule 28 (factor -> LPAREN expr RPAREN .) + COMMA reduce using rule 28 (factor -> LPAREN expr RPAREN .) + + +state 50 + + (18) term -> term TIMES factor . + + TIMES reduce using rule 18 (term -> term TIMES factor .) + DIVIDE reduce using rule 18 (term -> term TIMES factor .) + MOD reduce using rule 18 (term -> term TIMES factor .) + EQUAL reduce using rule 18 (term -> term TIMES factor .) + NEQUAL reduce using rule 18 (term -> term TIMES factor .) + GE reduce using rule 18 (term -> term TIMES factor .) + GT reduce using rule 18 (term -> term TIMES factor .) + LE reduce using rule 18 (term -> term TIMES factor .) + LT reduce using rule 18 (term -> term TIMES factor .) + PLUS reduce using rule 18 (term -> term TIMES factor .) + MINUS reduce using rule 18 (term -> term TIMES factor .) + RPAREN reduce using rule 18 (term -> term TIMES factor .) + AND reduce using rule 18 (term -> term TIMES factor .) + THEN reduce using rule 18 (term -> term TIMES factor .) + OR reduce using rule 18 (term -> term TIMES factor .) + COMMA reduce using rule 18 (term -> term TIMES factor .) + + +state 51 + + (19) term -> term DIVIDE factor . + + TIMES reduce using rule 19 (term -> term DIVIDE factor .) + DIVIDE reduce using rule 19 (term -> term DIVIDE factor .) + MOD reduce using rule 19 (term -> term DIVIDE factor .) + EQUAL reduce using rule 19 (term -> term DIVIDE factor .) + NEQUAL reduce using rule 19 (term -> term DIVIDE factor .) + GE reduce using rule 19 (term -> term DIVIDE factor .) + GT reduce using rule 19 (term -> term DIVIDE factor .) + LE reduce using rule 19 (term -> term DIVIDE factor .) + LT reduce using rule 19 (term -> term DIVIDE factor .) + PLUS reduce using rule 19 (term -> term DIVIDE factor .) + MINUS reduce using rule 19 (term -> term DIVIDE factor .) + RPAREN reduce using rule 19 (term -> term DIVIDE factor .) + AND reduce using rule 19 (term -> term DIVIDE factor .) + THEN reduce using rule 19 (term -> term DIVIDE factor .) + OR reduce using rule 19 (term -> term DIVIDE factor .) + COMMA reduce using rule 19 (term -> term DIVIDE factor .) + + +state 52 + + (20) term -> term MOD factor . + + TIMES reduce using rule 20 (term -> term MOD factor .) + DIVIDE reduce using rule 20 (term -> term MOD factor .) + MOD reduce using rule 20 (term -> term MOD factor .) + EQUAL reduce using rule 20 (term -> term MOD factor .) + NEQUAL reduce using rule 20 (term -> term MOD factor .) + GE reduce using rule 20 (term -> term MOD factor .) + GT reduce using rule 20 (term -> term MOD factor .) + LE reduce using rule 20 (term -> term MOD factor .) + LT reduce using rule 20 (term -> term MOD factor .) + PLUS reduce using rule 20 (term -> term MOD factor .) + MINUS reduce using rule 20 (term -> term MOD factor .) + RPAREN reduce using rule 20 (term -> term MOD factor .) + AND reduce using rule 20 (term -> term MOD factor .) + THEN reduce using rule 20 (term -> term MOD factor .) + OR reduce using rule 20 (term -> term MOD factor .) + COMMA reduce using rule 20 (term -> term MOD factor .) + + +state 53 + + (29) function -> ID LPAREN . variables RPAREN + (30) variables -> . variables COMMA expr + (31) variables -> . expr + (15) expr -> . expr PLUS term + (16) expr -> . expr MINUS term + (17) expr -> . term + (18) term -> . term TIMES factor + (19) term -> . term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . 
NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + LPAREN shift and go to state 40 + + variables shift and go to state 55 + expr shift and go to state 56 + term shift and go to state 9 + factor shift and go to state 10 + +state 54 + + (28) factor -> LPAREN expr . RPAREN + (15) expr -> expr . PLUS term + (16) expr -> expr . MINUS term + + RPAREN shift and go to state 49 + PLUS shift and go to state 27 + MINUS shift and go to state 28 + + +state 55 + + (29) function -> ID LPAREN variables . RPAREN + (30) variables -> variables . COMMA expr + + RPAREN shift and go to state 57 + COMMA shift and go to state 58 + + +state 56 + + (31) variables -> expr . + (15) expr -> expr . PLUS term + (16) expr -> expr . MINUS term + + RPAREN reduce using rule 31 (variables -> expr .) + COMMA reduce using rule 31 (variables -> expr .) + PLUS shift and go to state 27 + MINUS shift and go to state 28 + + +state 57 + + (29) function -> ID LPAREN variables RPAREN . + + $end reduce using rule 29 (function -> ID LPAREN variables RPAREN .) + + +state 58 + + (30) variables -> variables COMMA . expr + (15) expr -> . expr PLUS term + (16) expr -> . expr MINUS term + (17) expr -> . term + (18) term -> . term TIMES factor + (19) term -> . term DIVIDE factor + (20) term -> . term MOD factor + (21) term -> . factor + (22) factor -> . NUMBER + (23) factor -> . STRING + (24) factor -> . ID + (25) factor -> . NULL + (26) factor -> . TRUE + (27) factor -> . FALSE + (28) factor -> . LPAREN expr RPAREN + + NUMBER shift and go to state 11 + STRING shift and go to state 12 + ID shift and go to state 13 + NULL shift and go to state 14 + TRUE shift and go to state 15 + FALSE shift and go to state 16 + LPAREN shift and go to state 40 + + expr shift and go to state 59 + term shift and go to state 9 + factor shift and go to state 10 + +state 59 + + (30) variables -> variables COMMA expr . + (15) expr -> expr . PLUS term + (16) expr -> expr . MINUS term + + RPAREN reduce using rule 30 (variables -> variables COMMA expr .) + COMMA reduce using rule 30 (variables -> variables COMMA expr .) + PLUS shift and go to state 27 + MINUS shift and go to state 28 + diff --git a/script/local/parser/parsetab.py b/script/local/parser/parsetab.py new file mode 100644 index 00000000..8a8ec277 --- /dev/null +++ b/script/local/parser/parsetab.py @@ -0,0 +1,61 @@ + +# parsetab.py +# This file is automatically generated. Do not edit. 
+# pylint: disable=W,C,R +_tabversion = '3.10' + +_lr_method = 'LALR' + +_lr_signature = 'AND COMMA DIVIDE EQUAL FALSE GE GT ID LE LPAREN LT MINUS MOD NEQUAL NOT NULL NUMBER OR PLUS RPAREN STRING THEN TIMES TRUEsentence : conditions THEN function \n conditions : conditions OR and_conditionsconditions : and_conditions\n and_conditions : and_conditions AND not_conditions\n and_conditions : not_conditionsnot_conditions : NOT cdtnot_conditions : cdt\n cdt : expr EQUAL expr\n | expr NEQUAL expr\n | expr GE expr\n | expr GT expr\n | expr LE expr\n | expr LT expr\n cdt : LPAREN conditions RPAREN\n expr : expr PLUS term\n | expr MINUS term\n expr : term\n term : term TIMES factor\n | term DIVIDE factor\n | term MOD factor\n term : factor\n factor : NUMBER\n | STRING\n factor : IDfactor : NULL\n factor : TRUE\n | FALSE\n factor : LPAREN expr RPARENfunction : ID LPAREN variables RPAREN\n variables : variables COMMA expr\n variables : expr' + +_lr_action_items = {'NOT':([0,8,18,19,29,],[5,5,5,5,5,]),'LPAREN':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,36,40,53,58,],[8,8,29,8,8,40,40,40,40,40,40,40,40,29,40,40,40,53,40,40,40,]),'NUMBER':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,]),'STRING':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,]),'ID':([0,5,8,17,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[13,13,13,36,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,]),'NULL':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,]),'TRUE':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,]),'FALSE':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,]),'$end':([1,35,57,],[0,-1,-29,]),'THEN':([2,3,4,6,9,10,11,12,13,14,15,16,20,37,38,39,41,42,43,44,45,46,47,48,49,50,51,52,],[17,-3,-5,-7,-17,-21,-22,-23,-24,-25,-26,-27,-6,-2,-4,-8,-9,-10,-11,-12,-13,-15,-16,-14,-28,-18,-19,-20,]),'OR':([2,3,4,6,9,10,11,12,13,14,15,16,20,30,37,38,39,41,42,43,44,45,46,47,48,49,50,51,52,],[18,-3,-5,-7,-17,-21,-22,-23,-24,-25,-26,-27,-6,18,-2,-4,-8,-9,-10,-11,-12,-13,-15,-16,-14,-28,-18,-19,-20,]),'RPAREN':([3,4,6,9,10,11,12,13,14,15,16,20,30,31,37,38,39,41,42,43,44,45,46,47,48,49,50,51,52,54,55,56,59,],[-3,-5,-7,-17,-21,-22,-23,-24,-25,-26,-27,-6,48,49,-2,-4,-8,-9,-10,-11,-12,-13,-15,-16,-14,-28,-18,-19,-20,49,57,-31,-30,]),'AND':([3,4,6,9,10,11,12,13,14,15,16,20,37,38,39,41,42,43,44,45,46,47,48,49,50,51,52,],[19,-5,-7,-17,-21,-22,-23,-24,-25,-26,-27,-6,19,-4,-8,-9,-10,-11,-12,-13,-15,-16,-14,-28,-18,-19,-20,]),'EQUAL':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[21,-17,-21,-22,-23,-24,-25,-26,-27,21,-15,-16,-28,-18,-19,-20,]),'NEQUAL':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[22,-17,-21,-22,-23,-24,-25,-26,-27,22,-15,-16,-28,-18,-19,-20,]),'GE':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[23,-17,-21,-22,-23,-24,-25,-26,-27,23,-15,-16,-28,-18,-19,-20,]),'GT':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[24,-17,-21,-22,-23,-24,-25,-26,-27,24,-15,-16,-28,-18,-19,-20,]),'LE':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[25,-17,-21,-22,-23,-24,-25,-26,-27,25,-15,-16,-28,-18,-19,-20,]),'LT':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[26,-17,-21,-22,-23,-24,-25,-26,-27,26,-15,-16,-28,-18,-19,-20,]),'PLUS':([7,9,10,11,12,13,14,
15,16,31,39,41,42,43,44,45,46,47,49,50,51,52,54,56,59,],[27,-17,-21,-22,-23,-24,-25,-26,-27,27,27,27,27,27,27,27,-15,-16,-28,-18,-19,-20,27,27,27,]),'MINUS':([7,9,10,11,12,13,14,15,16,31,39,41,42,43,44,45,46,47,49,50,51,52,54,56,59,],[28,-17,-21,-22,-23,-24,-25,-26,-27,28,28,28,28,28,28,28,-15,-16,-28,-18,-19,-20,28,28,28,]),'COMMA':([9,10,11,12,13,14,15,16,46,47,49,50,51,52,55,56,59,],[-17,-21,-22,-23,-24,-25,-26,-27,-15,-16,-28,-18,-19,-20,58,-31,-30,]),'TIMES':([9,10,11,12,13,14,15,16,46,47,49,50,51,52,],[32,-21,-22,-23,-24,-25,-26,-27,32,32,-28,-18,-19,-20,]),'DIVIDE':([9,10,11,12,13,14,15,16,46,47,49,50,51,52,],[33,-21,-22,-23,-24,-25,-26,-27,33,33,-28,-18,-19,-20,]),'MOD':([9,10,11,12,13,14,15,16,46,47,49,50,51,52,],[34,-21,-22,-23,-24,-25,-26,-27,34,34,-28,-18,-19,-20,]),} + +_lr_action = {} +for _k, _v in _lr_action_items.items(): + for _x,_y in zip(_v[0],_v[1]): + if not _x in _lr_action: _lr_action[_x] = {} + _lr_action[_x][_k] = _y +del _lr_action_items + +_lr_goto_items = {'sentence':([0,],[1,]),'conditions':([0,8,29,],[2,30,30,]),'and_conditions':([0,8,18,29,],[3,3,37,3,]),'not_conditions':([0,8,18,19,29,],[4,4,4,38,4,]),'cdt':([0,5,8,18,19,29,],[6,20,6,6,6,6,]),'expr':([0,5,8,18,19,21,22,23,24,25,26,29,40,53,58,],[7,7,31,7,7,39,41,42,43,44,45,31,54,56,59,]),'term':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,40,53,58,],[9,9,9,9,9,9,9,9,9,9,9,46,47,9,9,9,9,]),'factor':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[10,10,10,10,10,10,10,10,10,10,10,10,10,10,50,51,52,10,10,10,]),'function':([17,],[35,]),'variables':([53,],[55,]),} + +_lr_goto = {} +for _k, _v in _lr_goto_items.items(): + for _x, _y in zip(_v[0], _v[1]): + if not _x in _lr_goto: _lr_goto[_x] = {} + _lr_goto[_x][_k] = _y +del _lr_goto_items +_lr_productions = [ + ("S' -> sentence","S'",1,None,None,None), + ('sentence -> conditions THEN function','sentence',3,'p_conditions_relation_function','myYACC.py',15), + ('conditions -> conditions OR and_conditions','conditions',3,'p_conditions_or','myYACC.py',21), + ('conditions -> and_conditions','conditions',1,'p_conditions_and_conditions','myYACC.py',25), + ('and_conditions -> and_conditions AND not_conditions','and_conditions',3,'p_and_conditions_and','myYACC.py',30), + ('and_conditions -> not_conditions','and_conditions',1,'p_and_conditions_cdt','myYACC.py',35), + ('not_conditions -> NOT cdt','not_conditions',2,'p_not_cdt','myYACC.py',39), + ('not_conditions -> cdt','not_conditions',1,'p_not_conditions_cdt','myYACC.py',43), + ('cdt -> expr EQUAL expr','cdt',3,'p_cdt_ops','myYACC.py',48), + ('cdt -> expr NEQUAL expr','cdt',3,'p_cdt_ops','myYACC.py',49), + ('cdt -> expr GE expr','cdt',3,'p_cdt_ops','myYACC.py',50), + ('cdt -> expr GT expr','cdt',3,'p_cdt_ops','myYACC.py',51), + ('cdt -> expr LE expr','cdt',3,'p_cdt_ops','myYACC.py',52), + ('cdt -> expr LT expr','cdt',3,'p_cdt_ops','myYACC.py',53), + ('cdt -> LPAREN conditions RPAREN','cdt',3,'p_cdt_parens','myYACC.py',73), + ('expr -> expr PLUS term','expr',3,'p_expr_plus_minus','myYACC.py',78), + ('expr -> expr MINUS term','expr',3,'p_expr_plus_minus','myYACC.py',79), + ('expr -> term','expr',1,'p_expr_term','myYACC.py',87), + ('term -> term TIMES factor','term',3,'p_term_times_divide_mod','myYACC.py',92), + ('term -> term DIVIDE factor','term',3,'p_term_times_divide_mod','myYACC.py',93), + ('term -> term MOD factor','term',3,'p_term_times_divide_mod','myYACC.py',94), + ('term -> factor','term',1,'p_term_factor','myYACC.py',104), + ('factor -> NUMBER','factor',1,'p_factor_assign_simple','myYACC.py',109), + 
('factor -> STRING','factor',1,'p_factor_assign_simple','myYACC.py',110),
+ ('factor -> ID','factor',1,'p_factor_id','myYACC.py',115),
+ ('factor -> NULL','factor',1,'p_factor_null','myYACC.py',119),
+ ('factor -> TRUE','factor',1,'p_factor_bool','myYACC.py',124),
+ ('factor -> FALSE','factor',1,'p_factor_bool','myYACC.py',125),
+ ('factor -> LPAREN expr RPAREN','factor',3,'p_factor_paren','myYACC.py',133),
+ ('function -> ID LPAREN variables RPAREN','function',4,'p_function','myYACC.py',137),
+ ('variables -> variables COMMA expr','variables',3,'p_variables_comma','myYACC.py',141),
+ ('variables -> expr','variables',1,'p_variables_factor','myYACC.py',146),
+]
diff --git a/script/local/parser/utils.py b/script/local/parser/utils.py
new file mode 100644
index 00000000..5150eded
--- /dev/null
+++ b/script/local/parser/utils.py
@@ -0,0 +1,254 @@
+import optparse
+import subprocess
+from decimal import Decimal
+import re
+import sys
+
+sys.path.append(sys.path[0] + "/../")
+from gspylib.common.GaussLog import GaussLog
+from gspylib.common.ParameterParsecheck import Parameter
+from gspylib.common.Common import DefaultValue, ClusterCommand
+from gspylib.common.ErrorCode import ErrorCode
+from base_utils.common.fast_popen import FastPopen
+from domain_utils.cluster_file.cluster_log import ClusterLog
+from base_utils.os.env_util import EnvUtil
+from domain_utils.cluster_os.cluster_user import ClusterUser
+from domain_utils.domain_common.cluster_constants import ClusterConstants
+
+########## print
+
+nocolor = 0
+def set_color(nc):
+ global nocolor
+ nocolor = nc
+# titles
+def print_title_1(info):
+ global nocolor
+ if nocolor != 0:
+ print('======== ' + info + ' ========')
+ else:
+ print('\033[0;37;46m======== ' + info + ' ========\033[0m')
+
+def print_title_2(info):
+ print('-------- ' + info + ' --------')
+
+def print_title_3(info):
+ print('- - - ' + info + ' - - -')
+
+# infos
+def print_info(info):
+ print_all('info', info)
+def print_ok(info):
+ print_all('ok', info)
+def print_bad(info):
+ print_all('bad', info)
+def print_warn(info):
+ print_all('warn', info)
+def print_unknown(info):
+ print_all('unknown', info)
+
+def print_all(opt, info):
+ global nocolor
+ if nocolor != 0:
+ print(info)
+ return
+ if opt == 'info':
+ print("\033[0;34;40m[info]\033[0m " + info)
+ elif opt == 'ok':
+ print("\033[0;32;40m[ok]\033[0m " + info)
+ elif opt == 'bad':
+ print("\033[0;31;40m[bad]\033[0m " + info)
+ elif opt == 'warn':
+ print("\033[0;33;40m[warn]\033[0m " + info)
+ elif opt == 'unknown':
+ print("\033[0;35;40m[unknown]\033[0m " + info)
+ else:
+ raise Exception('unknown print type :' + opt)
+
+
+# operate cmd via ssh
+os_cmd_prefix = ''
+def set_os_cmd_prefix(prefix):
+ global os_cmd_prefix
+ os_cmd_prefix = prefix
+def os_cmd(cmd):
+ global os_cmd_prefix
+ complete_cmd = os_cmd_prefix + ' "' + cmd + '"'
+ ret = subprocess.run(complete_cmd, stdout=subprocess.PIPE, shell=True)
+ return ret
+
+def get_sysctl(name):
+ name = name.replace('.', '/')
+ ret = os_cmd('cat /proc/sys/%s' % name)
+ if ret.returncode != 0:
+ print_unknown('unable to read sysctl %s' % name)
+ return None
+ else:
+ return str(ret.stdout, encoding='utf-8').strip()
+
+# db related
+def query(cursor, sql):
+ cursor.execute(sql)
+ columns = [desc[0] for desc in cursor.description]
+ results = [dict(zip(columns, row)) for row in cursor.fetchall()]
+ return results
+
+def is_later_version(cur_ver, min_ver):
+ min_major, min_minor = min_ver.split('.')[0], min_ver.split('.')[1]
+ cur_major, cur_minor = cur_ver.split('.')[0], cur_ver.split('.')[1]
+ min_major, min_minor, cur_major, cur_minor = int(min_major), int(min_minor), int(cur_major), int(cur_minor)
+ if cur_major > min_major:
+ return True
+ if cur_major == min_major:
+ return cur_minor >= min_minor
+ return False
+
+# advices
+advices = {}
+def add_advice(category, priority, advice):
+ if priority != 'high' and priority != 'medium' and priority != 'low':
+ raise Exception('Unknown advice priority : ' + priority)
+ if advice is None or advice.strip() == '':
+ raise Exception('No advice text')
+ if category not in advices:
+ advices[category] = {}
+ if priority not in advices[category]:
+ advices[category][priority] = []
+ advices[category][priority].append(advice)
+
+
+def show_advices():
+ global nocolor
+ print_title_1('Following Are Advices')
+ cnt = 0
+ for category in advices.keys():
+ print_title_2(category)
+ if nocolor != 0:
+ for priority in advices[category].keys():
+ for advice in advices[category][priority]:
+ print('[' + priority.upper() + ']' + advice)
+ cnt += 1
+ else:
+ for priority in advices[category].keys():
+ if priority == 'high':
+ print('\033[0;31;40m', end='')
+ elif priority == 'medium':
+ print('\033[0;33;40m', end='')
+ elif priority == 'low':
+ print('\033[0;34;40m', end='')
+ for advice in advices[category][priority]:
+ print('[' + priority.upper() + ']' + advice + '\033[0m')
+ cnt += 1
+ if cnt == 0:
+ if nocolor != 0:
+ print('Everything is OK')
+ else:
+ print("\033[0;32;40m Everything is OK \033[0m")
+
+settings = None
+def set_settings(sts):
+ global settings
+ settings = sts
+def get_setting(name):
+ global settings
+ try:
+ return standard_units(settings[name]['setting'], settings[name]['unit'])
+ except KeyError:
+ print("config %s could not be found!" % name)
+ raise Exception("could not find config")
+
+# standard units: byte units are normalized to a Decimal byte count; time units
+# keep their 's'/'ms' suffix as strings, which get_variable() strips later
+def standard_units(value, unit=None):
+ if unit is None and isinstance(value, str):
+ pattern = r'^-?\d+(\.\d+)?$'
+ if re.match(pattern, value):
+ return Decimal(value)
+ return value
+ value = Decimal(value)
+ if unit == 'KB' or unit == 'K' or unit == 'kB':
+ return value * 1024
+ elif unit == '8KB' or unit == '8kB':
+ return value * 1024 * 8
+ elif unit == '16KB' or unit == '16kB':
+ return value * 1024 * 16
+ elif unit == 'MB' or unit == 'M' or unit == 'mB':
+ return value * 1024 * 1024
+ elif unit == 'GB' or unit == 'G' or unit == 'gB':
+ return value * 1024 * 1024 * 1024
+ elif unit == 'TB' or unit == 'T' or unit == 'tB':
+ return value * 1024 * 1024 * 1024 * 1024
+ elif unit == 'PB' or unit == 'P' or unit == 'pB':
+ return value * 1024 * 1024 * 1024 * 1024 * 1024
+ elif unit == 's':
+ return str(value) + 's'
+ elif unit == 'ms':
+ return str(value) + 'ms'
+ return value
+
+def format_size(size):
+ units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
+ unit_index = 0
+ if size is None:
+ return ''
+ while size >= 1024:
+ size = size / 1024
+ unit_index += 1
+ return "%.2f %s" % (size, units[unit_index])
+
+min_s = 60
+hour_s = 60 * min_s
+day_s = 24 * hour_s
+
+def format_epoch_to_time(epoch):
+ time = ''
+ if epoch > day_s:
+ days = "%d" % (epoch / day_s)
+ epoch = epoch % day_s
+ time += ' ' + days + 'd'
+ if epoch > hour_s:
+ hours = '%d' % (epoch / hour_s)
+ epoch = epoch % hour_s
+ time += ' ' + hours + 'h'
+ if epoch > min_s:
+ mins = '%d' % (epoch / min_s)
+ epoch = epoch % min_s
+ time += ' ' + mins + 'm'
+ time += ' ' + '%02d' % epoch + 's'
+ return time
+
+def format_percent(value):
+ return "%.2f%%" % value
+
+dependency_settings = {}
+def set_dependency_settings(sts):
+ global dependency_settings
+ dependency_settings = sts
+
+def get_dependency_setting(name):
+ global dependency_settings
+ try:
+ return standard_units(dependency_settings[name]['setting'], dependency_settings[name]['unit'])
+ except KeyError:
+ raise Exception("could not find config")
+
+dependency_info = {}
+
+def add_dependency_info(level, category, info):
+ if level not in dependency_info:
+ dependency_info[level] = {}
+ if category not in dependency_info[level]:
+ dependency_info[level][category] = []
+ dependency_info[level][category].append(info)
+
+
+def show_dependency_info():
+ for level in dependency_info.keys():
+ for category in dependency_info[level].keys():
+ for value in dependency_info[level][category]:
+ print(" Warning reason:" + category + ":" + value)
diff --git a/script/local/parser/variables.py b/script/local/parser/variables.py
new file mode 100644
index 00000000..5db19b8d
--- /dev/null
+++ b/script/local/parser/variables.py
@@ -0,0 +1,34 @@
+import sys
+import os
+localDirPath = os.path.dirname(os.path.realpath(__file__))
+
+sys.path.append(sys.path[0] + "/../")
+from local.parser.utils import get_dependency_setting
+from decimal import Decimal
+
+# NOTE: get_variable below does not consult this table; it reads live settings
+# via get_dependency_setting. These entries are sample values.
+variable_dict = {
+ 'udf_memory_limit' : 300,
+ 'max_process_memory' : 200,
+ 'hot_standby' : 'on',
+ 'enable_global_plancache' : 'on',
+ 'local_syscache_threshold' : 5*1024,
+ 'use_elastic_search' : 'on',
+ 'max_cached_tuplebufs' : 100,
+ 'max_changes_in_memory' : 100,
+ 'session_history_memory' : 400,
+ 'some_string' : 'some_string',
+ 'some_null' : None,
+}
+
+def get_variable(name):
+ try:
+ val = get_dependency_setting(name)
+ except Exception:
+ val = None
+ if not isinstance(val, str):
+ return val
+ if val.endswith('ms'):
+ return Decimal(val[:-2])
+ elif val.endswith('s'):
+ return Decimal(val[:-1]) * 1000
+ return val
diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py
new file mode 100644
index 00000000..3a065320
--- /dev/null
+++ b/script/local/parser/yacc.py
@@ -0,0 +1,2403 @@
+import re
+import types
+import sys
+import inspect
+
+#-----------------------------------------------------------------------------
+# === User configurable parameters ===
+#
+# Change these to modify the default behavior of yacc (if you wish)
+#-----------------------------------------------------------------------------
+
+yaccdebug = False # Debugging mode. If set, yacc generates a
+ # 'parser.out' file in the current directory
+
+debug_file = 'parser.out' # Default name of the debugging file
+error_count = 3 # Number of symbols that must be shifted to leave recovery mode
+resultlimit = 40 # Size limit of results when running in debug mode.
+
+MAXINT = sys.maxsize
+
+class Logger(object):
+ def __init__(self, f):
+ self.f = f
+
+ def debug(self, msg, *args, **kwargs):
+ self.f.write((msg % args) + '\n')
+
+ info = debug
+
+ def warning(self, msg, *args, **kwargs):
+ self.f.write('WARNING: ' + (msg % args) + '\n')
+
+ def error(self, msg, *args, **kwargs):
+ self.f.write('ERROR: ' + (msg % args) + '\n')
+
+ critical = debug
+
+# Null logger is used when no output is generated. Does nothing.
+class NullLogger(object):
+ def __getattribute__(self, name):
+ return self
+
+ def __call__(self, *args, **kwargs):
+ return self
+
+# Exception raised for yacc-related errors
+class YaccError(Exception):
+ pass
+
+# Format the result message that the parser produces when running in debug mode.
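+# For example, format_result(list(range(100))) gives something like
+# "<list @ 0x7f...> ([0, 1, 2, 3, 4, ...)": the repr is cut off once it
+# exceeds resultlimit characters.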
+def format_result(r): + repr_str = repr(r) + if '\n' in repr_str: + repr_str = repr(repr_str) + if len(repr_str) > resultlimit: + repr_str = repr_str[:resultlimit] + ' ...' + result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) + return result + +# Format stack entries when the parser is running in debug mode +def format_stack_entry(r): + repr_str = repr(r) + if '\n' in repr_str: + repr_str = repr(repr_str) + if len(repr_str) < 16: + return repr_str + else: + return '<%s @ 0x%x>' % (type(r).__name__, id(r)) + +#----------------------------------------------------------------------------- +# === LR Parsing Engine === +# +# The following classes are used for the LR parser itself. These are not +# used during table construction and are independent of the actual LR +# table generation algorithm +#----------------------------------------------------------------------------- + +# This class is used to hold non-terminal grammar symbols during parsing. +# It normally has the following attributes set: +# .type = Grammar symbol type +# .value = Symbol value +# .lineno = Starting line number +# .endlineno = Ending line number (optional, set automatically) +# .lexpos = Starting lex position +# .endlexpos = Ending lex position (optional, set automatically) + +class YaccSymbol: + def __str__(self): + return self.type + + def __repr__(self): + return str(self) + +# This class is a wrapper around the objects actually passed to each +# grammar rule. Index lookup and assignment actually assign the +# .value attribute of the underlying YaccSymbol object. +# The lineno() method returns the line number of a given +# item (or 0 if not defined). The linespan() method returns +# a tuple of (startline,endline) representing the range of lines +# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) +# representing the range of positional information for a symbol. + +class YaccProduction: + def __init__(self, s, stack=None): + self.slice = s + self.stack = stack + self.lexer = None + self.parser = None + + def __getitem__(self, n): + if isinstance(n, slice): + return [s.value for s in self.slice[n]] + elif n >= 0: + return self.slice[n].value + else: + return self.stack[n].value + + def __setitem__(self, n, v): + self.slice[n].value = v + + def __getslice__(self, i, j): + return [s.value for s in self.slice[i:j]] + + def __len__(self): + return len(self.slice) + + def lineno(self, n): + return getattr(self.slice[n], 'lineno', 0) + + def set_lineno(self, n, lineno): + self.slice[n].lineno = lineno + + def linespan(self, n): + startline = getattr(self.slice[n], 'lineno', 0) + endline = getattr(self.slice[n], 'endlineno', startline) + return startline, endline + + def lexpos(self, n): + return getattr(self.slice[n], 'lexpos', 0) + + def set_lexpos(self, n, lexpos): + self.slice[n].lexpos = lexpos + + def lexspan(self, n): + startpos = getattr(self.slice[n], 'lexpos', 0) + endpos = getattr(self.slice[n], 'endlexpos', startpos) + return startpos, endpos + + def error(self): + raise SyntaxError + +# ----------------------------------------------------------------------------- +# == LRParser == +# +# The LR Parsing engine. 
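+#
+# A brief usage sketch (illustrative): once the tables for a grammar have been
+# built, the engine is driven roughly as
+#
+#     parser = LRParser(lrtab, p_error)          # lrtab: an LRTable instance
+#     result = parser.parse(text, lexer=mylexer)
+#
+# where `lrtab`, `p_error`, `text` and `mylexer` are assumed to be supplied by
+# the caller.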
+# ----------------------------------------------------------------------------- + +class LRParser: + def __init__(self, lrtab, errorf): + self.productions = lrtab.lr_productions + self.action = lrtab.lr_action + self.goto = lrtab.lr_goto + self.errorfunc = errorf + self.set_defaulted_states() + self.errorok = True + + def errok(self): + self.errorok = True + + def restart(self): + del self.statestack[:] + del self.symstack[:] + sym = YaccSymbol() + sym.type = '$end' + self.symstack.append(sym) + self.statestack.append(0) + + # Defaulted state support. + # This method identifies parser states where there is only one possible reduction action. + # For such states, the parser can make a choose to make a rule reduction without consuming + # the next look-ahead token. This delayed invocation of the tokenizer can be useful in + # certain kinds of advanced parsing situations where the lexer and parser interact with + # each other or change states (i.e., manipulation of scope, lexer states, etc.). + # + def set_defaulted_states(self): + self.defaulted_states = {} + for state, actions in self.action.items(): + rules = list(actions.values()) + if len(rules) == 1 and rules[0] < 0: + self.defaulted_states[state] = rules[0] + + def disable_defaulted_states(self): + self.defaulted_states = {} + + # parse(). + # + # This is the core parsing engine. To operate, it requires a lexer object. + # Two options are provided. The debug flag turns on debugging so that you can + # see the various rule reductions and parsing steps. tracking turns on position + # tracking. In this mode, symbols will record the starting/ending line number and + # character index. + + def parse(self, input=None, lexer=None, debug=False, tracking=False): + # If debugging has been specified as a flag, turn it into a logging object + if isinstance(debug, int) and debug: + debug = Logger(sys.stderr) + + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + + if debug: + debug.info('PARSE DEBUG START') + + # If no lexer was given, we will try to use the lex module + if not lexer: + from . import lex + lexer = lex.lexer + + # Set up the lexer and parser objects on pslice + pslice.lexer = lexer + pslice.parser = self + + # If input was supplied, pass to lexer + if input is not None: + lexer.input(input) + + # Set the token function + get_token = self.token = lexer.token + + # Set up the state and symbol stacks + statestack = self.statestack = [] # Stack of parsing states + symstack = self.symstack = [] # Stack of grammar symbols + pslice.stack = symstack # Put in the production + errtoken = None # Err token + + # The start state is assumed to be (0,$end) + + statestack.append(0) + sym = YaccSymbol() + sym.type = '$end' + symstack.append(sym) + state = 0 + while True: + # Get the next symbol on the input. If a lookahead symbol + # is already set, we just use that. 
Otherwise, we'll pull + # the next token off of the lookaheadstack or from the lexer + + if debug: + debug.debug('State : %s', state) + + if state not in defaulted_states: + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + if debug: + debug.debug('Defaulted state %s: Reduce using %d', state, -t) + + if debug: + debug.debug('Stack : %s', + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + + if t is not None: + if t > 0: + # shift a symbol on the stack + statestack.append(t) + state = t + + if debug: + debug.debug('Action : Shift and goto state %s', t) + + symstack.append(lookahead) + lookahead = None + + # Decrease error count on successful shift + if errorcount: + errorcount -= 1 + continue + + if t < 0: + # reduce a symbol on the stack, emit a production + p = prod[-t] + pname = p.name + plen = p.len + + # Get production function + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + + if debug: + if plen: + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, + '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', + goto[statestack[-1-plen]][pname]) + else: + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], + goto[statestack[-1]][pname]) + + if plen: + targ = symstack[-plen-1:] + targ[0] = sym + + if tracking: + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # below as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + del symstack[-plen:] + self.state = state + p.callable(pslice) + del statestack[-plen:] + if debug: + debug.info('Result : %s', format_result(pslice[0])) + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = error_count + self.errorok = False + + continue + + else: + + if tracking: + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + + targ = [sym] + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # above as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + self.state = state + p.callable(pslice) + if debug: + debug.info('Result : %s', format_result(pslice[0])) + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. 
Enter error recovery state + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = error_count + self.errorok = False + + continue + + if t == 0: + n = symstack[-1] + result = getattr(n, 'value', None) + + if debug: + debug.info('Done : Returning %s', format_result(result)) + debug.info('PARSE DEBUG END') + + return result + + if t is None: + + if debug: + debug.error('Error : %s', + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + + # We have some kind of parsing error here. To handle + # this, we are going to push the current token onto + # the tokenstack and replace it with an 'error' token. + # If there are any synchronization rules, they may + # catch it. + # + # In addition to pushing the error token, we call call + # the user defined p_error() function if this is the + # first syntax error. This function is only called if + # errorcount == 0. + if errorcount == 0 or self.errorok: + errorcount = error_count + self.errorok = False + errtoken = lookahead + if errtoken.type == '$end': + errtoken = None # End of file! + if self.errorfunc: + if errtoken and not hasattr(errtoken, 'lexer'): + errtoken.lexer = lexer + self.state = state + tok = self.errorfunc(errtoken) + if self.errorok: + # User must have done some kind of panic + # mode recovery on their own. The + # returned token is the next lookahead + lookahead = tok + errtoken = None + continue + else: + if errtoken: + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 + if lineno: + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) + else: + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) + else: + sys.stderr.write('yacc: Parse error in input. EOF\n') + return + + else: + errorcount = error_count + + # case 1: the statestack only has 1 entry on it. If we're in this state, the + # entire parse has been rolled back and we're completely hosed. The token is + # discarded and we just keep going. + + if len(statestack) <= 1 and lookahead.type != '$end': + lookahead = None + errtoken = None + state = 0 + # Nuke the pushback stack + del lookaheadstack[:] + continue + + # case 2: the statestack has a couple of entries on it, but we're + # at the end of the file. nuke the top entry and generate an error token + + # Start nuking entries on the stack + if lookahead.type == '$end': + # Whoa. We're really hosed here. Bail out + return + + if lookahead.type != 'error': + sym = symstack[-1] + if sym.type == 'error': + # Hmmm. 
Error is on top of stack, we'll just nuke input + # symbol and continue + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + lookahead = None + continue + + # Create the error symbol for the first time and make it the new lookahead symbol + t = YaccSymbol() + t.type = 'error' + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + else: + sym = symstack.pop() + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos + statestack.pop() + state = statestack[-1] + + continue + + # If we'r here, something really bad happened + raise RuntimeError('yacc: internal parser error!!!\n') + +# ----------------------------------------------------------------------------- +# === Grammar Representation === +# +# The following functions, classes, and variables are used to represent and +# manipulate the rules that make up a grammar. +# ----------------------------------------------------------------------------- + +# regex matching identifiers +_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') + +# ----------------------------------------------------------------------------- +# class Production: +# +# This class stores the raw information about a single production or grammar rule. +# A grammar rule refers to a specification such as this: +# +# expr : expr PLUS term +# +# Here are the basic attributes defined on all productions +# +# name - Name of the production. For example 'expr' +# prod - A list of symbols on the right side ['expr','PLUS','term'] +# prec - Production precedence level +# number - Production number. +# func - Function that executes on reduce +# file - File where production function is defined +# lineno - Line number where production function is defined +# +# The following attributes are defined or optional. +# +# len - Length of the production (number of symbols on right hand side) +# usyms - Set of unique symbols found in the production +# ----------------------------------------------------------------------------- + +class Production(object): + reduced = 0 + def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): + self.name = name + self.prod = tuple(prod) + self.number = number + self.func = func + self.callable = None + self.file = file + self.line = line + self.prec = precedence + + # Internal settings used during table construction + + self.len = len(self.prod) # Length of the production + + # Create a list of unique production symbols used in the production + self.usyms = [] + for s in self.prod: + if s not in self.usyms: + self.usyms.append(s) + + # List of all LR items for the production + self.lr_items = [] + self.lr_next = None + + # Create a string representation + if self.prod: + self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) + else: + self.str = '%s -> ' % self.name + + def __str__(self): + return self.str + + def __repr__(self): + return 'Production(' + str(self) + ')' + + def __len__(self): + return len(self.prod) + + def __nonzero__(self): + return 1 + + def __getitem__(self, index): + return self.prod[index] + + # Return the nth lr_item from the production (or None if at the end) + def lr_item(self, n): + if n > len(self.prod): + return None + p = LRItem(self, n) + # Precompute the list of productions immediately following. 
+ try: + p.lr_after = self.Prodnames[p.prod[n+1]] + except (IndexError, KeyError): + p.lr_after = [] + try: + p.lr_before = p.prod[n-1] + except IndexError: + p.lr_before = None + return p + + # Bind the production function name to a callable + def bind(self, pdict): + if self.func: + self.callable = pdict[self.func] + +# ----------------------------------------------------------------------------- +# class LRItem +# +# This class represents a specific stage of parsing a production rule. For +# example: +# +# expr : expr . PLUS term +# +# In the above, the "." represents the current location of the parse. Here +# basic attributes: +# +# name - Name of the production. For example 'expr' +# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] +# number - Production number. +# +# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term' +# then lr_next refers to 'expr -> expr PLUS . term' +# lr_index - LR item index (location of the ".") in the prod list. +# lookaheads - LALR lookahead symbols for this item +# len - Length of the production (number of symbols on right hand side) +# lr_after - List of all productions that immediately follow +# lr_before - Grammar symbol immediately before +# ----------------------------------------------------------------------------- + +class LRItem(object): + def __init__(self, p, n): + self.name = p.name + self.prod = list(p.prod) + self.number = p.number + self.lr_index = n + self.lookaheads = {} + self.prod.insert(n, '.') + self.prod = tuple(self.prod) + self.len = len(self.prod) + self.usyms = p.usyms + + def __str__(self): + if self.prod: + s = '%s -> %s' % (self.name, ' '.join(self.prod)) + else: + s = '%s -> ' % self.name + return s + + def __repr__(self): + return 'LRItem(' + str(self) + ')' + +# ----------------------------------------------------------------------------- +# rightmost_terminal() +# +# Return the rightmost terminal from a list of symbols. Used in add_production() +# ----------------------------------------------------------------------------- +def rightmost_terminal(symbols, terminals): + i = len(symbols) - 1 + while i >= 0: + if symbols[i] in terminals: + return symbols[i] + i -= 1 + return None + +# ----------------------------------------------------------------------------- +# === GRAMMAR CLASS === +# +# The following class represents the contents of the specified grammar along +# with various computed properties such as first sets, follow sets, LR items, etc. +# This data is used for critical parts of the table generation process later. +# ----------------------------------------------------------------------------- + +class GrammarError(YaccError): + pass + +class Grammar(object): + def __init__(self, terminals): + self.Productions = [None] # A list of all of the productions. The first + # entry is always reserved for the purpose of + # building an augmented grammar + + self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all + # productions of that nonterminal. + + self.Prodmap = {} # A dictionary that is only used to detect duplicate + # productions. + + self.Terminals = {} # A dictionary mapping the names of terminal symbols to a + # list of the rules where they are used. + + for term in terminals: + self.Terminals[term] = [] + + self.Terminals['error'] = [] + + self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list + # of rule numbers where they are used. 
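+
+        # Illustrative example for the two dictionaries below: with the rules
+        # 'expr : expr PLUS term' and 'expr : term', First['expr'] picks up
+        # everything in First['term'], and Follow['term'] picks up 'PLUS'
+        # via Follow['expr'].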
+ + self.First = {} # A dictionary of precomputed FIRST(x) symbols + + self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols + + self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the + # form ('right',level) or ('nonassoc', level) or ('left',level) + + self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. + # This is only used to provide error checking and to generate + # a warning about unused precedence rules. + + self.Start = None # Starting symbol for the grammar + + + def __len__(self): + return len(self.Productions) + + def __getitem__(self, index): + return self.Productions[index] + + # ----------------------------------------------------------------------------- + # set_precedence() + # + # Sets the precedence for a given terminal. assoc is the associativity such as + # 'left','right', or 'nonassoc'. level is a numeric level. + # + # ----------------------------------------------------------------------------- + + def set_precedence(self, term, assoc, level): + assert self.Productions == [None], 'Must call set_precedence() before add_production()' + if term in self.Precedence: + raise GrammarError('Precedence already specified for terminal %r' % term) + if assoc not in ['left', 'right', 'nonassoc']: + raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") + self.Precedence[term] = (assoc, level) + + # ----------------------------------------------------------------------------- + # add_production() + # + # Given an action function, this function assembles a production rule and + # computes its precedence level. + # + # The production rule is supplied as a list of symbols. For example, + # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and + # symbols ['expr','PLUS','term']. + # + # Precedence is determined by the precedence of the right-most non-terminal + # or the precedence of a terminal specified by %prec. + # + # A variety of error checks are performed to make sure production symbols + # are valid and that %prec is used correctly. + # ----------------------------------------------------------------------------- + + def add_production(self, prodname, syms, func=None, file='', line=0): + + if prodname in self.Terminals: + raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname)) + if prodname == 'error': + raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname)) + if not _is_identifier.match(prodname): + raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname)) + + # Look for literal tokens + for n, s in enumerate(syms): + if s[0] in "'\"": + try: + c = eval(s) + if (len(c) > 1): + raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' % + (file, line, s, prodname)) + if c not in self.Terminals: + self.Terminals[c] = [] + syms[n] = c + continue + except SyntaxError: + pass + if not _is_identifier.match(s) and s != '%prec': + raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname)) + + # Determine the precedence level + if '%prec' in syms: + if syms[-1] == '%prec': + raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line)) + if syms[-2] != '%prec': + raise GrammarError('%s:%d: Syntax error. 
%%prec can only appear at the end of a grammar rule' % + (file, line)) + precname = syms[-1] + prodprec = self.Precedence.get(precname) + if not prodprec: + raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) + else: + self.UsedPrecedence.add(precname) + del syms[-2:] # Drop %prec from the rule + else: + # If no %prec, precedence is determined by the rightmost terminal symbol + precname = rightmost_terminal(syms, self.Terminals) + prodprec = self.Precedence.get(precname, ('right', 0)) + + # See if the rule is already in the rulemap + map = '%s -> %s' % (prodname, syms) + if map in self.Prodmap: + m = self.Prodmap[map] + raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + + 'Previous definition at %s:%d' % (m.file, m.line)) + + # From this point on, everything is valid. Create a new Production instance + pnumber = len(self.Productions) + if prodname not in self.Nonterminals: + self.Nonterminals[prodname] = [] + + # Add the production number to Terminals and Nonterminals + for t in syms: + if t in self.Terminals: + self.Terminals[t].append(pnumber) + else: + if t not in self.Nonterminals: + self.Nonterminals[t] = [] + self.Nonterminals[t].append(pnumber) + + # Create a production and add it to the list of productions + p = Production(pnumber, prodname, syms, prodprec, func, file, line) + self.Productions.append(p) + self.Prodmap[map] = p + + # Add to the global productions list + try: + self.Prodnames[prodname].append(p) + except KeyError: + self.Prodnames[prodname] = [p] + + # ----------------------------------------------------------------------------- + # set_start() + # + # Sets the starting symbol and creates the augmented grammar. Production + # rule 0 is S' -> start where start is the start symbol. + # ----------------------------------------------------------------------------- + + def set_start(self, start=None): + if not start: + start = self.Productions[1].name + if start not in self.Nonterminals: + raise GrammarError('start symbol %s undefined' % start) + self.Productions[0] = Production(0, "S'", [start]) + self.Nonterminals[start].append(0) + self.Start = start + + # ----------------------------------------------------------------------------- + # find_unreachable() + # + # Find all of the nonterminal symbols that can't be reached from the starting + # symbol. Returns a list of nonterminals that can't be reached. + # ----------------------------------------------------------------------------- + + def find_unreachable(self): + + # Mark all symbols that are reachable from a symbol s + def mark_reachable_from(s): + if s in reachable: + return + reachable.add(s) + for p in self.Prodnames.get(s, []): + for r in p.prod: + mark_reachable_from(r) + + reachable = set() + mark_reachable_from(self.Productions[0].prod[0]) + return [s for s in self.Nonterminals if s not in reachable] + + # ----------------------------------------------------------------------------- + # infinite_cycles() + # + # This function looks at the various parsing rules and tries to detect + # infinite recursion cycles (grammar rules where there is no possible way + # to derive a string of only terminals). 
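+    #
+    # For example (illustrative), if the only rule for 'a' is 'a : a COMMA a',
+    # no derivation from 'a' can ever reach a terminal-only string, so 'a'
+    # would be reported as non-terminating.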
+ # ----------------------------------------------------------------------------- + + def infinite_cycles(self): + terminates = {} + + # Terminals: + for t in self.Terminals: + terminates[t] = True + + terminates['$end'] = True + + # Nonterminals: + + # Initialize to false: + for n in self.Nonterminals: + terminates[n] = False + + # Then propagate termination until no change: + while True: + some_change = False + for (n, pl) in self.Prodnames.items(): + # Nonterminal n terminates iff any of its productions terminates. + for p in pl: + # Production p terminates iff all of its rhs symbols terminate. + for s in p.prod: + if not terminates[s]: + # The symbol s does not terminate, + # so production p does not terminate. + p_terminates = False + break + else: + # didn't break from the loop, + # so every symbol s terminates + # so production p terminates. + p_terminates = True + + if p_terminates: + # symbol n terminates! + if not terminates[n]: + terminates[n] = True + some_change = True + # Don't need to consider any more productions for this n. + break + + if not some_change: + break + + infinite = [] + for (s, term) in terminates.items(): + if not term: + if s not in self.Prodnames and s not in self.Terminals and s != 'error': + # s is used-but-not-defined, and we've already warned of that, + # so it would be overkill to say that it's also non-terminating. + pass + else: + infinite.append(s) + + return infinite + + # ----------------------------------------------------------------------------- + # undefined_symbols() + # + # Find all symbols that were used the grammar, but not defined as tokens or + # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol + # and prod is the production where the symbol was used. + # ----------------------------------------------------------------------------- + def undefined_symbols(self): + result = [] + for p in self.Productions: + if not p: + continue + + for s in p.prod: + if s not in self.Prodnames and s not in self.Terminals and s != 'error': + result.append((s, p)) + return result + + # ----------------------------------------------------------------------------- + # unused_terminals() + # + # Find all terminals that were defined, but not used by the grammar. Returns + # a list of all symbols. + # ----------------------------------------------------------------------------- + def unused_terminals(self): + unused_tok = [] + for s, v in self.Terminals.items(): + if s != 'error' and not v: + unused_tok.append(s) + + return unused_tok + + # ------------------------------------------------------------------------------ + # unused_rules() + # + # Find all grammar rules that were defined, but not used (maybe not reachable) + # Returns a list of productions. + # ------------------------------------------------------------------------------ + + def unused_rules(self): + unused_prod = [] + for s, v in self.Nonterminals.items(): + if not v: + p = self.Prodnames[s][0] + unused_prod.append(p) + return unused_prod + + # ----------------------------------------------------------------------------- + # unused_precedence() + # + # Returns a list of tuples (term,precedence) corresponding to precedence + # rules that were never used by the grammar. term is the name of the terminal + # on which precedence was applied and precedence is a string such as 'left' or + # 'right' corresponding to the type of precedence. 
+ # ----------------------------------------------------------------------------- + + def unused_precedence(self): + unused = [] + for termname in self.Precedence: + if not (termname in self.Terminals or termname in self.UsedPrecedence): + unused.append((termname, self.Precedence[termname][0])) + + return unused + + # ------------------------------------------------------------------------- + # _first() + # + # Compute the value of FIRST1(beta) where beta is a tuple of symbols. + # + # During execution of compute_first1, the result may be incomplete. + # Afterward (e.g., when called from compute_follow()), it will be complete. + # ------------------------------------------------------------------------- + def _first(self, beta): + + # We are computing First(x1,x2,x3,...,xn) + result = [] + for x in beta: + x_produces_empty = False + + # Add all the non- symbols of First[x] to the result. + for f in self.First[x]: + if f == '': + x_produces_empty = True + else: + if f not in result: + result.append(f) + + if x_produces_empty: + # We have to consider the next x in beta, + # i.e. stay in the loop. + pass + else: + # We don't have to consider any further symbols in beta. + break + else: + # There was no 'break' from the loop, + # so x_produces_empty was true for all x in beta, + # so beta produces empty as well. + result.append('') + + return result + + # ------------------------------------------------------------------------- + # compute_first() + # + # Compute the value of FIRST1(X) for all symbols + # ------------------------------------------------------------------------- + def compute_first(self): + if self.First: + return self.First + + # Terminals: + for t in self.Terminals: + self.First[t] = [t] + + self.First['$end'] = ['$end'] + + # Nonterminals: + + # Initialize to the empty set: + for n in self.Nonterminals: + self.First[n] = [] + + # Then propagate symbols until no change: + while True: + some_change = False + for n in self.Nonterminals: + for p in self.Prodnames[n]: + for f in self._first(p.prod): + if f not in self.First[n]: + self.First[n].append(f) + some_change = True + if not some_change: + break + + return self.First + + # --------------------------------------------------------------------- + # compute_follow() + # + # Computes all of the follow sets for every non-terminal symbol. The + # follow set is the set of all symbols that might follow a given + # non-terminal. See the Dragon book, 2nd Ed. p. 189. + # --------------------------------------------------------------------- + def compute_follow(self, start=None): + # If already computed, return the result + if self.Follow: + return self.Follow + + # If first sets not computed yet, do that first. + if not self.First: + self.compute_first() + + # Add '$end' to the follow list of the start symbol + for k in self.Nonterminals: + self.Follow[k] = [] + + if not start: + start = self.Productions[1].name + + self.Follow[start] = ['$end'] + + while True: + didadd = False + for p in self.Productions[1:]: + # Here is the production set + for i, B in enumerate(p.prod): + if B in self.Nonterminals: + # Okay. 
We got a non-terminal in a production + fst = self._first(p.prod[i+1:]) + hasempty = False + for f in fst: + if f != '' and f not in self.Follow[B]: + self.Follow[B].append(f) + didadd = True + if f == '': + hasempty = True + if hasempty or i == (len(p.prod)-1): + # Add elements of follow(a) to follow(b) + for f in self.Follow[p.name]: + if f not in self.Follow[B]: + self.Follow[B].append(f) + didadd = True + if not didadd: + break + return self.Follow + + + # ----------------------------------------------------------------------------- + # build_lritems() + # + # This function walks the list of productions and builds a complete set of the + # LR items. The LR items are stored in two ways: First, they are uniquely + # numbered and placed in the list _lritems. Second, a linked list of LR items + # is built for each production. For example: + # + # E -> E PLUS E + # + # Creates the list + # + # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] + # ----------------------------------------------------------------------------- + + def build_lritems(self): + for p in self.Productions: + lastlri = p + i = 0 + lr_items = [] + while True: + if i > len(p): + lri = None + else: + lri = LRItem(p, i) + # Precompute the list of productions immediately following + try: + lri.lr_after = self.Prodnames[lri.prod[i+1]] + except (IndexError, KeyError): + lri.lr_after = [] + try: + lri.lr_before = lri.prod[i-1] + except IndexError: + lri.lr_before = None + + lastlri.lr_next = lri + if not lri: + break + lr_items.append(lri) + lastlri = lri + i += 1 + p.lr_items = lr_items + +# ----------------------------------------------------------------------------- +# === LR Generator === +# +# The following classes and functions are used to generate LR parsing tables on +# a grammar. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# digraph() +# traverse() +# +# The following two functions are used to compute set valued functions +# of the form: +# +# F(x) = F'(x) U U{F(y) | x R y} +# +# This is used to compute the values of Read() sets as well as FOLLOW sets +# in LALR(1) generation. +# +# Inputs: X - An input set +# R - A relation +# FP - Set-valued function +# ------------------------------------------------------------------------------ + +def digraph(X, R, FP): + N = {} + for x in X: + N[x] = 0 + stack = [] + F = {} + for x in X: + if N[x] == 0: + traverse(x, N, stack, F, X, R, FP) + return F + +def traverse(x, N, stack, F, X, R, FP): + stack.append(x) + d = len(stack) + N[x] = d + F[x] = FP(x) # F(X) <- F'(x) + + rel = R(x) # Get y's related to x + for y in rel: + if N[y] == 0: + traverse(y, N, stack, F, X, R, FP) + N[x] = min(N[x], N[y]) + for a in F.get(y, []): + if a not in F[x]: + F[x].append(a) + if N[x] == d: + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + while element != x: + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + +class LALRError(YaccError): + pass + + +# ----------------------------------------------------------------------------- +# == LRTable == +# +# This class implements the LR table generation algorithm. There are no +# public methods. 
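+#
+# All of the work happens in __init__: the LR(0) items are built, LALR(1)
+# lookaheads are attached, and the action/goto tables are filled in, so a
+# freshly constructed LRTable is immediately usable by LRParser.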
+# ----------------------------------------------------------------------------- + +class LRTable: + def __init__(self, grammar, log=None): + self.grammar = grammar + + # Set up the logger + if not log: + log = NullLogger() + self.log = log + + # Internal attributes + self.lr_action = {} # Action table + self.lr_goto = {} # Goto table + self.lr_productions = grammar.Productions # Copy of grammar Production array + self.lr_goto_cache = {} # Cache of computed gotos + self.lr0_cidhash = {} # Cache of closures + + self._add_count = 0 # Internal counter used to detect cycles + + # Diagnostic information filled in by the table generator + self.sr_conflict = 0 + self.rr_conflict = 0 + self.conflicts = [] # List of conflicts + + self.sr_conflicts = [] + self.rr_conflicts = [] + + # Build the tables + self.grammar.build_lritems() + self.grammar.compute_first() + self.grammar.compute_follow() + self.lr_parse_table() + + # Bind all production function names to callable objects in pdict + def bind_callables(self, pdict): + for p in self.lr_productions: + p.bind(pdict) + + # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. + + def lr0_closure(self, I): + self._add_count += 1 + + # Add everything in I to J + J = I[:] + didadd = True + while didadd: + didadd = False + for j in J: + for x in j.lr_after: + if getattr(x, 'lr0_added', 0) == self._add_count: + continue + # Add B --> .G to J + J.append(x.lr_next) + x.lr0_added = self._add_count + didadd = True + + return J + + # Compute the LR(0) goto function goto(I,X) where I is a set + # of LR(0) items and X is a grammar symbol. This function is written + # in a way that guarantees uniqueness of the generated goto sets + # (i.e. the same goto set will never be returned as two different Python + # objects). With uniqueness, we can later do fast set comparisons using + # id(obj) instead of element-wise comparison. + + def lr0_goto(self, I, x): + # First we look for a previously cached entry + g = self.lr_goto_cache.get((id(I), x)) + if g: + return g + + # Now we generate the goto set in a way that guarantees uniqueness + # of the result + + s = self.lr_goto_cache.get(x) + if not s: + s = {} + self.lr_goto_cache[x] = s + + gs = [] + for p in I: + n = p.lr_next + if n and n.lr_before == x: + s1 = s.get(id(n)) + if not s1: + s1 = {} + s[id(n)] = s1 + gs.append(n) + s = s1 + g = s.get('$end') + if not g: + if gs: + g = self.lr0_closure(gs) + s['$end'] = g + else: + s['$end'] = gs + self.lr_goto_cache[(id(I), x)] = g + return g + + # Compute the LR(0) sets of item function + def lr0_items(self): + C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] + i = 0 + for I in C: + self.lr0_cidhash[id(I)] = i + i += 1 + + # Loop over the items in C and each grammar symbols + i = 0 + while i < len(C): + I = C[i] + i += 1 + + # Collect all of the symbols that could possibly be in the goto(I,X) sets + asyms = {} + for ii in I: + for s in ii.usyms: + asyms[s] = None + + for x in asyms: + g = self.lr0_goto(I, x) + if not g or id(g) in self.lr0_cidhash: + continue + self.lr0_cidhash[id(g)] = len(C) + C.append(g) + + return C + + # ----------------------------------------------------------------------------- + # ==== LALR(1) Parsing ==== + # + # LALR(1) parsing is almost exactly the same as SLR except that instead of + # relying upon Follow() sets when performing reductions, a more selective + # lookahead set that incorporates the state of the LR(0) machine is utilized. + # Thus, we mainly just have to focus on calculating the lookahead sets. 
+ # + # The method used here is due to DeRemer and Pennelo (1982). + # + # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) + # Lookahead Sets", ACM Transactions on Programming Languages and Systems, + # Vol. 4, No. 4, Oct. 1982, pp. 615-649 + # + # Further details can also be found in: + # + # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", + # McGraw-Hill Book Company, (1985). + # + # ----------------------------------------------------------------------------- + + # ----------------------------------------------------------------------------- + # compute_nullable_nonterminals() + # + # Creates a dictionary containing all of the non-terminals that might produce + # an empty production. + # ----------------------------------------------------------------------------- + + def compute_nullable_nonterminals(self): + nullable = set() + num_nullable = 0 + while True: + for p in self.grammar.Productions[1:]: + if p.len == 0: + nullable.add(p.name) + continue + for t in p.prod: + if t not in nullable: + break + else: + nullable.add(p.name) + if len(nullable) == num_nullable: + break + num_nullable = len(nullable) + return nullable + + # ----------------------------------------------------------------------------- + # find_nonterminal_trans(C) + # + # Given a set of LR(0) items, this functions finds all of the non-terminal + # transitions. These are transitions in which a dot appears immediately before + # a non-terminal. Returns a list of tuples of the form (state,N) where state + # is the state number and N is the nonterminal symbol. + # + # The input C is the set of LR(0) items. + # ----------------------------------------------------------------------------- + + def find_nonterminal_transitions(self, C): + trans = [] + for stateno, state in enumerate(C): + for p in state: + if p.lr_index < p.len - 1: + t = (stateno, p.prod[p.lr_index+1]) + if t[1] in self.grammar.Nonterminals: + if t not in trans: + trans.append(t) + return trans + + # ----------------------------------------------------------------------------- + # dr_relation() + # + # Computes the DR(p,A) relationships for non-terminal transitions. The input + # is a tuple (state,N) where state is a number and N is a nonterminal symbol. + # + # Returns a list of terminals. + # ----------------------------------------------------------------------------- + + def dr_relation(self, C, trans, nullable): + state, N = trans + terms = [] + + g = self.lr0_goto(C[state], N) + for p in g: + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index+1] + if a in self.grammar.Terminals: + if a not in terms: + terms.append(a) + + # This extra bit is to handle the start state + if state == 0 and N == self.grammar.Productions[0].prod[0]: + terms.append('$end') + + return terms + + # ----------------------------------------------------------------------------- + # reads_relation() + # + # Computes the READS() relation (p,A) READS (t,C). 
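+    #
+    # Informally: (p,A) READS (t,C) when goto(p,A) lands in state t and t has
+    # an outgoing transition on a nullable nonterminal C, so lookahead symbols
+    # propagate "through" C.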
+ # ----------------------------------------------------------------------------- + + def reads_relation(self, C, trans, empty): + # Look for empty transitions + rel = [] + state, N = trans + + g = self.lr0_goto(C[state], N) + j = self.lr0_cidhash.get(id(g), -1) + for p in g: + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index + 1] + if a in empty: + rel.append((j, a)) + + return rel + + # ----------------------------------------------------------------------------- + # compute_lookback_includes() + # + # Determines the lookback and includes relations + # + # LOOKBACK: + # + # This relation is determined by running the LR(0) state machine forward. + # For example, starting with a production "N : . A B C", we run it forward + # to obtain "N : A B C ." We then build a relationship between this final + # state and the starting state. These relationships are stored in a dictionary + # lookdict. + # + # INCLUDES: + # + # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). + # + # This relation is used to determine non-terminal transitions that occur + # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) + # if the following holds: + # + # B -> LAT, where T -> epsilon and p' -L-> p + # + # L is essentially a prefix (which may be empty), T is a suffix that must be + # able to derive an empty string. State p' must lead to state p with the string L. + # + # ----------------------------------------------------------------------------- + + def compute_lookback_includes(self, C, trans, nullable): + lookdict = {} # Dictionary of lookback relations + includedict = {} # Dictionary of include relations + + # Make a dictionary of non-terminal transitions + dtrans = {} + for t in trans: + dtrans[t] = 1 + + # Loop over all transitions and compute lookbacks and includes + for state, N in trans: + lookb = [] + includes = [] + for p in C[state]: + if p.name != N: + continue + + # Okay, we have a name match. We now follow the production all the way + # through the state machine until we get the . on the right hand side + + lr_index = p.lr_index + j = state + while lr_index < p.len - 1: + lr_index = lr_index + 1 + t = p.prod[lr_index] + + # Check to see if this symbol and state are a non-terminal transition + if (j, t) in dtrans: + # Yes. Okay, there is some chance that this is an includes relation + # the only way to know for certain is whether the rest of the + # production derives empty + + li = lr_index + 1 + while li < p.len: + if p.prod[li] in self.grammar.Terminals: + break # No forget it + if p.prod[li] not in nullable: + break + li = li + 1 + else: + # Appears to be a relation between (j,t) and (state,N) + includes.append((j, t)) + + g = self.lr0_goto(C[j], t) # Go to next set + j = self.lr0_cidhash.get(id(g), -1) # Go to next state + + # When we get here, j is the final state, now we have to locate the production + for r in C[j]: + if r.name != p.name: + continue + if r.len != p.len: + continue + i = 0 + # This look is comparing a production ". A B C" with "A B C ." + while i < r.lr_index: + if r.prod[i] != p.prod[i+1]: + break + i = i + 1 + else: + lookb.append((j, r)) + for i in includes: + if i not in includedict: + includedict[i] = [] + includedict[i].append((state, N)) + lookdict[(state, N)] = lookb + + return lookdict, includedict + + # ----------------------------------------------------------------------------- + # compute_read_sets() + # + # Given a set of LR(0) items, this function computes the read sets. 
+ # + # Inputs: C = Set of LR(0) items + # ntrans = Set of nonterminal transitions + # nullable = Set of empty transitions + # + # Returns a set containing the read sets + # ----------------------------------------------------------------------------- + + def compute_read_sets(self, C, ntrans, nullable): + FP = lambda x: self.dr_relation(C, x, nullable) + R = lambda x: self.reads_relation(C, x, nullable) + F = digraph(ntrans, R, FP) + return F + + # ----------------------------------------------------------------------------- + # compute_follow_sets() + # + # Given a set of LR(0) items, a set of non-terminal transitions, a readset, + # and an include set, this function computes the follow sets + # + # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} + # + # Inputs: + # ntrans = Set of nonterminal transitions + # readsets = Readset (previously computed) + # inclsets = Include sets (previously computed) + # + # Returns a set containing the follow sets + # ----------------------------------------------------------------------------- + + def compute_follow_sets(self, ntrans, readsets, inclsets): + FP = lambda x: readsets[x] + R = lambda x: inclsets.get(x, []) + F = digraph(ntrans, R, FP) + return F + + # ----------------------------------------------------------------------------- + # add_lookaheads() + # + # Attaches the lookahead symbols to grammar rules. + # + # Inputs: lookbacks - Set of lookback relations + # followset - Computed follow set + # + # This function directly attaches the lookaheads to productions contained + # in the lookbacks set + # ----------------------------------------------------------------------------- + + def add_lookaheads(self, lookbacks, followset): + for trans, lb in lookbacks.items(): + # Loop over productions in lookback + for state, p in lb: + if state not in p.lookaheads: + p.lookaheads[state] = [] + f = followset.get(trans, []) + for a in f: + if a not in p.lookaheads[state]: + p.lookaheads[state].append(a) + + # ----------------------------------------------------------------------------- + # add_lalr_lookaheads() + # + # This function does all of the work of adding lookahead information for use + # with LALR parsing + # ----------------------------------------------------------------------------- + + def add_lalr_lookaheads(self, C): + # Determine all of the nullable nonterminals + nullable = self.compute_nullable_nonterminals() + + # Find all non-terminal transitions + trans = self.find_nonterminal_transitions(C) + + # Compute read sets + readsets = self.compute_read_sets(C, trans, nullable) + + # Compute lookback/includes relations + lookd, included = self.compute_lookback_includes(C, trans, nullable) + + # Compute LALR FOLLOW sets + followsets = self.compute_follow_sets(trans, readsets, included) + + # Add all of the lookaheads + self.add_lookaheads(lookd, followsets) + + # ----------------------------------------------------------------------------- + # lr_parse_table() + # + # This function constructs the parse tables for SLR or LALR + # ----------------------------------------------------------------------------- + def lr_parse_table(self): + Productions = self.grammar.Productions + Precedence = self.grammar.Precedence + goto = self.lr_goto # Goto array + action = self.lr_action # Action array + log = self.log # Logger for output + + actionp = {} # Action production array (temporary) + + # Step 1: Construct C = { I0, I1, ... 
IN}, collection of LR(0) items + # This determines the number of states + + C = self.lr0_items() + self.add_lalr_lookaheads(C) + + # Build the parser table, state by state + st = 0 + for I in C: + # Loop over each production in I + actlist = [] # List of actions + st_action = {} + st_actionp = {} + st_goto = {} + log.info('') + log.info('state %d', st) + log.info('') + for p in I: + log.info(' (%d) %s', p.number, p) + log.info('') + + for p in I: + if p.len == p.lr_index + 1: + if p.name == "S'": + # Start symbol. Accept! + st_action['$end'] = 0 + st_actionp['$end'] = p + else: + # We are at the end of a production. Reduce! + laheads = p.lookaheads[st] + for a in laheads: + actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) + r = st_action.get(a) + if r is not None: + # Whoa. Have a shift/reduce or reduce/reduce conflict + if r > 0: + # Need to decide on shift or reduce here + # By default we favor shifting. Need to add + # some precedence rules here. + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from rule being reduced (p) + rprec, rlevel = Productions[p.number].prec + + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): + # We really need to reduce here. + st_action[a] = -p.number + st_actionp[a] = p + if not slevel and not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) + Productions[p.number].reduced += 1 + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None + else: + # Hmmm. Guess we'll keep the shift + if not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) + elif r < 0: + # Reduce/reduce conflict. In this case, we favor the rule + # that was defined first in the grammar file + oldp = Productions[-r] + pp = Productions[p.number] + if oldp.line > pp.line: + st_action[a] = -p.number + st_actionp[a] = p + chosenp, rejectp = pp, oldp + Productions[p.number].reduced += 1 + Productions[oldp.number].reduced -= 1 + else: + chosenp, rejectp = oldp, pp + self.rr_conflicts.append((st, chosenp, rejectp)) + log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', + a, st_actionp[a].number, st_actionp[a]) + else: + raise LALRError('Unknown conflict in state %d' % st) + else: + st_action[a] = -p.number + st_actionp[a] = p + Productions[p.number].reduced += 1 + else: + i = p.lr_index + a = p.prod[i+1] # Get symbol right after the "." + if a in self.grammar.Terminals: + g = self.lr0_goto(I, a) + j = self.lr0_cidhash.get(id(g), -1) + if j >= 0: + # We are in a shift state + actlist.append((a, p, 'shift and go to state %d' % j)) + r = st_action.get(a) + if r is not None: + # Whoa have a shift/reduce or shift/shift conflict + if r > 0: + if r != j: + raise LALRError('Shift/shift conflict in state %d' % st) + elif r < 0: + # Do a precedence check. + # - if precedence of reduce rule is higher, we reduce. + # - if precedence of reduce is same and left assoc, we reduce. + # - otherwise we shift + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from the rule that could have been reduced + rprec, rlevel = Productions[st_actionp[a].number].prec + + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): + # We decide to shift here... 
highest precedence to shift + Productions[st_actionp[a].number].reduced -= 1 + st_action[a] = j + st_actionp[a] = p + if not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None + else: + # Hmmm. Guess we'll keep the reduce + if not slevel and not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) + + else: + raise LALRError('Unknown conflict in state %d' % st) + else: + st_action[a] = j + st_actionp[a] = p + + # Print the actions associated with each terminal + _actprint = {} + for a, p, m in actlist: + if a in st_action: + if p is st_actionp[a]: + log.info(' %-15s %s', a, m) + _actprint[(a, m)] = 1 + log.info('') + # Print the actions that were not used. (debugging) + not_used = 0 + for a, p, m in actlist: + if a in st_action: + if p is not st_actionp[a]: + if not (a, m) in _actprint: + log.debug(' ! %-15s [ %s ]', a, m) + not_used = 1 + _actprint[(a, m)] = 1 + if not_used: + log.debug('') + + # Construct the goto table for this state + + nkeys = {} + for ii in I: + for s in ii.usyms: + if s in self.grammar.Nonterminals: + nkeys[s] = None + for n in nkeys: + g = self.lr0_goto(I, n) + j = self.lr0_cidhash.get(id(g), -1) + if j >= 0: + st_goto[n] = j + log.info(' %-30s shift and go to state %d', n, j) + + action[st] = st_action + actionp[st] = st_actionp + goto[st] = st_goto + st += 1 + +# ----------------------------------------------------------------------------- +# get_caller_module_dict() +# +# This function returns a dictionary containing all of the symbols defined within +# a caller further down the call stack. This is used to get the environment +# associated with the yacc() call if none was provided. +# ----------------------------------------------------------------------------- + +def get_caller_module_dict(levels): + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict + +# ----------------------------------------------------------------------------- +# parse_grammar() +# +# This takes a raw grammar rule string and parses it into production data +# ----------------------------------------------------------------------------- +def parse_grammar(doc, file, line): + grammar = [] + # Split the doc string into lines + pstrings = doc.splitlines() + lastp = None + dline = line + for ps in pstrings: + dline += 1 + p = ps.split() + if not p: + continue + try: + if p[0] == '|': + # This is a continuation of a previous rule + if not lastp: + raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) + prodname = lastp + syms = p[1:] + else: + prodname = p[0] + lastp = prodname + syms = p[2:] + assign = p[1] + if assign != ':' and assign != '::=': + raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) + + grammar.append((file, dline, prodname, syms)) + except SyntaxError: + raise + except Exception: + raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) + + return grammar + +# ----------------------------------------------------------------------------- +# ParserReflect() +# +# This class represents information extracted for building a parser including +# start symbol, error function, tokens, precedence list, action functions, +# etc. 
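+#
+# In typical use, get_all() harvests this information from the caller's
+# namespace and validate_all() reports any problems before the grammar and
+# parse tables are built.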
+# ----------------------------------------------------------------------------- +class ParserReflect(object): + def __init__(self, pdict, log=None): + self.pdict = pdict + self.start = None + self.error_func = None + self.tokens = None + self.modules = set() + self.grammar = [] + self.error = False + + if log is None: + self.log = Logger(sys.stderr) + else: + self.log = log + + # Get all of the basic information + def get_all(self): + self.get_start() + self.get_error_func() + self.get_tokens() + self.get_precedence() + self.get_pfunctions() + + # Validate all of the information + def validate_all(self): + self.validate_start() + self.validate_error_func() + self.validate_tokens() + self.validate_precedence() + self.validate_pfunctions() + self.validate_modules() + return self.error + + # Compute a signature over the grammar + def signature(self): + parts = [] + try: + if self.start: + parts.append(self.start) + if self.prec: + parts.append(''.join([''.join(p) for p in self.prec])) + if self.tokens: + parts.append(' '.join(self.tokens)) + for f in self.pfuncs: + if f[3]: + parts.append(f[3]) + except (TypeError, ValueError): + pass + return ''.join(parts) + + # ----------------------------------------------------------------------------- + # validate_modules() + # + # This method checks to see if there are duplicated p_rulename() functions + # in the parser module file. Without this function, it is really easy for + # users to make mistakes by cutting and pasting code fragments (and it's a real + # bugger to try and figure out why the resulting parser doesn't work). Therefore, + # we just do a little regular expression pattern matching of def statements + # to try and detect duplicates. + # ----------------------------------------------------------------------------- + + def validate_modules(self): + # Match def p_funcname( + fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') + + for module in self.modules: + try: + lines, linen = inspect.getsourcelines(module) + except IOError: + continue + + counthash = {} + for linen, line in enumerate(lines): + linen += 1 + m = fre.match(line) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + filename = inspect.getsourcefile(module) + self.log.warning('%s:%d: Function %s redefined. 
Previously defined on line %d', + filename, linen, name, prev) + + # Get the start symbol + def get_start(self): + self.start = self.pdict.get('start') + + # Validate the start symbol + def validate_start(self): + if self.start is not None: + if not isinstance(self.start, str): + self.log.error("'start' must be a string") + + # Look for error handler + def get_error_func(self): + self.error_func = self.pdict.get('p_error') + + # Validate the error function + def validate_error_func(self): + if self.error_func: + if isinstance(self.error_func, types.FunctionType): + ismethod = 0 + elif isinstance(self.error_func, types.MethodType): + ismethod = 1 + else: + self.log.error("'p_error' defined, but is not a function or method") + self.error = True + return + + eline = self.error_func.__code__.co_firstlineno + efile = self.error_func.__code__.co_filename + module = inspect.getmodule(self.error_func) + self.modules.add(module) + + argcount = self.error_func.__code__.co_argcount - ismethod + if argcount != 1: + self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) + self.error = True + + # Get the tokens map + def get_tokens(self): + tokens = self.pdict.get('tokens') + if not tokens: + self.log.error('No token list is defined') + self.error = True + return + + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True + return + + if not tokens: + self.log.error('tokens is empty') + self.error = True + return + + self.tokens = sorted(tokens) + + # Validate the tokens + def validate_tokens(self): + # Validate the tokens. + if 'error' in self.tokens: + self.log.error("Illegal token name 'error'. Is a reserved word") + self.error = True + return + + terminals = set() + for n in self.tokens: + if n in terminals: + self.log.warning('Token %r multiply defined', n) + terminals.add(n) + + # Get the precedence map (if any) + def get_precedence(self): + self.prec = self.pdict.get('precedence') + + # Validate and parse the precedence map + def validate_precedence(self): + preclist = [] + if self.prec: + if not isinstance(self.prec, (list, tuple)): + self.log.error('precedence must be a list or tuple') + self.error = True + return + for level, p in enumerate(self.prec): + if not isinstance(p, (list, tuple)): + self.log.error('Bad precedence table') + self.error = True + return + + if len(p) < 2: + self.log.error('Malformed precedence entry %s. 
Must be (assoc, term, ..., term)', p) + self.error = True + return + assoc = p[0] + if not isinstance(assoc, str): + self.log.error('precedence associativity must be a string') + self.error = True + return + for term in p[1:]: + if not isinstance(term, str): + self.log.error('precedence items must be strings') + self.error = True + return + preclist.append((term, assoc, level+1)) + self.preclist = preclist + + # Get all p_functions from the grammar + def get_pfunctions(self): + p_functions = [] + for name, item in self.pdict.items(): + if not name.startswith('p_') or name == 'p_error': + continue + if isinstance(item, (types.FunctionType, types.MethodType)): + line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) + module = inspect.getmodule(item) + p_functions.append((line, module, name, item.__doc__)) + + # Sort all of the actions by line number; make sure to stringify + # modules to make them sortable, since `line` may not uniquely sort all + # p functions + p_functions.sort(key=lambda p_function: ( + p_function[0], + str(p_function[1]), + p_function[2], + p_function[3])) + self.pfuncs = p_functions + + # Validate all of the p_functions + def validate_pfunctions(self): + grammar = [] + # Check for non-empty symbols + if len(self.pfuncs) == 0: + self.log.error('no rules of the form p_rulename are defined') + self.error = True + return + + for line, module, name, doc in self.pfuncs: + file = inspect.getsourcefile(module) + func = self.pdict[name] + if isinstance(func, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + if func.__code__.co_argcount > reqargs: + self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) + self.error = True + elif func.__code__.co_argcount < reqargs: + self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) + self.error = True + elif not func.__doc__: + self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', + file, line, func.__name__) + else: + try: + parsed_g = parse_grammar(doc, file, line) + for g in parsed_g: + grammar.append((name, g)) + except SyntaxError as e: + self.log.error(str(e)) + self.error = True + + # Looks like a valid grammar rule + # Mark the file in which defined. + self.modules.add(module) + + # Secondary validation step that looks for p_ definitions that are not functions + # or functions that look like they might be grammar rules. 
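+        # For example (a hypothetical case), a module-level definition such as
+        #
+        #     def statement(p):
+        #         'statement : expr SEMI'
+        #
+        # has a rule-like docstring but lacks the p_ prefix, so it is never
+        # collected by get_pfunctions(); the loop below warns about it.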
+ + for n, v in self.pdict.items(): + if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): + continue + if n.startswith('t_'): + continue + if n.startswith('p_') and n != 'p_error': + self.log.warning('%r not defined as a function', n) + if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or + (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): + if v.__doc__: + try: + doc = v.__doc__.split(' ') + if doc[1] == ':': + self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', + v.__code__.co_filename, v.__code__.co_firstlineno, n) + except IndexError: + pass + + self.grammar = grammar + +# ----------------------------------------------------------------------------- +# yacc(module) +# +# Build a parser +# ----------------------------------------------------------------------------- + +def yacc(*, debug=yaccdebug, module=None, start=None, + check_recursion=True, optimize=False, debugfile=debug_file, + debuglog=None, errorlog=None): + + # Reference to the parsing method of the last built parser + global parse + + if errorlog is None: + errorlog = Logger(sys.stderr) + + # Get the module dictionary used for the parser + if module: + _items = [(k, getattr(module, k)) for k in dir(module)] + pdict = dict(_items) + # If no __file__ or __package__ attributes are available, try to obtain them + # from the __module__ instead + if '__file__' not in pdict: + pdict['__file__'] = sys.modules[pdict['__module__']].__file__ + if '__package__' not in pdict and '__module__' in pdict: + if hasattr(sys.modules[pdict['__module__']], '__package__'): + pdict['__package__'] = sys.modules[pdict['__module__']].__package__ + else: + pdict = get_caller_module_dict(2) + + # Set start symbol if it's specified directly using an argument + if start is not None: + pdict['start'] = start + + # Collect parser information from the dictionary + pinfo = ParserReflect(pdict, log=errorlog) + pinfo.get_all() + + if pinfo.error: + raise YaccError('Unable to build parser') + + if debuglog is None: + if debug: + try: + debuglog = Logger(open(debugfile, 'w')) + except IOError as e: + errorlog.warning("Couldn't open %r. 
%s" % (debugfile, e)) + debuglog = NullLogger() + else: + debuglog = NullLogger() + + errors = False + + # Validate the parser information + if pinfo.validate_all(): + raise YaccError('Unable to build parser') + + if not pinfo.error_func: + errorlog.warning('no p_error() function is defined') + + # Create a grammar object + grammar = Grammar(pinfo.tokens) + + # Set precedence level for terminals + for term, assoc, level in pinfo.preclist: + try: + grammar.set_precedence(term, assoc, level) + except GrammarError as e: + errorlog.warning('%s', e) + + # Add productions to the grammar + for funcname, gram in pinfo.grammar: + file, line, prodname, syms = gram + try: + grammar.add_production(prodname, syms, funcname, file, line) + except GrammarError as e: + errorlog.error('%s', e) + errors = True + + # Set the grammar start symbols + try: + if start is None: + grammar.set_start(pinfo.start) + else: + grammar.set_start(start) + except GrammarError as e: + errorlog.error(str(e)) + errors = True + + if errors: + raise YaccError('Unable to build parser') + + # Verify the grammar structure + undefined_symbols = grammar.undefined_symbols() + for sym, prod in undefined_symbols: + errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) + errors = True + + unused_terminals = grammar.unused_terminals() + if unused_terminals: + debuglog.info('') + debuglog.info('Unused terminals:') + debuglog.info('') + for term in unused_terminals: + errorlog.warning('Token %r defined, but not used', term) + debuglog.info(' %s', term) + + # Print out all productions to the debug log + if debug: + debuglog.info('') + debuglog.info('Grammar') + debuglog.info('') + for n, p in enumerate(grammar.Productions): + debuglog.info('Rule %-5d %s', n, p) + + # Find unused non-terminals + unused_rules = grammar.unused_rules() + for prod in unused_rules: + errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) + + if len(unused_terminals) == 1: + errorlog.warning('There is 1 unused token') + if len(unused_terminals) > 1: + errorlog.warning('There are %d unused tokens', len(unused_terminals)) + + if len(unused_rules) == 1: + errorlog.warning('There is 1 unused rule') + if len(unused_rules) > 1: + errorlog.warning('There are %d unused rules', len(unused_rules)) + + if debug: + debuglog.info('') + debuglog.info('Terminals, with rules where they appear') + debuglog.info('') + terms = list(grammar.Terminals) + terms.sort() + for term in terms: + debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]])) + + debuglog.info('') + debuglog.info('Nonterminals, with rules where they appear') + debuglog.info('') + nonterms = list(grammar.Nonterminals) + nonterms.sort() + for nonterm in nonterms: + debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]])) + debuglog.info('') + + if check_recursion: + unreachable = grammar.find_unreachable() + for u in unreachable: + errorlog.warning('Symbol %r is unreachable', u) + + infinite = grammar.infinite_cycles() + for inf in infinite: + errorlog.error('Infinite recursion detected for symbol %r', inf) + errors = True + + unused_prec = grammar.unused_precedence() + for term, assoc in unused_prec: + errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term) + errors = True + + if errors: + raise YaccError('Unable to build parser') + + # Run the LRTable on the grammar + lr = LRTable(grammar, debuglog) + + if debug: + num_sr = 
len(lr.sr_conflicts) + + # Report shift/reduce and reduce/reduce conflicts + if num_sr == 1: + errorlog.warning('1 shift/reduce conflict') + elif num_sr > 1: + errorlog.warning('%d shift/reduce conflicts', num_sr) + + num_rr = len(lr.rr_conflicts) + if num_rr == 1: + errorlog.warning('1 reduce/reduce conflict') + elif num_rr > 1: + errorlog.warning('%d reduce/reduce conflicts', num_rr) + + # Write out conflicts to the output file + if debug and (lr.sr_conflicts or lr.rr_conflicts): + debuglog.warning('') + debuglog.warning('Conflicts:') + debuglog.warning('') + + for state, tok, resolution in lr.sr_conflicts: + debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) + + already_reported = set() + for state, rule, rejected in lr.rr_conflicts: + if (state, id(rule), id(rejected)) in already_reported: + continue + debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + debuglog.warning('rejected rule (%s) in state %d', rejected, state) + errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + errorlog.warning('rejected rule (%s) in state %d', rejected, state) + already_reported.add((state, id(rule), id(rejected))) + + warned_never = [] + for state, rule, rejected in lr.rr_conflicts: + if not rejected.reduced and (rejected not in warned_never): + debuglog.warning('Rule (%s) is never reduced', rejected) + errorlog.warning('Rule (%s) is never reduced', rejected) + warned_never.append(rejected) + + # Build the parser + lr.bind_callables(pinfo.pdict) + parser = LRParser(lr, pinfo.error_func) + + parse = parser.parse + return parser diff --git "a/script/local/parser/\346\226\207\346\263\225.md" "b/script/local/parser/\346\226\207\346\263\225.md" new file mode 100644 index 00000000..da91b461 --- /dev/null +++ "b/script/local/parser/\346\226\207\346\263\225.md" @@ -0,0 +1,31 @@ +sentence : conditions THEN function +conditions : conditions OR and_conditions + | and_conditions +and_conditions : and_conditions AND not_conditions + | not_conditions +not_conditions : cdt + | NOT cdt +cdt : expr EQUAL expr + | expr NEQUAL expr + | expr GE expr + | expr GT expr + | expr LE expr + | expr LT expr + | LPAREN conditions RPAREN +expr : expr PLUS term + | expr MINUS term + | term +term : term TIMES factor + | term DIVIDE factor + | term MOD factor + | factor +factor : NUMBER + | STRING + | ID + | NULL + | TRUE + | FALSE + | LPAREN expr RPAREN +function : ID LPAREN variables RPAREN +variables : variables COMMA expr + | expr diff --git a/script/local/rules/rules_multi_node.csv b/script/local/rules/rules_multi_node.csv new file mode 100644 index 00000000..b2fd253c --- /dev/null +++ b/script/local/rules/rules_multi_node.csv @@ -0,0 +1,59 @@ +,Dependency +0,"bypass_workload_manager != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: bypass_workload_manager = false for bypass_workload_manager is not false."")" +1,"enable_dynamic_workload != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_dynamic_workload = false for enable_dynamic_workload is not false."")" +2,"enable_control_group != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_control_group = true for enable_control_group is not true."")" +3,"enable_backend_control != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_backend_control = true for 
enable_backend_control is not true."")" +4,"enable_vacuum_control != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_vacuum_control = true for enable_vacuum_control is not true."")" +5,"enable_cgroup_switch != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_cgroup_switch = false for enable_cgroup_switch is not false."")" +6,"enable_force_memory_control != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_force_memory_control = false for enable_force_memory_control is not false."")" +7,"enable_dywlm_adjust != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_dywlm_adjust = true for enable_dywlm_adjust is not true."")" +8,"enable_reaper_backend != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_reaper_backend = true for enable_reaper_backend is not true."")" +9,"enable_perm_space != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_perm_space = true for enable_perm_space is not true."")" +10,"enable_transaction_parctl != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_transaction_parctl = true for enable_transaction_parctl is not true."")" +11,"max_active_statements != -1->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: max_active_statements = -1 for max_active_statements is not -1."")" +12,"dynamic_memory_quota != 80->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: dynamic_memory_quota = 80 for dynamic_memory_quota is not 80."")" +13,"comm_client_bind != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: comm_client_bind = false for comm_client_bind is not false."")" +14,"comm_max_datanode != 256->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: comm_max_datanode = 256 for comm_max_datanode is not 256."")" +15,"comm_max_stream != 1024->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: comm_max_stream = 1024 for comm_max_stream is not 1024."")" +16,"enable_parallel_ddl != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_parallel_ddl = true for enable_parallel_ddl is not true."")" +17,"enable_nodegroup_debug != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_nodegroup_debug = false for enable_nodegroup_debug is not false."")" +18,"enable_dngather != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_dngather = false for enable_dngather is not false."")" +19,"enable_light_proxy != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_light_proxy = true for enable_light_proxy is not true."")" +20,"enable_trigger_shipping != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_trigger_shipping = true for enable_trigger_shipping is not true."")" +21,"enable_ai_stats != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_ai_stats = true for enable_ai_stats is not true."")" +22,"enable_remotejoin != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_remotejoin = true for enable_remotejoin is not 
true."")" +23,"enable_fast_query_shipping != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_fast_query_shipping = true for enable_fast_query_shipping is not true."")" +24,"enable_remotegroup != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_remotegroup = true for enable_remotegroup is not true."")" +25,"enable_remotesort != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_remotesort = true for enable_remotesort is not true."")" +26,"enable_remotelimit != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_remotelimit = true for enable_remotelimit is not true."")" +27,"gtm_backup_barrier != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: gtm_backup_barrier = false for gtm_backup_barrier is not false."")" +28,"enable_stream_operator != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_stream_operator = true for enable_stream_operator is not true."")" +29,"enable_unshipping_log != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_unshipping_log = false for enable_unshipping_log is not false."")" +30,"enable_stream_concurrent_update != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_stream_concurrent_update = true for enable_stream_concurrent_update is not true."")" +31,"enable_stream_recursive != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_stream_recursive = true for enable_stream_recursive is not true."")" +32,"enable_random_datanode != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_random_datanode = true for enable_random_datanode is not true."")" +33,"enable_fstream != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_fstream = false for enable_fstream is not false."")" +34,"enable_cluster_resize != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_cluster_resize = false for enable_cluster_resize is not false."")" +35,"enable_acceleration_cluster_wlm != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_acceleration_cluster_wlm = false for enable_acceleration_cluster_wlm is not false."")" +36,"agg_redistribute_enhancement != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: agg_redistribute_enhancement = false for agg_redistribute_enhancement is not false."")" +37,"max_cn_temp_file_size != 5242880->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: max_cn_temp_file_size = 5242880 for max_cn_temp_file_size is not 5242880."")" +38,"best_agg_plan != 0->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: best_agg_plan = 0 for best_agg_plan is not 0."")" +39,"dngather_min_rows != 500->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: dngather_min_rows = 500 for dngather_min_rows is not 500."")" +40,"stream_multiple != 1->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: stream_multiple = 1 for stream_multiple is not 1."")" +41,"expected_computing_nodegroup != ""query""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will 
be overwrite: expected_computing_nodegroup = 'query' for expected_computing_nodegroup is not 'query'."")" +42,"default_storage_nodegroup != ""installation""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: default_storage_nodegroup = 'installation' for default_storage_nodegroup is not 'installation'."")" +43,"application_type != ""not_perfect_sharding_type""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: application_type = 'not_perfect_sharding_type' for application_type is not 'not_perfect_sharding_type'."")" +44,"enable_gtm_free != ""on""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_gtm_free = true for enable_gtm_free is not true."")" +45,"comm_cn_dn_logic_conn != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: comm_cn_dn_logic_conn = false for comm_cn_dn_logic_conn is not false."")" +46,"gtm_option != 2->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: gtm_option = 2 for gtm_option is not 2."")" +47,"gtm_connect_retries != 30->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: gtm_connect_retries = 30 for gtm_connect_retries is not 30."")" +48,"gtm_conn_check_interval != 10->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: gtm_conn_check_interval = 10 for gtm_conn_check_interval is not 10."")" +49,"default_index_kind != 2->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: default_index_kind = 2 for default_index_kind is not 2."")" +50,"update_process_title != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: update_process_title = false for update_process_title is not false."")" +51,"enable_router != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_router = false for enable_router is not false."")" +52,"enable_redistribute != ""off""->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: enable_redistribute = false for enable_redistribute is not false."")" +53,"transaction_sync_naptime != 30->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: transaction_sync_naptime = 30 for transaction_sync_naptime is not 30."")" +54,"transaction_sync_timeout != 600->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: transaction_sync_timeout = 600 for transaction_sync_timeout is not 600."")" +55,"session_sequence_cache != 10->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: session_sequence_cache = 10 for session_sequence_cache is not 10."")" +56,"gtm_connect_timeout != 2->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: gtm_connect_timeout = 2 for gtm_connect_timeout is not 2."")" +57,"max_datanode_for_plan != 0->Overwrite(1,"" When cluster is not a singleNode, UnsupportGuc will be overwrite: max_datanode_for_plan = 0 for max_datanode_for_plan is not 0."")" diff --git a/script/local/rules/rules_single_node.csv b/script/local/rules/rules_single_node.csv new file mode 100644 index 00000000..60c31ece --- /dev/null +++ b/script/local/rules/rules_single_node.csv @@ -0,0 +1,108 @@ +,Dependency +0,"enable_global_plancache == ""on"" && enable_thread_pool == ""off""->NotEffect(1,"" ENABLE_GPC will not be on when enable_thread_pool == false"")" +1,"enable_cachedplan_mgr == ""on"" && 
(enable_global_plancache == ""on"" && enable_thread_pool == ""on"")->NotEffect(1,"" enable_cachedplan_mgr will not be on when ENABLE_GPC is on"")"
+2,"transaction_read_only==""on"" && default_transaction_read_only==""off""->Overwrite(2,"" transaction_read_only = true but default_transaction_read_only = false, transaction_read_only may be overwritten as false"")"
+3,"transaction_deferrable != default_transaction_deferrable->Overwrite(1,"" transaction_deferrable != default_transaction_deferrable, transaction_deferrable will be overwritten by default_transaction_deferrable"")"
+4,"zero_damaged_pages==""on""->Performance(1,"" Set zero_damaged_pages=true may cause 5-15% performance degradation"")"
+5,"work_mem < 256*1024->NotEffect(2,"" Set work_mem <= 256*1024 may make work_mem invalid for calculating free_mem"")"
+6,"statement_mem > work_mem && statement_max_mem > work_mem->NotEffect(2,"" Set statement_mem > work_mem && statement_max_mem > work_mem may make work_mem invalid for calculating free_mem"")"
+7,"enable_wdr_snapshot == ""off"" || ss_enable_dms == ""on""->NotEffect(1,"" Set enable_wdr_snapshot = false or ss_enable_dms = true will make wdr_snapshot_interval and wdr_snapshot_retention_days invalid"")"
+8,"ss_enable_dss == ""on""->NotEffect(1,"" Set ss_enable_dss = true will make wal_sync_method invalid"")"
+9,"enable_mix_replication == ""on""->NotEffect(2,"" Set enable_mix_replication == true may make wal_sync_method invalid"")"
+10,"(ss_enable_dms == ""on"" || ss_enable_dss == ""on"") && default_transaction_isolation != ""read committed""->NotEffect(1,"" Only support read committed transaction isolation level while DMS and DSS enabled."")"
+11,"pooler_port != port+1->Alert(1,"" pooler_port must equal the gsql listen port plus one!"")"
+12,"max_active_gtt <= 0 && vacuum_gtt_defer_check_age > 0->NotEffect(1,"" max_active_gtt <=0 will make vacuum_gtt_defer_check_age not used"")"
+13,"vacuum_freeze_table_age > autovacuum_freeze_max_age*0.95->NotEffect(1,"" vacuum_freeze_table_age > autovacuum_freeze_max_age * 0.95 will make vacuum_freeze_table_age not used"")"
+14,"enable_hadoop_env == ""on"" && max_query_retry_times > 0->NotEffect(1,"" enable_hadoop_env == true will make stmt retry not enabled"")"
+15,"enable_gtm_free == ""on"" && upgrade_mode == 0 && enable_cluster_resize == ""off"" && enable_twophase_commit == ""off""->Alert(2,"" enable_gtm_free == true && upgrade_mode == 0 && enable_cluster_resize == false && enable_twophase_commit == false may lead to errmsg: Unsupport DML two phase commit under gtm free mode. 
Set enable_twophase_commit to on if need to use DML two phase commit."")" +16,"enable_incremental_checkpoint == ""on"" && incremental_checkpoint_timeout > 0 && checkpoint_timeout > 0->Overwrite(1,"" The actual checkpoint timeout will be assigned by incremental_checkpoint_timeout instead of checkpoint_timeout"")" +17,"(ss_enable_dss == ""on"" || enable_incremental_checkpoint == ""off"") && enable_double_write == ""on""->NotEffect(1,"" enable_double_write will not effect when ss_enable_dss == true || enable_incremental_checkpoint == false"")" +18,"ss_enable_dss == ""on""->Overwrite(2,"" When set ss_enable_dss == true, check if ENABLE_LITE_MODE, if ENABLE_LITE_MODE, ss_enable_dss will be overwrite as false "")" +19,"plog_merge_age >0 && logging_collector == ""off""->NotEffect(1,"" plog_merge_age > 0 but logging_collector == false, profile log will not be collected"")" +20,"autovacuum_vacuum_cost_delay >= 0->NotEffect(1,"" The actual vacuum_cost_delay will be assigned by autovacuum_vacuum_cost_delay instead of vacuum_cost_delay"")" +21,"autovacuum_vacuum_cost_limit >= 0->NotEffect(1,"" The actual vacuum_cost_limit will be assigned by autovacuum_vacuum_cost_limit instead of vacuum_cost_limit"")" +22,"enable_vector_engine == ""off""->NotEffect(1,"" try_vector_engine_strategy will not effect when enable_vector_engine == false "")" +23,"enable_acceleration_cluster_wlm == ""on"" && transaction_pending_time > 0->NotEffect(2,"" transaction_pending_time may not effect when enable_acceleration_cluster_wlm == true"")" +24,"transaction_read_only == ""on"" && transaction_deferrable == ""on""->Alert(2,"" Errormsg may report: A snapshot-importing transaction must not be READ ONLY DEFERRABLE"")" +25,"track_activities == ""off""->Alert(2,"" Errormsg may report: GUC parameter 'track_activities' is off"")" +26,"track_sql_count == ""off""->Alert(2,"" Errormsg may report: GUC parameter 'track_sql_count' is off"")" +27," td_compatible_truncation == ""on"" && sql_compatibility == ""C""->Alert(2,"" td_compatible_truncation == true && sql_compatibility == C_FORMAT may lead to error report: failed on assertion in specific file and line"")" +28,"ss_enable_dms == ""on"" || enable_stream_replication==""off"" ||max_wal_senders <=0 || synchronous_commit == ""off"" || synchronous_commit == ""local""->Alert(2,"" Current configs will not requested sync replication: +ss_enable_dms == true || enable_stream_replication==false ||max_wal_senders <=0 || synchronous_commit <= SYNCHRONOUS_COMMIT_LOCAL_FLUSH"")" +29,"(synchronous_commit != ""off"" && synchronous_commit != ""local"")&& enable_dcf == ""on""->Function(2,"" WaitCheckpointSync will use SyncPaxosWaitForLSN instead of SyncRepWaitForLSN"")" +30,"standby_shared_buffers_fraction < 1.0->Overwrite(2,"" standby_shared_buffers_fraction may be overwrite by standby_shared_buffers_fraction + 0.1 when standby_shared_buffers_fraction < 1.0"")" +31,"standby_shared_buffers_fraction < 1.0->Alert(2,"" standby_shared_buffers_fraction < 1.0 may lead to errmsg: no unpinned buffers available"")" +32,"enable_nvm == ""off"" && (nvm_file_path != """" || nvm_buffers >0)->NotEffect(1,"" nvm_file_path and nvm_buffers will not effect when enable_nvm == false"")" +33,"enable_global_plancache == ""on"" && enable_thread_pool == ""on""->NotEffect(2,"" sql_beta_feature and nvm_buffers will not effect when GPC not enabled"")" +34,"use_workload_manager == ""off"" && enable_resource_track == ""on""->NotEffect(1,"" enable_resource_track will not effect when use_workload_manager == false"")" 
+35,"use_workload_manager == ""off"" && enable_resource_record == ""on""->NotEffect(1,"" enable_resource_record will not effect when use_workload_manager == false"")"
+36,"use_workload_manager == ""off"" && enable_logical_io_statistics == ""on""->NotEffect(1,"" enable_logical_io_statistics will not effect when use_workload_manager == false"")"
+37,"use_workload_manager == ""off"" && enable_user_metric_persistent == ""on""->NotEffect(1,"" enable_user_metric_persistent will not effect when use_workload_manager == false"")"
+38,"(ssl == ""off"" && require_ssl == ""on"") || (ssl == ""on"" && require_ssl == ""off"")->Function(2,"" ssl and require_ssl should be set to the same value"")"
+39,"user_metric_retention_time >= 0 && enable_user_metric_persistent == ""off""->NotEffect(2,"" user_metric_retention_time may not effect when enable_user_metric_persistent == false"")"
+40,"bbox_dump_count >=0 && enable_bbox_dump ==""off""->NotEffect(1,"" bbox_dump_count will not effect when enable_bbox_dump == false"")"
+41,"bbox_dump_path != """" && enable_bbox_dump ==""off""->NotEffect(1,"" bbox_dump_path will not effect when enable_bbox_dump == false"")"
+42,"enable_expr_fusion == ""on"" && query_dop != 1->NotEffect(1,"" enable_expr_fusion will not be on when query_dop != 1"")"
+43,"use_workload_manager == ""off"" && query_band != """"->NotEffect(2,"" query_band may not effect when use_workload_manager == false"")"
+44,"plsql_show_all_error == ""on"" || check_function_bodies == ""on""->Alert(2,"" Errormsg may report: InsertError for some func_oid may happen"")"
+45,"(password_min_length >0 || password_max_length >0|| password_min_uppercase != 0 || password_min_lowercase != 0 || password_min_digital != 0 || password_min_special != 0) && password_policy == 0->NotEffect(2,"" password_policy == 0 will make password_min_length,password_max_length,password_min_uppercase,password_min_lowercase,password_min_digital,password_min_special not effect"")"
+46,"(pagewriter_sleep >0 || dirty_page_percent_max>0 || candidate_buf_percent_target > 0) && enable_incremental_checkpoint == ""off""->NotEffect(2,"" candidate_buf_percent_target, pagewriter_sleep and dirty_page_percent_max may not effect when enable_incremental_checkpoint == false"")"
+47,"bgwriter_delay < pagewriter_sleep->NotEffect(2,"" Next scan buffer pool time will use pagewriter_sleep instead of bgwriter_delay for bgwriter_delay < pagewriter_sleep"")"
+48,"dw_file_num > pagewriter_thread_num->NotEffect(1,"" dw_file_num will be assigned as pagewriter_thread_num when dw_file_num > pagewriter_thread_num"")"
+49,"lo_compat_privileges == ""on""->Function(3,"" Set lo_compat_privileges to true will disable permission checks when reading or modifying large objects"")"
+50,"enable_incremental_checkpoint == ""off"" && log_pagewriter == ""on""->NotEffect(2,"" log_pagewriter will not effect when enable_incremental_checkpoint == false "")"
+51,"enable_opfusion == ""off"" && enable_beta_opfusion == ""on""->NotEffect(2,"" enable_beta_opfusion will not effect when enable_opfusion == false "")"
+52,"log_duration == ""off"" && log_min_duration_statement < 0->NotEffect(2,"" log_min_duration_statement will not effect when log_duration == false && log_min_duration_statement < 0"")"
+53,"logging_collector == ""off"" && log_error_verbosity != ""terse""->NotEffect(2,"" log_error_verbosity will not effect when logging_collector == false"")"
+54,"most_available_sync == ""off"" && keep_sync_window > 0->NotEffect(2,"" keep_sync_window will not effect when most_available_sync == false"")"
+55,"xlog_file_path == 0 || enable_dcf == ""on"" || (synchronous_commit != ""off"" && synchronous_commit != ""local"")->Function(2,"" NotDelayIntoMostAvaSync may fail when xlog_file_path == 0 || enable_dcf = true || synchronous_commit > SYNCHRONOUS_COMMIT_LOCAL_FLUSH"")"
+56," enable_global_plancache == ""off"" || enable_thread_pool == ""off""->NotEffect(2,"" join_collapse_limit will not effect when GPC not enabled"")"
+57,"resource_track_level == ""none"" || enable_resource_track == ""off"" || resource_track_cost == -1->NotEffect(2,"" io_limits and io_priority may not effect when resource_track_level == RESOURCE_TRACK_NONE || enable_resource_track == false || resource_track_cost == -1"")"
+58,"resource_track_level != ""none"" && enable_resource_track == ""off""->NotEffect(2,"" resource_track_level will not effect when enable_resource_track == false"")"
+59,"query_max_mem > 0 && query_max_mem < 32768->Overwrite(1,"" query_max_mem will be assigned to 0 when 0 < query_max_mem < 32768"")"
+60,"idle_in_transaction_session_timeout > 0 && session_timeout == 0->Alert(2,"" Errormsg 'could not disable timer for idle-in-transaction timeout' may report when idle_in_transaction_session_timeout > 0 && session_timeout == 0"")"
+61,"geqo == ""off"" && (geqo_threshold > 0 || geqo_pool_size > 0 || geqo_generations > 0 || geqo_selection_bias != NULL)->NotEffect(1,"" geqo == false will make geqo_selection_bias, geqo_threshold, geqo_pool_size, geqo_generations not effect"")"
+62,"geqo_selection_bias != NULL && ( enable_global_plancache == ""off"" || enable_thread_pool == ""off"")->NotEffect(2,"" geqo_selection_bias may not effect when GPC not enabled"")"
+63,"enable_double_write == ""on"" && enable_incremental_checkpoint == ""off""->NotEffect(1,"" enable_double_write will not effect when enable_incremental_checkpoint == false"")"
+64,"enable_double_write == ""on"" && full_page_writes == ""on""->NotEffect(1,"" full_page_writes will not open when enable_double_write is open"")"
+65,"enable_fast_allocate == ""off"" && fast_extend_file_size > 0->NotEffect(1,"" fast_extend_file_size will not effect when enable_fast_allocate == false"")"
+66,"enable_stream_replication == ""on"" && xlog_file_path == NULL->NotEffect(2,"" enable_stream_replication may not effect when xlog_file_path == NULL"")"
+67,"enable_stream_replication == ""on"" && enable_mix_replication == ""on""->NotEffect(2,"" enable_stream_replication may not open when enable_mix_replication == true"")"
+68,"enable_show_any_tuples == ""on""->Alert(3,"" enable_show_any_tuples == true will make all versions of the tuple in the table visible, and may lead to errmsg for specific sqls that cannot be executed when enable_show_any_tuples is true."")"
+69,"enable_online_ddl_waitlock == ""on"" && xc_maintenance_mode == ""off""->Alert(2,"" Errormsg may be reported: 'kill backend is prohibited during online expansion.' when enable_online_ddl_waitlock == true && xc_maintenance_mode == false"")"
+70,"enable_hashagg == ""off""->Function(3,"" Errormsg may report: '[Multi count(distinct) convert failure reason]: Enable_hashagg disabled.' when enable_hashagg == false"")"
+71,"enable_data_replicate == ""on""->Alert(1,"" Check if starting as multi_standby mode, if starting as multi_standby mode, errormsg will report: 'when starting as multi_standby mode, we couldn't support data replicaton.' 
when enable_data_replicate == true"")" +72,"enable_mix_replication == ""on"" && walsender_max_send_size >= data_replicate_buffer_size->Alert(1,"" Errormsg will report: 'the data queue buffer size must be larger than the wal sender max send size for the ' + +'replication data synchronized by the WAL streaming.' when enable_mix_replication == true && walsender_max_send_size >= data_replicate_buffer_size"")" +73,"max_wal_senders >= max_connections->Alert(1,"" Errormsg will report: 'max_wal_senders must be less than max_connections ' when max_wal_senders >= max_connections"")" +74,"sysadmin_reserved_connections >= max_connections->Alert(1,"" Errormsg will report: 'sysadmin_reserved_connections must be less than max_connections ' when sysadmin_reserved_connections >= max_connections"")" +75,"archive_mode == ""on"" && wal_level == ""minimal""->Alert(1,"" Errormsg will report: 'WAL archival (archive_mode=on) requires wal_level \'archive\', \'hot_standby\' or \'logical\'' when archive_mode == true && wal_level == 'minimal'"")" +76,"max_wal_senders > 0 && wal_level == ""minimal""->Alert(1,"" Errormsg will report: 'WAL streaming (max_wal_senders > 0) requires wal_level \'archive\', \'hot_standby\' or ' + +'\'logical\'' when max_wal_senders > 0 && wal_level == 'minimal'"")" +77,"wal_level != ""hot_standby"" && hot_standby == ""on""->Alert(1,"" Errormsg will report: 'hot standby is not possible because wal_level was not set to \'hot_standby\'' when wal_level != 'hot_standby' && hot_standby == true"")" +78,"max_wal_senders <1 && wal_level == ""minimal""->Alert(1,"" Check if starting as dual mode, if true, errormsg may report: 'when starting as dual mode, we must ensure wal_level was not \'minimal\' and max_wal_senders ' + +'was set at least 1' when max_wal_senders <1 && wal_level == 'minimal'"")" +79,"recovery_max_workers >= 1->Alert(1,"" Check if starting as dummy_standby mode, if true, errormsg may report: 'when starting as dummy_standby mode, we couldn't support parallel redo, down it' when recovery_max_workers >= 1"")" +80,"xlog_file_path != NULL && xlog_file_size == 0->Alert(1,"" Errormsg will report: 'configured \'xlog_file_path\' but \'xlog_file_size\' is zero.' when xlog_file_path != NULL && xlog_file_size == 0 "")" +81,"xlog_file_path != NULL && xlog_file_size % 16 * 1024 * 1024 != 0->Alert(1,"" Errormsg will report: 'value of \'xlog_file_size\' must be an integer multiple of XLogSegSize' when xlog_file_size % XLogSegSize != 0(XLogSegSize = 2^24)"")" +82,"ss_enable_dss == ""on"" && temp_tablespaces != NULL && temp_tablespaces != """"->Alert(1,"" Errormsg will report: 'shared storage mode could not support specifics tablespace(s).' when ss_enable_dss == true && temp_tablespaces != NULL && temp_tablespaces != '' +Hint:'Either set temp_tablespaces to NULL, or turn off ss_enable_dss.'"")" +83,"xlog_lock_file_path == NULL && xlog_file_path != NULL->Alert(1,"" Errormsg will report: 'use scsi to preempt shared storage' when xlog_lock_file_path == NULL && xlog_file_path != NULL"")" +84,"recovery_parse_workers > 1->Alert(1,"" Check if starting as dummy_standby mode, if true, errormsg may report: 'when starting as dummy_standby mode, we couldn't support extreme rto.' 
when recovery_parse_workers > 1
+Hint: so down extreme rto, make recovery_parse_workers <= 1"")"
+85,"recovery_parse_workers > 1 && wal_receiver_buffer_size < 32 * 1024->Alert(1,"" Errormsg will report: 'when starting extreme rto, wal receiver buf should not smaller than %dMB' when recovery_parse_workers > 1 && wal_receiver_buffer_size < 32768
+Hint: 'recommend config \'wal_receiver_buffer_size=64MB\''"")"
+86,"recovery_parse_workers > 1 && hot_standby == ""on""->Alert(1,"" Errormsg will report: 'extreme rto could not support hot standby.' when recovery_parse_workers > 1 && hot_standby == true
+Hint: 'Either turn off extreme rto, or turn off hot_standby.'"")"
+87,"lastval_supported == ""on"" && enable_beta_features == ""off""->NotEffect(2,"" lastval_supported will not effect when enable_beta_features == false"")"
+88,"enable_mix_replication == ""on"" && enable_cbm_tracking == ""off""->Alert(1,"" Errormsg will report: 'enable_cbm_tracking must be turn on when enable_mix_replication is on!' when enable_mix_replication == true && enable_cbm_tracking == false"")"
+89,"ss_enable_dms == ""on"" && default_transaction_isolation != ""read committed""->Alert(1,""Errormsg will report: 'Only support read committed transcation isolation level while DMS and DSS enabled.' when ss_enable_dms == true && default_transaction_isolation != 'read committed'"")"
+90,"debug_print_plan == ""off"" && (log_min_messages == ""debug5"" || log_min_messages == ""debug4"" || log_min_messages == ""debug3"" || log_min_messages == ""debug2"" || log_min_messages == ""debug1"" || log_min_messages == ""log"" )->NotEffect(3,""debug_print_plan == false may make the debug log print fail"")"
+91,"enable_dcf == ""off""->NotEffect(2,"" Cluster is not installed as dcf mode, so dcf related configs will not effect"")"
+92,"random_page_cost < seq_page_cost->NotEffect(2,"" random_page_cost may not effect when random_page_cost < seq_page_cost"")"
+93,"audit_enabled == ""off""->NotEffect(2,"" audit_enabled == false, so audit related configs will not effect"")"
+94,"audit_enabled == ""on"" && (audit_function_exec == ""off"" || audit_system_function_exec == ""off"" || audit_copy_exec == ""off"")->NotEffect(1,"" Some audit functions will not be performed normally when audit_enabled == true && (audit_function_exec == false || audit_system_function_exec == false || audit_copy_exec == false)"")"
-- 
Gitee

From 4ab385bd47720dc5d5ff5354dccded19300c93fe Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Mon, 2 Dec 2024 18:38:52 +0800
Subject: [PATCH 02/87] code check01

---
 script/local/LocalCheckSE.py     | 148 +++++++++++++++----------------
 script/local/parser/functions.py |  42 ++++++---
 script/local/parser/lex.py       |  52 +++++++----
 script/local/parser/myLexer.py   |  23 ++++-
 script/local/parser/myYACC.py    |  25 +++++-
 script/local/parser/parsetab.py  |  30 +++++--
 script/local/parser/utils.py     |  50 +++++++----
 script/local/parser/variables.py |  22 ++++-
 script/local/parser/yacc.py      |  54 +++++++----
 9 files changed, 300 insertions(+), 146 deletions(-)

diff --git a/script/local/LocalCheckSE.py b/script/local/LocalCheckSE.py
index 89108034..61895bc1 100644
--- a/script/local/LocalCheckSE.py
+++ b/script/local/LocalCheckSE.py
@@ -2541,7 +2541,7 @@ def execute_query(sql_query):
 #############################################################################
 
 
-def checkConnectionStatus(isSetting):
+def check_connection_status(isSetting):
     """
     function : checkConnectionStatus
     input : Bool
     output : NA
@@ -2589,16 +2589,16 @@ def format_percent(value):
     return "%.2f%%" % value
 
 
-def checkMemoryUsageSituation():
+def check_memory_usage_situation():
     """
-    function : checkMemoryUsageSituation
+    function : check_memory_usage_situation
     input : Bool
     output : NA
     """
-    maintenance_work_mem = convertMemoryStrToNum(execute_query("""show maintenance_work_mem;"""))
+    maintenance_work_mem = convert_memory_str_to_num(execute_query("""show maintenance_work_mem;"""))
     all_databases_size = int(execute_query("""select sum(pg_database_size(datname)) from pg_database;"""))
-    shared_buffers = convertMemoryStrToNum(execute_query("""show shared_buffers"""))
-    effective_cache_size = convertMemoryStrToNum(execute_query("""show effective_cache_size"""))
+    shared_buffers = convert_memory_str_to_num(execute_query("""show shared_buffers"""))
+    effective_cache_size = convert_memory_str_to_num(execute_query("""show effective_cache_size"""))
     if maintenance_work_mem <= 64 * 1024:
         message = " Warning reason:maintenance_work_mem is less or equal to its default value. Increase it to reduce maintenance tasks duration"
         g_logger.log(message)
@@ -2615,31 +2615,31 @@ def checkMemoryUsageSituation():
         g_logger.log(message)
 
 
-def convertMemoryStrToNum(mem_str):
+def convert_memory_str_to_num(mem_str):
     units = {'KB': 1, 'MB': 1024, 'GB': 1024 ** 2, 'TB': 1024 ** 3}  # multipliers relative to KB
     num, unit = mem_str[:-2], mem_str[-2:]
     return int(float(num) * units[unit])
 
 
-def checkSharedBuffersHitRate():
+def check_shared_buffers_hit_rate():
     """
-    function : checkSharedBuffersHitRate
+    function : check_shared_buffers_hit_rate
    input : Bool
     output : NA
     """
-    Shared_buffers_hit_rate = float(execute_query(
+    shared_buffers_hit_rate = float(execute_query(
         """select sum(idx_blks_hit)*100/(sum(idx_blks_read)+sum(idx_blks_hit)+1) from pg_statio_all_tables;"""))
-    if Shared_buffers_hit_rate > 99.99:
+    if shared_buffers_hit_rate > 99.99:
         message = " Warning reason:This is too high. If this openGauss instance was recently used as it usually is and was not stopped since, then you may reduce shared_buffer"
         g_logger.log(message)
-    if Shared_buffers_hit_rate < 90:
+    if shared_buffers_hit_rate < 90:
         message = " Warning reason:This is too low. 
Increase shared_buffer memory to increase hit rate" g_logger.log(message) -def checkLogSituation(): +def check_log_situation(): """ - function : checkLogSituation + function : check_log_situation input : Bool output : NA """ @@ -2664,7 +2664,7 @@ def checkLogSituation(): g_logger.log(message) -def checkUsers(): +def check_users(): expiring_soon_users = execute_query("""select usename from pg_user where valuntil < now() + interval '7 days'""") i_am_super = execute_query("""select usename from pg_shadow where passwd='md5'||md5(usename||usename)""") if len(expiring_soon_users) > 0: @@ -2692,7 +2692,7 @@ def checkConnection(isSetting=False): checkHostnossl() checkHostAddressno0() checkSSLConnection(isSetting) - checkConnectionStatus(isSetting) + check_connection_status(isSetting) def checkMonitorIP(isSetting): @@ -4731,8 +4731,8 @@ def checkRuntimeEnvironmentConfiguration(isSetting=False): checkUmask(isSetting) checkHidepid() checkNtpd() - checkMemoryUsageSituation() - checkSharedBuffersHitRate() + check_memory_usage_situation() + check_shared_buffers_hit_rate() def checkUmask(isSetting): @@ -4823,22 +4823,22 @@ def checkOtherConfigurations(isSetting=False): """ checkBackslashQuote(isSetting) checkAllowSystemTableMods(isSetting) - checkRunningTime(isSetting) - checkUsers() - checkPhaseCommit() - checkAutovacuum() - checkPoint() - checkStorage() - checkWal() - checkPlanner() - checkIndexes() - checkProcedures() - checkOvercommit() - checkArchive() - checkBgwriter() - checkHugepages() - checkIoSchedule(ssd=0) - checkDependencies() + check_running_time(isSetting) + check_users() + check_phase_commit() + check_autovacuum() + check_point() + check_storage() + check_wal() + check_planner() + check_indexes() + check_procedures() + check_overcommit() + check_archive() + check_bgwriter() + check_hugepages() + check_io_schedule(ssd=0) + check_dependencies() def checkBackslashQuote(isSetting): @@ -4870,9 +4870,9 @@ def checkAllowSystemTableMods(isSetting): setAllowSystemTableMods(data) -def checkRunningTime(isSetting): +def check_running_time(isSetting): """ - function : checkRunningTime + function : check_running_time input : Bool output : NA """ @@ -4884,14 +4884,14 @@ def checkRunningTime(isSetting): " Warning reason:Uptime less than 1 day. This report may be inaccurate") -def checkPhaseCommit(): +def check_phase_commit(): """ - function : checkPhaseCommit + function : check_phase_commit input : Bool output : NA """ cur_version = execute_query("""SELECT opengauss_version();""") - if isLaterVersion(min_ver='1.0', cur_ver=cur_version): + if is_later_version(min_ver='1.0', cur_ver=cur_version): prepared_xact_count = int(execute_query("""select count(1) from pg_prepared_xacts;""")) if prepared_xact_count != 0: message = " Warning reason:two-phase commit prepared transactions exist. 
If they stay for too long they may lock objects for too long" @@ -4904,7 +4904,7 @@ def checkPhaseCommit(): g_logger.log(message) -def isLaterVersion(min_ver, cur_ver): +def is_later_version(min_ver, cur_ver): min_major, min_minor = min_ver.split('.')[0], min_ver.split('.')[1] cur_major, cur_minor = cur_ver.split('.')[0], cur_ver.split('.')[1] min_major, min_minor, cur_major, cur_minor = int(min_major), int(min_minor), int(cur_major), int(cur_minor) @@ -4915,9 +4915,9 @@ def isLaterVersion(min_ver, cur_ver): return False -def checkAutovacuum(): +def check_autovacuum(): """ - function : checkAutovacuum + function : check_autovacuum input : Bool output : NA """ @@ -4932,9 +4932,9 @@ def checkAutovacuum(): g_logger.log(message) -def checkPoint(): +def check_point(): """ - function : checkPoint + function : check_point input : Bool output : NA """ @@ -4970,9 +4970,9 @@ def trans(data): return int(re.sub(r'\D', '', data)) -def checkWal(): +def check_wal(): """ - function : checkWal + function : check_wal input : Bool output : NA """ @@ -4982,26 +4982,26 @@ def checkWal(): g_logger.log(message) -def checkPlanner(): +def check_planner(): """ - function : checkPlanner + function : check_planner input : Bool output : NA """ - ModifiedCosts = execute_query("""select name from pg_settings where name like '%cost%' and setting<>boot_val;""") - DisabledPlanFunctions = execute_query( + modified_costs = execute_query("""select name from pg_settings where name like '%cost%' and setting<>boot_val;""") + disabled_plan_functions = execute_query( """select name, setting from pg_settings where name like 'enable_%' and setting='off' ;""") - if len(ModifiedCosts) != 0: + if len(modified_costs) != 0: message = " Warning reason:Some I/O cost settings are not set to their default value,This may lead the planner to create suboptimal plans" g_logger.log(message) - if len(DisabledPlanFunctions) != 0: + if len(disabled_plan_functions) != 0: message = " Warning reason:Some plan features are disabled: " g_logger.log(message) -def checkIndexes(): +def check_indexes(): """ - function : checkIndexes + function : check_indexes input : Bool output : NA """ @@ -5012,9 +5012,9 @@ def checkIndexes(): g_logger.log(message) -def checkProcedures(): +def check_procedures(): """ - function : checkProcedures + function : check_procedures input : Bool output : NA """ @@ -5025,20 +5025,20 @@ def checkProcedures(): g_logger.log(message) -def checkOvercommit(): +def check_overcommit(): """ - function : checkOvercommit + function : check_overcommit input : Bool output : NA """ cmd_memory = "cat /proc/sys/vm/overcommit_memory" cmd_ratio = "cat /proc/sys/vm/overcommit_ratio" os_name = "uname -s" - overcommit_memory = getCmdRes(cmd_memory) - overcommit_ratio = getCmdRes(cmd_ratio) + overcommit_memory = get_cmd_res(cmd_memory) + overcommit_ratio = get_cmd_res(cmd_ratio) if not (is_pure_digit(overcommit_memory) and is_pure_digit(overcommit_ratio)): return - os_name = getCmdRes(os_name) + os_name = get_cmd_res(os_name) if os_name != 'darwin' and int(overcommit_memory) != 2: message = " Warning reason:Memory overcommitment is allowed on the system. This may lead the OOM Killer to kill at least one openGauss process, DANGER!" 
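         # vm.overcommit_memory: 0 = heuristic overcommit, 1 = always overcommit,
         # 2 = never overcommit; only 2 prevents the kernel from overcommitting.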
g_logger.log(message) @@ -5056,7 +5056,7 @@ def is_pure_digit(s): return s.isdigit() -def getCmdRes(command): +def get_cmd_res(command): try: result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) @@ -5065,11 +5065,11 @@ def getCmdRes(command): return '' -def checkStorage(): +def check_storage(): fsync = execute_query("""show fsync""") wal_sync_method = execute_query("""show wal_sync_method""") synchronize_seqscans = execute_query("""show synchronize_seqscans""") - os_name = getCmdRes("uname -s") + os_name = get_cmd_res("uname -s") if fsync != 'on': message = " Warning reason:fsync is off. You may lose data after a crash, DANGER!" g_logger.log(message) @@ -5084,7 +5084,7 @@ def checkStorage(): g_logger.log(message) -def checkArchive(): +def check_archive(): archive_timeout = execute_query("""show archive_timeout""") archive_timeout = int(archive_timeout.rstrip('s')) if archive_timeout < 60: @@ -5092,20 +5092,20 @@ def checkArchive(): " Warning reason:Setting archive_timeout to a very small value will result in occupying a huge amount of archive storage space. It is recommended to set archive_timeout to 60 seconds") -def checkBgwriter(): +def check_bgwriter(): bgwriter_lru_multiplier = int(execute_query("""show bgwriter_lru_multiplier""")) if bgwriter_lru_multiplier < 1: g_logger.log( " Warning reason:Setting a smaller bgwriter_lru_multipler reduces the additional I/O overhead caused by backend write processes, increase bgwriter_lru_multiplier") -def checkHugepages(): +def check_hugepages(): huge_pages = execute_query("""show enable_huge_pages""") - os_name = getCmdRes("uname -s") + os_name = get_cmd_res("uname -s") if os_name != 'linux' and os_name != 'Linux' and os_name != 'freebsd': g_logger.log(" Warning reason:No Huge Pages on this OS") else: - nr_hugepages = getCmdRes("cat /proc/sys/vm/nr_hugepages") + nr_hugepages = get_cmd_res("cat /proc/sys/vm/nr_hugepages") if nr_hugepages == None or int(nr_hugepages) == 0: g_logger.log(" Warning reason:No Huge Pages available on the system") else: @@ -5123,7 +5123,7 @@ def checkHugepages(): key, value = item.split(':') os_info[key.strip()] = int(re.search(r'\d+', value.strip()).group()) pg_pid = execute_query("""SELECT pg_backend_pid();""") - peak = getCmdRes("grep ^VmPeak /proc/" + pg_pid + "/status | awk '{ print $2 }'").strip() + peak = get_cmd_res("grep ^VmPeak /proc/" + pg_pid + "/status | awk '{ print $2 }'").strip() if peak.isdigit(): suggesthugepages = int(peak) / int(os_info['Hugepagesize']) if os_info['HugePages_Total'] < int(suggesthugepages + 0.5): @@ -5134,10 +5134,10 @@ def checkHugepages(): g_logger.log(" Warning reason:Change Huge Pages size from 2MB to 1GB if the machine is dedicated to openGauss") -def checkIoSchedule(ssd=0): +def check_io_schedule(ssd=0): active_schedulers = {} os = {} - os['name'] = getCmdRes("uname -s") + os['name'] = get_cmd_res("uname -s") if os['name'] == 'darwin': g_logger.log(" Warning reason:No I/O scheduler information on MacOS") else: @@ -5221,7 +5221,7 @@ def checkIoSchedule(ssd=0): " Warning reason:The CFQ scheduler is inadequate on a virtual machine (because the hypervisor and/or underlying kernel is already in charge of the I/O scheduling)") -def checkDependencies(): +def check_dependencies(): sql_query = """ SELECT json_agg(json_build_object( 'name', name, @@ -5287,7 +5287,7 @@ def get_local_role_value(conf_path): cmd = "gs_ctl query -D %s" % (os.getenv('PGDATA')) try: # Execute commands and capture output - result 
=subprocess.run(cmd, shell=True, stdout=subprocess.PIPE,
+        result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE, universal_newlines=True)
         # Extracting the value of local_role using regular expressions
         match = re.search(r'local_role\s+:\s+(\w+)', result.stdout)
diff --git a/script/local/parser/functions.py b/script/local/parser/functions.py
index 350a877e..ba30cb06 100644
--- a/script/local/parser/functions.py
+++ b/script/local/parser/functions.py
@@ -1,3 +1,23 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : functions.py defines the rule actions (Alert, NotEffect, ...)
+#               dispatched by the dependency-rule parser.
+#############################################################################
 import sys
 import os
 localDirPath = os.path.dirname(os.path.realpath(__file__))
@@ -7,27 +27,27 @@ sys.path.append(sys.path[0] + "/../")
 from local.parser.utils import add_dependency_info
 
 
-def Alert(level, s):
+def check_alert(level, s):
     add_dependency_info(level, 'Check Alert', s)
 
 
-def NotEffect(level, s):
+def check_not_effect(level, s):
     add_dependency_info(level, 'Check NoEffect', s)
 
 
-def Overwrite(level, s):
+def check_overwrite(level, s):
     add_dependency_info(level, 'Check Overwrite', s)
 
 
-def Function(level, s):
+def check_function(level, s):
     add_dependency_info(level, 'Check Function', s)
 
 
-def Performance(level, s):
+def check_performance(level, s):
     add_dependency_info(level, 'Check Performance', s)
 
 
 function_dict = {
-    "Alert" : Alert,
-    "NotEffect" : NotEffect,
-    "Overwrite" : Overwrite,
-    "Function" : Function,
-    "Performance" : Performance,
+    "Alert" : check_alert,
+    "NotEffect" : check_not_effect,
+    "Overwrite" : check_overwrite,
+    "Function" : check_function,
+    "Performance" : check_performance,
 }
 
 def get_function(name):
diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py
index 129f3237..2a7bd3f9 100644
--- a/script/local/parser/lex.py
+++ b/script/local/parser/lex.py
@@ -1,3 +1,23 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : lex.py is the PLY lexer engine used by the rule parser.
 
 def get_function(name):
diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py
index 129f3237..2a7bd3f9 100644
--- a/script/local/parser/lex.py
+++ b/script/local/parser/lex.py
@@ -1,3 +1,23 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : lex.py is the PLY lexer engine used by the dependency-check rule parser.
+#############################################################################
 import re
 import sys
 import types
@@ -166,10 +186,10 @@ class Lexer:
     # ------------------------------------------------------------
     def token(self):
         # Make local copies of frequently referenced attributes
-        lexpos = self.lexpos
-        lexlen = self.lexlen
+        lexpos = self.lexpos
+        lexlen = self.lexlen
         lexignore = self.lexignore
-        lexdata = self.lexdata
+        lexdata = self.lexdata
 
         while lexpos < lexlen:
             # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
@@ -214,7 +234,7 @@ class Lexer:
 
             # Every function must return a token, if nothing, we just move to next token
             if not newtok:
-                lexpos = self.lexpos # This is here in case user has updated lexpos.
+                lexpos = self.lexpos  # This is here in case user has updated lexpos.
                 lexignore = self.lexignore # This is here in case there was a state change
                 break
             return newtok
@@ -303,7 +323,7 @@ def _get_regex(func):
# -----------------------------------------------------------------------------
 def get_caller_module_dict(levels):
     f = sys._getframe(levels)
-    return { **f.f_globals, **f.f_locals }
+    return {**f.f_globals, **f.f_locals}
 
# -----------------------------------------------------------------------------
# _form_master_re()
@@ -376,14 +396,14 @@ def _statetoken(s, names):
 # -----------------------------------------------------------------------------
 class LexerReflect(object):
     def __init__(self, ldict, log=None, reflags=0):
-        self.ldict = ldict
+        self.ldict = ldict
         self.error_func = None
-        self.tokens = []
-        self.reflags = reflags
+        self.tokens = []
+        self.reflags = reflags
         self.stateinfo = {'INITIAL': 'inclusive'}
-        self.modules = set()
-        self.error = False
-        self.log = Logger(sys.stderr) if log is None else log
+        self.modules = set()
+        self.error = False
+        self.log = Logger(sys.stderr) if log is None else log
 
     # Get all of the basic information
     def get_all(self):
@@ -484,11 +504,11 @@ class LexerReflect(object):
 
         # Now build up a list of functions and a list of strings
         self.toknames = {}  # Mapping of symbols to token names
-        self.funcsym = {}  # Symbols defined as functions
-        self.strsym = {}  # Symbols defined as strings
-        self.ignore = {}  # Ignore strings by state
-        self.errorf = {}  # Error functions by state
-        self.eoff = {}  # EOF functions by state
+        self.funcsym = {}  # Symbols defined as functions
+        self.strsym = {}  # Symbols defined as strings
+        self.ignore = {}  # Ignore strings by state
+        self.errorf = {}  # Error functions by state
+        self.eoff = {}  # EOF functions by state
 
         for s in self.stateinfo:
             self.funcsym[s] = []
diff --git a/script/local/parser/myLexer.py b/script/local/parser/myLexer.py
index 3208c042..e91c47b2 100644
--- a/script/local/parser/myLexer.py
+++ b/script/local/parser/myLexer.py
@@ -1,4 +1,23 @@
-# import ply.lex as lex
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ---------------------------------------------------------------------------- +# Description : LocalCheckOS.py is a utility to check OS info on local node. +############################################################################# import os import sys localDirPath = os.path.dirname(os.path.realpath(__file__)) @@ -107,7 +126,7 @@ class MyLexer(): t.lexer.lineno += len(t.value) # A string containing ignored characters (spaces and tabs) - t_ignore = ' \t' + t_ignore = ' \t' # Error handling rule def t_error(self,t): diff --git a/script/local/parser/myYACC.py b/script/local/parser/myYACC.py index 31b3e6cd..c19f33dc 100644 --- a/script/local/parser/myYACC.py +++ b/script/local/parser/myYACC.py @@ -1,4 +1,23 @@ -# import ply.yacc as yacc +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : CheckInstall.py is a utility to install Gauss MPP Database. +############################################################################# import os import sys localDirPath = os.path.dirname(os.path.realpath(__file__)) @@ -11,7 +30,7 @@ from local.parser.variables import * from local.parser.functions import * from local.parser.myLexer import token_dict -def execFn(fn): +def exec_fn(fn): fn[0](*fn[1]) class MyYACC(): @@ -22,7 +41,7 @@ class MyYACC(): '''sentence : conditions THEN function ''' if p[1]: - execFn(p[3]) + exec_fn(p[3]) def p_conditions_or(p): 'conditions : conditions OR and_conditions' diff --git a/script/local/parser/parsetab.py b/script/local/parser/parsetab.py index 8a8ec277..659f36e4 100644 --- a/script/local/parser/parsetab.py +++ b/script/local/parser/parsetab.py @@ -1,12 +1,28 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : LocalCheckOS.py is a utility to check OS info on local node. +############################################################################# +TAB_VERSION = '3.10' -# parsetab.py -# This file is automatically generated. Do not edit. 
-# pylint: disable=W,C,R -_tabversion = '3.10' +LR_METHOD = 'LALR' -_lr_method = 'LALR' - -_lr_signature = 'AND COMMA DIVIDE EQUAL FALSE GE GT ID LE LPAREN LT MINUS MOD NEQUAL NOT NULL NUMBER OR PLUS RPAREN STRING THEN TIMES TRUEsentence : conditions THEN function \n conditions : conditions OR and_conditionsconditions : and_conditions\n and_conditions : and_conditions AND not_conditions\n and_conditions : not_conditionsnot_conditions : NOT cdtnot_conditions : cdt\n cdt : expr EQUAL expr\n | expr NEQUAL expr\n | expr GE expr\n | expr GT expr\n | expr LE expr\n | expr LT expr\n cdt : LPAREN conditions RPAREN\n expr : expr PLUS term\n | expr MINUS term\n expr : term\n term : term TIMES factor\n | term DIVIDE factor\n | term MOD factor\n term : factor\n factor : NUMBER\n | STRING\n factor : IDfactor : NULL\n factor : TRUE\n | FALSE\n factor : LPAREN expr RPARENfunction : ID LPAREN variables RPAREN\n variables : variables COMMA expr\n variables : expr' +LR_SIGNATURE = 'AND COMMA DIVIDE EQUAL FALSE GE GT ID LE LPAREN LT MINUS MOD NEQUAL NOT NULL NUMBER OR PLUS RPAREN STRING THEN TIMES TRUEsentence : conditions THEN function \n conditions : conditions OR and_conditionsconditions : and_conditions\n and_conditions : and_conditions AND not_conditions\n and_conditions : not_conditionsnot_conditions : NOT cdtnot_conditions : cdt\n cdt : expr EQUAL expr\n | expr NEQUAL expr\n | expr GE expr\n | expr GT expr\n | expr LE expr\n | expr LT expr\n cdt : LPAREN conditions RPAREN\n expr : expr PLUS term\n | expr MINUS term\n expr : term\n term : term TIMES factor\n | term DIVIDE factor\n | term MOD factor\n term : factor\n factor : NUMBER\n | STRING\n factor : IDfactor : NULL\n factor : TRUE\n | FALSE\n factor : LPAREN expr RPARENfunction : ID LPAREN variables RPAREN\n variables : variables COMMA expr\n variables : expr' _lr_action_items = 
{'NOT':([0,8,18,19,29,],[5,5,5,5,5,]),'LPAREN':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,36,40,53,58,],[8,8,29,8,8,40,40,40,40,40,40,40,40,29,40,40,40,53,40,40,40,]),'NUMBER':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,]),'STRING':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,]),'ID':([0,5,8,17,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[13,13,13,36,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,]),'NULL':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,]),'TRUE':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,]),'FALSE':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,]),'$end':([1,35,57,],[0,-1,-29,]),'THEN':([2,3,4,6,9,10,11,12,13,14,15,16,20,37,38,39,41,42,43,44,45,46,47,48,49,50,51,52,],[17,-3,-5,-7,-17,-21,-22,-23,-24,-25,-26,-27,-6,-2,-4,-8,-9,-10,-11,-12,-13,-15,-16,-14,-28,-18,-19,-20,]),'OR':([2,3,4,6,9,10,11,12,13,14,15,16,20,30,37,38,39,41,42,43,44,45,46,47,48,49,50,51,52,],[18,-3,-5,-7,-17,-21,-22,-23,-24,-25,-26,-27,-6,18,-2,-4,-8,-9,-10,-11,-12,-13,-15,-16,-14,-28,-18,-19,-20,]),'RPAREN':([3,4,6,9,10,11,12,13,14,15,16,20,30,31,37,38,39,41,42,43,44,45,46,47,48,49,50,51,52,54,55,56,59,],[-3,-5,-7,-17,-21,-22,-23,-24,-25,-26,-27,-6,48,49,-2,-4,-8,-9,-10,-11,-12,-13,-15,-16,-14,-28,-18,-19,-20,49,57,-31,-30,]),'AND':([3,4,6,9,10,11,12,13,14,15,16,20,37,38,39,41,42,43,44,45,46,47,48,49,50,51,52,],[19,-5,-7,-17,-21,-22,-23,-24,-25,-26,-27,-6,19,-4,-8,-9,-10,-11,-12,-13,-15,-16,-14,-28,-18,-19,-20,]),'EQUAL':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[21,-17,-21,-22,-23,-24,-25,-26,-27,21,-15,-16,-28,-18,-19,-20,]),'NEQUAL':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[22,-17,-21,-22,-23,-24,-25,-26,-27,22,-15,-16,-28,-18,-19,-20,]),'GE':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[23,-17,-21,-22,-23,-24,-25,-26,-27,23,-15,-16,-28,-18,-19,-20,]),'GT':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[24,-17,-21,-22,-23,-24,-25,-26,-27,24,-15,-16,-28,-18,-19,-20,]),'LE':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[25,-17,-21,-22,-23,-24,-25,-26,-27,25,-15,-16,-28,-18,-19,-20,]),'LT':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[26,-17,-21,-22,-23,-24,-25,-26,-27,26,-15,-16,-28,-18,-19,-20,]),'PLUS':([7,9,10,11,12,13,14,15,16,31,39,41,42,43,44,45,46,47,49,50,51,52,54,56,59,],[27,-17,-21,-22,-23,-24,-25,-26,-27,27,27,27,27,27,27,27,-15,-16,-28,-18,-19,-20,27,27,27,]),'MINUS':([7,9,10,11,12,13,14,15,16,31,39,41,42,43,44,45,46,47,49,50,51,52,54,56,59,],[28,-17,-21,-22,-23,-24,-25,-26,-27,28,28,28,28,28,28,28,-15,-16,-28,-18,-19,-20,28,28,28,]),'COMMA':([9,10,11,12,13,14,15,16,46,47,49,50,51,52,55,56,59,],[-17,-21,-22,-23,-24,-25,-26,-27,-15,-16,-28,-18,-19,-20,58,-31,-30,]),'TIMES':([9,10,11,12,13,14,15,16,46,47,49,50,51,52,],[32,-21,-22,-23,-24,-25,-26,-27,32,32,-28,-18,-19,-20,]),'DIVIDE':([9,10,11,12,13,14,15,16,46,47,49,50,51,52,],[33,-21,-22,-23,-24,-25,-26,-27,33,33,-28,-18,-19,-20,]),'MOD':([9,10,11,12,13,14,15,16,46,47,49,50,51,52,],[34,-21,-22,-23,-24,-25,-26,-27,34,34,-28,-18,-19,-20,]),} diff --git a/script/local/parser/utils.py b/script/local/parser/utils.py index 5150eded..539c6b52 100644 --- a/script/local/parser/utils.py +++ b/script/local/parser/utils.py 
@@ -1,3 +1,23 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : LocalCheckOS.py is a utility to check OS info on local node. +############################################################################# import optparse import subprocess from decimal import Decimal @@ -25,9 +45,9 @@ def set_color(nc): def print_title_1(info): global nocolor if nocolor != 0: - print('======== ' + info +' ========') + print('======== ' + info + ' ========') else: - print('\033[0;37;46m======== ' + info +' ========\033[0m') + print('\033[0;37;46m======== ' + info + ' ========\033[0m') def print_title_2(info): print('-------- ' + info + ' --------') @@ -201,23 +221,23 @@ def format_size(size): unit_index += 1 return "%.2f %s"%(size, units[unit_index]) -min_s = 60 -hour_s = 60 * min_s -day_s = 24 * hour_s +MIN_S = 60 +HOUR_S = 60 * MIN_S +DAY_S = 24 * HOUR_S def format_epoch_to_time(epoch): time = '' - if epoch > day_s: - days = "%d" % (epoch/day_s) - epoch = epoch % day_s - time += ' ' + days + 'd' - if epoch > hour_s: - hours = '%d' % (epoch / hour_s) - epoch = epoch % hour_s + if epoch > DAY_S: + days = "%d" % (epoch/DAY_S) + epoch = epoch % DAY_S + time += ' ' + days + 'd' + if epoch > HOUR_S: + hours = '%d' % (epoch / HOUR_S) + epoch = epoch % HOUR_S time += ' ' + hours + 'h' - if epoch > min_s: - mins = '%d' % (epoch/min_s) - epoch = epoch % min_s + if epoch > MIN_S: + mins = '%d' % (epoch/MIN_S) + epoch = epoch % MIN_S time += ' ' + mins + 'm' time += ' ' + '%02d'%epoch + 's' return time diff --git a/script/local/parser/variables.py b/script/local/parser/variables.py index 5db19b8d..14197536 100644 --- a/script/local/parser/variables.py +++ b/script/local/parser/variables.py @@ -1,3 +1,23 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : LocalCheckOS.py is a utility to check OS info on local node. 
+############################################################################# import sys import os localDirPath = os.path.dirname(os.path.realpath(__file__)) @@ -22,7 +42,7 @@ variable_dict = { def get_variable(name): try: - val = get_dependency_setting(name) + val = get_dependency_setting(name) except: val = None if not isinstance(val, str): diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 3a065320..c7aeb84d 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -1,3 +1,23 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : LocalCheckOS.py is a utility to check OS info on local node. +############################################################################# import re import types import sys @@ -9,12 +29,12 @@ import inspect # Change these to modify the default behavior of yacc (if you wish) #----------------------------------------------------------------------------- -yaccdebug = False # Debugging mode. If set, yacc generates a +YACC_DEBUG = False # Debugging mode. If set, yacc generates a # a 'parser.out' file in the current directory -debug_file = 'parser.out' # Default name of the debugging file -error_count = 3 # Number of symbols that must be shifted to leave recovery mode -resultlimit = 40 # Size limit of results when running in debug mode. +DEBUG_FILE = 'parser.out' # Default name of the debugging file +ERROR_COUNT = 3 # Number of symbols that must be shifted to leave recovery mode +RESULT_LIMIT = 40 # Size limit of results when running in debug mode. MAXINT = sys.maxsize @@ -52,8 +72,8 @@ def format_result(r): repr_str = repr(r) if '\n' in repr_str: repr_str = repr(repr_str) - if len(repr_str) > resultlimit: - repr_str = repr_str[:resultlimit] + ' ...' + if len(repr_str) > RESULT_LIMIT: + repr_str = repr_str[:RESULT_LIMIT] + ' ...' result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) return result @@ -208,10 +228,10 @@ class LRParser: lookahead = None # Current lookahead symbol lookaheadstack = [] # Stack of lookahead symbols actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) 
defaulted_states = self.defaulted_states # Local reference to defaulted states - pslice = YaccProduction(None) # Production object passed to grammar rules + pslice = YaccProduction(None) # Production object passed to grammar rules errorcount = 0 # Used during error recovery if debug: @@ -237,7 +257,7 @@ class LRParser: statestack = self.statestack = [] # Stack of parsing states symstack = self.symstack = [] # Stack of grammar symbols pslice.stack = symstack # Put in the production - errtoken = None # Err token + errtoken = None # Err token # The start state is assumed to be (0,$end) @@ -297,7 +317,7 @@ class LRParser: # reduce a symbol on the stack, emit a production p = prod[-t] pname = p.name - plen = p.len + plen = p.len # Get production function sym = YaccSymbol() @@ -352,7 +372,7 @@ class LRParser: sym.type = 'error' sym.value = 'error' lookahead = sym - errorcount = error_count + errorcount = ERROR_COUNT self.errorok = False continue @@ -389,7 +409,7 @@ class LRParser: sym.type = 'error' sym.value = 'error' lookahead = sym - errorcount = error_count + errorcount = ERROR_COUNT self.errorok = False continue @@ -421,7 +441,7 @@ class LRParser: # first syntax error. This function is only called if # errorcount == 0. if errorcount == 0 or self.errorok: - errorcount = error_count + errorcount = ERROR_COUNT self.errorok = False errtoken = lookahead if errtoken.type == '$end': @@ -453,7 +473,7 @@ class LRParser: return else: - errorcount = error_count + errorcount = ERROR_COUNT # case 1: the statestack only has 1 entry on it. If we're in this state, the # entire parse has been rolled back and we're completely hosed. The token is @@ -2191,8 +2211,8 @@ class ParserReflect(object): # Build a parser # ----------------------------------------------------------------------------- -def yacc(*, debug=yaccdebug, module=None, start=None, - check_recursion=True, optimize=False, debugfile=debug_file, +def yacc(*, debug=YACC_DEBUG, module=None, start=None, + check_recursion=True, optimize=False, debugfile=DEBUG_FILE, debuglog=None, errorlog=None): # Reference to the parsing method of the last built parser -- Gitee From 46b8cd4c45e565b62e784c17e97e16f16c4dd682 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Mon, 2 Dec 2024 18:58:51 +0800 Subject: [PATCH 03/87] check02 --- script/local/LocalCheckSE.py | 8 +-- script/local/parser/lex.py | 4 +- script/local/parser/parsetab.py | 6 +- script/local/parser/yacc.py | 97 ++++++++++++++++----------------- 4 files changed, 57 insertions(+), 58 deletions(-) diff --git a/script/local/LocalCheckSE.py b/script/local/LocalCheckSE.py index 61895bc1..24a048cb 100644 --- a/script/local/LocalCheckSE.py +++ b/script/local/LocalCheckSE.py @@ -2627,12 +2627,12 @@ def check_shared_buffers_hit_rate(): input : Bool output : NA """ - shared_buffers_hit_rate = float(execute_query( + shared_buffers_hit_rate = float(execute_query( """select sum(idx_blks_hit)*100/(sum(idx_blks_read)+sum(idx_blks_hit)+1) from pg_statio_all_tables;""")) - if shared_buffers_hit_rate > 99.99: + if shared_buffers_hit_rate > 99.99: message = " Warning reason:This is too high. If this openGauss instance was recently used as it usually is and was not stopped since, then you may reduce shared_buffer" g_logger.log(message) - if shared_buffers_hit_rate < 90: + if shared_buffers_hit_rate < 90: message = " Warning reason:This is too low. 
Increase shared_buffer memory to increase hit rate" g_logger.log(message) @@ -4988,7 +4988,7 @@ def check_planner(): input : Bool output : NA """ - modified_costs = execute_query("""select name from pg_settings where name like '%cost%' and setting<>boot_val;""") + modified_costs = execute_query("""select name from pg_settings where name like '%cost%' and setting<>boot_val;""") disabled_plan_functions = execute_query( """select name, setting from pg_settings where name like 'enable_%' and setting='off' ;""") if len(modified_costs) != 0: diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py index 2a7bd3f9..606c5c95 100644 --- a/script/local/parser/lex.py +++ b/script/local/parser/lex.py @@ -400,7 +400,7 @@ class LexerReflect(object): self.error_func = None self.tokens = [] self.reflags = reflags - self.stateinfo = {'INITIAL': 'inclusive'} + self.stateinfo = {'INITIAL': 'inclusive'} self.modules = set() self.error = False self.log = Logger(sys.stderr) if log is None else log @@ -705,7 +705,7 @@ def lex(*, module=None, object=None, debug=False, global lexer ldict = None - stateinfo = {'INITIAL': 'inclusive'} + stateinfo = {'INITIAL': 'inclusive'} lexobj = Lexer() global token, input diff --git a/script/local/parser/parsetab.py b/script/local/parser/parsetab.py index 659f36e4..2a4b8fef 100644 --- a/script/local/parser/parsetab.py +++ b/script/local/parser/parsetab.py @@ -18,11 +18,11 @@ # ---------------------------------------------------------------------------- # Description : LocalCheckOS.py is a utility to check OS info on local node. ############################################################################# -TAB_VERSION = '3.10' +TAB_VERSION = '3.10' -LR_METHOD = 'LALR' +LR_METHOD = 'LALR' -LR_SIGNATURE = 'AND COMMA DIVIDE EQUAL FALSE GE GT ID LE LPAREN LT MINUS MOD NEQUAL NOT NULL NUMBER OR PLUS RPAREN STRING THEN TIMES TRUEsentence : conditions THEN function \n conditions : conditions OR and_conditionsconditions : and_conditions\n and_conditions : and_conditions AND not_conditions\n and_conditions : not_conditionsnot_conditions : NOT cdtnot_conditions : cdt\n cdt : expr EQUAL expr\n | expr NEQUAL expr\n | expr GE expr\n | expr GT expr\n | expr LE expr\n | expr LT expr\n cdt : LPAREN conditions RPAREN\n expr : expr PLUS term\n | expr MINUS term\n expr : term\n term : term TIMES factor\n | term DIVIDE factor\n | term MOD factor\n term : factor\n factor : NUMBER\n | STRING\n factor : IDfactor : NULL\n factor : TRUE\n | FALSE\n factor : LPAREN expr RPARENfunction : ID LPAREN variables RPAREN\n variables : variables COMMA expr\n variables : expr' +LR_SIGNATURE = 'AND COMMA DIVIDE EQUAL FALSE GE GT ID LE LPAREN LT MINUS MOD NEQUAL NOT NULL NUMBER OR PLUS RPAREN STRING THEN TIMES TRUEsentence : conditions THEN function \n conditions : conditions OR and_conditionsconditions : and_conditions\n and_conditions : and_conditions AND not_conditions\n and_conditions : not_conditionsnot_conditions : NOT cdtnot_conditions : cdt\n cdt : expr EQUAL expr\n | expr NEQUAL expr\n | expr GE expr\n | expr GT expr\n | expr LE expr\n | expr LT expr\n cdt : LPAREN conditions RPAREN\n expr : expr PLUS term\n | expr MINUS term\n expr : term\n term : term TIMES factor\n | term DIVIDE factor\n | term MOD factor\n term : factor\n factor : NUMBER\n | STRING\n factor : IDfactor : NULL\n factor : TRUE\n | FALSE\n factor : LPAREN expr RPARENfunction : ID LPAREN variables RPAREN\n variables : variables COMMA expr\n variables : expr' _lr_action_items = 
{'NOT':([0,8,18,19,29,],[5,5,5,5,5,]),'LPAREN':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,36,40,53,58,],[8,8,29,8,8,40,40,40,40,40,40,40,40,29,40,40,40,53,40,40,40,]),'NUMBER':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,]),'STRING':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,]),'ID':([0,5,8,17,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[13,13,13,36,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,]),'NULL':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,]),'TRUE':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,]),'FALSE':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,]),'$end':([1,35,57,],[0,-1,-29,]),'THEN':([2,3,4,6,9,10,11,12,13,14,15,16,20,37,38,39,41,42,43,44,45,46,47,48,49,50,51,52,],[17,-3,-5,-7,-17,-21,-22,-23,-24,-25,-26,-27,-6,-2,-4,-8,-9,-10,-11,-12,-13,-15,-16,-14,-28,-18,-19,-20,]),'OR':([2,3,4,6,9,10,11,12,13,14,15,16,20,30,37,38,39,41,42,43,44,45,46,47,48,49,50,51,52,],[18,-3,-5,-7,-17,-21,-22,-23,-24,-25,-26,-27,-6,18,-2,-4,-8,-9,-10,-11,-12,-13,-15,-16,-14,-28,-18,-19,-20,]),'RPAREN':([3,4,6,9,10,11,12,13,14,15,16,20,30,31,37,38,39,41,42,43,44,45,46,47,48,49,50,51,52,54,55,56,59,],[-3,-5,-7,-17,-21,-22,-23,-24,-25,-26,-27,-6,48,49,-2,-4,-8,-9,-10,-11,-12,-13,-15,-16,-14,-28,-18,-19,-20,49,57,-31,-30,]),'AND':([3,4,6,9,10,11,12,13,14,15,16,20,37,38,39,41,42,43,44,45,46,47,48,49,50,51,52,],[19,-5,-7,-17,-21,-22,-23,-24,-25,-26,-27,-6,19,-4,-8,-9,-10,-11,-12,-13,-15,-16,-14,-28,-18,-19,-20,]),'EQUAL':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[21,-17,-21,-22,-23,-24,-25,-26,-27,21,-15,-16,-28,-18,-19,-20,]),'NEQUAL':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[22,-17,-21,-22,-23,-24,-25,-26,-27,22,-15,-16,-28,-18,-19,-20,]),'GE':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[23,-17,-21,-22,-23,-24,-25,-26,-27,23,-15,-16,-28,-18,-19,-20,]),'GT':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[24,-17,-21,-22,-23,-24,-25,-26,-27,24,-15,-16,-28,-18,-19,-20,]),'LE':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[25,-17,-21,-22,-23,-24,-25,-26,-27,25,-15,-16,-28,-18,-19,-20,]),'LT':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[26,-17,-21,-22,-23,-24,-25,-26,-27,26,-15,-16,-28,-18,-19,-20,]),'PLUS':([7,9,10,11,12,13,14,15,16,31,39,41,42,43,44,45,46,47,49,50,51,52,54,56,59,],[27,-17,-21,-22,-23,-24,-25,-26,-27,27,27,27,27,27,27,27,-15,-16,-28,-18,-19,-20,27,27,27,]),'MINUS':([7,9,10,11,12,13,14,15,16,31,39,41,42,43,44,45,46,47,49,50,51,52,54,56,59,],[28,-17,-21,-22,-23,-24,-25,-26,-27,28,28,28,28,28,28,28,-15,-16,-28,-18,-19,-20,28,28,28,]),'COMMA':([9,10,11,12,13,14,15,16,46,47,49,50,51,52,55,56,59,],[-17,-21,-22,-23,-24,-25,-26,-27,-15,-16,-28,-18,-19,-20,58,-31,-30,]),'TIMES':([9,10,11,12,13,14,15,16,46,47,49,50,51,52,],[32,-21,-22,-23,-24,-25,-26,-27,32,32,-28,-18,-19,-20,]),'DIVIDE':([9,10,11,12,13,14,15,16,46,47,49,50,51,52,],[33,-21,-22,-23,-24,-25,-26,-27,33,33,-28,-18,-19,-20,]),'MOD':([9,10,11,12,13,14,15,16,46,47,49,50,51,52,],[34,-21,-22,-23,-24,-25,-26,-27,34,34,-28,-18,-19,-20,]),} diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index c7aeb84d..ba918220 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ 
-567,18 +567,18 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') class Production(object): reduced = 0 def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): - self.name = name - self.prod = tuple(prod) - self.number = number - self.func = func + self.name = name + self.prod = tuple(prod) + self.number = number + self.func = func self.callable = None - self.file = file - self.line = line - self.prec = precedence + self.file = file + self.line = line + self.prec = precedence # Internal settings used during table construction - self.len = len(self.prod) # Length of the production + self.len = len(self.prod) # Length of the production # Create a list of unique production symbols used in the production self.usyms = [] @@ -658,15 +658,15 @@ class Production(object): class LRItem(object): def __init__(self, p, n): - self.name = p.name - self.prod = list(p.prod) - self.number = p.number - self.lr_index = n + self.name = p.name + self.prod = list(p.prod) + self.number = p.number + self.lr_index = n self.lookaheads = {} self.prod.insert(n, '.') - self.prod = tuple(self.prod) - self.len = len(self.prod) - self.usyms = p.usyms + self.prod = tuple(self.prod) + self.len = len(self.prod) + self.usyms = p.usyms def __str__(self): if self.prod: @@ -704,17 +704,17 @@ class GrammarError(YaccError): class Grammar(object): def __init__(self, terminals): - self.Productions = [None] # A list of all of the productions. The first + self.Productions = [None] # A list of all of the productions. The first # entry is always reserved for the purpose of # building an augmented grammar - self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all + self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all # productions of that nonterminal. - self.Prodmap = {} # A dictionary that is only used to detect duplicate + self.Prodmap = {} # A dictionary that is only used to detect duplicate # productions. - self.Terminals = {} # A dictionary mapping the names of terminal symbols to a + self.Terminals = {} # A dictionary mapping the names of terminal symbols to a # list of the rules where they are used. for term in terminals: @@ -725,11 +725,11 @@ class Grammar(object): self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list # of rule numbers where they are used. - self.First = {} # A dictionary of precomputed FIRST(x) symbols + self.First = {} # A dictionary of precomputed FIRST(x) symbols - self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols + self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols - self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the + self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the # form ('right',level) or ('nonassoc', level) or ('left',level) self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. @@ -831,7 +831,7 @@ class Grammar(object): 'Previous definition at %s:%d' % (m.file, m.line)) # From this point on, everything is valid. 
Create a new Production instance - pnumber = len(self.Productions) + pnumber = len(self.Productions) if prodname not in self.Nonterminals: self.Nonterminals[prodname] = [] @@ -1268,21 +1268,21 @@ class LRTable: self.log = log # Internal attributes - self.lr_action = {} # Action table - self.lr_goto = {} # Goto table - self.lr_productions = grammar.Productions # Copy of grammar Production array + self.lr_action = {} # Action table + self.lr_goto = {} # Goto table + self.lr_productions = grammar.Productions # Copy of grammar Production array self.lr_goto_cache = {} # Cache of computed gotos - self.lr0_cidhash = {} # Cache of closures + self.lr0_cidhash = {} # Cache of closures - self._add_count = 0 # Internal counter used to detect cycles + self._add_count = 0 # Internal counter used to detect cycles # Diagnostic information filled in by the table generator - self.sr_conflict = 0 - self.rr_conflict = 0 - self.conflicts = [] # List of conflicts + self.sr_conflict = 0 + self.rr_conflict = 0 + self.conflicts = [] # List of conflicts - self.sr_conflicts = [] - self.rr_conflicts = [] + self.sr_conflicts = [] + self.rr_conflicts = [] # Build the tables self.grammar.build_lritems() @@ -1612,7 +1612,7 @@ class LRTable: def compute_read_sets(self, C, ntrans, nullable): FP = lambda x: self.dr_relation(C, x, nullable) - R = lambda x: self.reads_relation(C, x, nullable) + R = lambda x: self.reads_relation(C, x, nullable) F = digraph(ntrans, R, FP) return F @@ -1634,7 +1634,7 @@ class LRTable: def compute_follow_sets(self, ntrans, readsets, inclsets): FP = lambda x: readsets[x] - R = lambda x: inclsets.get(x, []) + R = lambda x: inclsets.get(x, []) F = digraph(ntrans, R, FP) return F @@ -1694,10 +1694,10 @@ class LRTable: # ----------------------------------------------------------------------------- def lr_parse_table(self): Productions = self.grammar.Productions - Precedence = self.grammar.Precedence - goto = self.lr_goto # Goto array + Precedence = self.grammar.Precedence + goto = self.lr_goto # Goto array action = self.lr_action # Action array - log = self.log # Logger for output + log = self.log # Logger for output actionp = {} # Action production array (temporary) @@ -1712,9 +1712,9 @@ class LRTable: for I in C: # Loop over each production in I actlist = [] # List of actions - st_action = {} + st_action = {} st_actionp = {} - st_goto = {} + st_goto = {} log.info('') log.info('state %d', st) log.info('') @@ -1913,7 +1913,7 @@ def parse_grammar(doc, file, line): else: prodname = p[0] lastp = prodname - syms = p[2:] + syms = p[2:] assign = p[1] if assign != ':' and assign != '::=': raise SyntaxError("%s:%d: Syntax error. 
Expected ':'" % (file, dline)) @@ -1935,13 +1935,13 @@ def parse_grammar(doc, file, line): # ----------------------------------------------------------------------------- class ParserReflect(object): def __init__(self, pdict, log=None): - self.pdict = pdict - self.start = None + self.pdict = pdict + self.start = None self.error_func = None - self.tokens = None - self.modules = set() - self.grammar = [] - self.error = False + self.tokens = None + self.modules = set() + self.grammar = [] + self.error = False if log is None: self.log = Logger(sys.stderr) @@ -2396,8 +2396,7 @@ def yacc(*, debug=YACC_DEBUG, module=None, start=None, debuglog.warning('') for state, tok, resolution in lr.sr_conflicts: - debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) - + debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) already_reported = set() for state, rule, rejected in lr.rr_conflicts: if (state, id(rule), id(rejected)) in already_reported: -- Gitee From f847c85249be93282d4d1fcc1ca13649681a17bc Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Mon, 2 Dec 2024 19:16:48 +0800 Subject: [PATCH 04/87] check03 --- script/local/parser/lex.py | 8 ------ script/local/parser/myLexer.py | 1 - script/local/parser/myYACC.py | 5 ---- script/local/parser/yacc.py | 49 ---------------------------------- 4 files changed, 63 deletions(-) diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py index 606c5c95..e1732454 100644 --- a/script/local/parser/lex.py +++ b/script/local/parser/lex.py @@ -306,7 +306,6 @@ class Lexer: # ----------------------------------------------------------------------------- # ----------------------------------------------------------------------------- -# _get_regex(func) # # Returns the regular expression assigned to a function either as a doc string # or as a .regex attribute attached by the @TOKEN decorator. @@ -315,7 +314,6 @@ def _get_regex(func): return getattr(func, 'regex', func.__doc__) # ----------------------------------------------------------------------------- -# get_caller_module_dict() # # This function returns a dictionary containing all of the symbols defined within # a caller further down the call stack. This is used to get the environment @@ -326,7 +324,6 @@ def get_caller_module_dict(levels): return {**f.f_globals, **f.f_locals} # ----------------------------------------------------------------------------- -# _form_master_re() # # This function takes a list of all of the regex components and attempts to # form the master regular expression. Given limitations in the Python re @@ -389,7 +386,6 @@ def _statetoken(s, names): # ----------------------------------------------------------------------------- -# LexerReflect() # # This class represents information needed to build a lexer as extracted from a # user's input file. @@ -497,7 +493,6 @@ class LexerReflect(object): self.stateinfo[name] = statetype # Get all of the symbols with a t_ prefix and sort them into various - # categories (functions, strings, error functions, and ignore characters) def get_rules(self): tsymbols = [f for f in self.ldict if f[:2] == 't_'] @@ -661,7 +656,6 @@ class LexerReflect(object): self.validate_module(module) # ----------------------------------------------------------------------------- - # validate_module() # # This checks to see if there are duplicated t_rulename() functions or strings # in the parser input file. 
This is done using a simple regular expression @@ -695,7 +689,6 @@ class LexerReflect(object): linen += 1 # ----------------------------------------------------------------------------- -# lex(module) # # Build all of the regular expression rules from definitions in the supplied module # ----------------------------------------------------------------------------- @@ -838,7 +831,6 @@ def lex(*, module=None, object=None, debug=False, return lexobj # ----------------------------------------------------------------------------- -# runmain() # # This runs the lexer as a main program # ----------------------------------------------------------------------------- diff --git a/script/local/parser/myLexer.py b/script/local/parser/myLexer.py index e91c47b2..aa7f7090 100644 --- a/script/local/parser/myLexer.py +++ b/script/local/parser/myLexer.py @@ -130,7 +130,6 @@ class MyLexer(): # Error handling rule def t_error(self,t): - #print("Illegal character '%s'" % t.value[0]) raise Exception('Illegal character "%s"' % t.value[0]) t.lexer.skip(1) diff --git a/script/local/parser/myYACC.py b/script/local/parser/myYACC.py index c19f33dc..968170c0 100644 --- a/script/local/parser/myYACC.py +++ b/script/local/parser/myYACC.py @@ -90,10 +90,6 @@ class MyYACC(): p[0] = (p[1] <= p[3]) if p[2] == token_dict['LT']: p[0] = (p[1] < p[3]) - - # def p_cdt_expr(p): - # 'cdt : expr' - # p[0] = p[1] def p_cdt_parens(p): 'cdt : LPAREN conditions RPAREN' @@ -174,7 +170,6 @@ class MyYACC(): #Error rule for syntax errors def p_error(p): - #print("Syntax error in input!") raise Exception('Syntax error in input!') def build(self): diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index ba918220..572b28d0 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -97,12 +97,6 @@ def format_stack_entry(r): # This class is used to hold non-terminal grammar symbols during parsing. # It normally has the following attributes set: -# .type = Grammar symbol type -# .value = Symbol value -# .lineno = Starting line number -# .endlineno = Ending line number (optional, set automatically) -# .lexpos = Starting lex position -# .endlexpos = Ending lex position (optional, set automatically) class YaccSymbol: def __str__(self): @@ -679,7 +673,6 @@ class LRItem(object): return 'LRItem(' + str(self) + ')' # ----------------------------------------------------------------------------- -# rightmost_terminal() # # Return the rightmost terminal from a list of symbols. Used in add_production() # ----------------------------------------------------------------------------- @@ -746,7 +739,6 @@ class Grammar(object): return self.Productions[index] # ----------------------------------------------------------------------------- - # set_precedence() # # Sets the precedence for a given terminal. assoc is the associativity such as # 'left','right', or 'nonassoc'. level is a numeric level. @@ -762,7 +754,6 @@ class Grammar(object): self.Precedence[term] = (assoc, level) # ----------------------------------------------------------------------------- - # add_production() # # Given an action function, this function assembles a production rule and # computes its precedence level. @@ -856,7 +847,6 @@ class Grammar(object): self.Prodnames[prodname] = [p] # ----------------------------------------------------------------------------- - # set_start() # # Sets the starting symbol and creates the augmented grammar. Production # rule 0 is S' -> start where start is the start symbol. 
@@ -872,7 +862,6 @@ class Grammar(object): self.Start = start # ----------------------------------------------------------------------------- - # find_unreachable() # # Find all of the nonterminal symbols that can't be reached from the starting # symbol. Returns a list of nonterminals that can't be reached. @@ -894,7 +883,6 @@ class Grammar(object): return [s for s in self.Nonterminals if s not in reachable] # ----------------------------------------------------------------------------- - # infinite_cycles() # # This function looks at the various parsing rules and tries to detect # infinite recursion cycles (grammar rules where there is no possible way @@ -959,7 +947,6 @@ class Grammar(object): return infinite # ----------------------------------------------------------------------------- - # undefined_symbols() # # Find all symbols that were used the grammar, but not defined as tokens or # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol @@ -977,7 +964,6 @@ class Grammar(object): return result # ----------------------------------------------------------------------------- - # unused_terminals() # # Find all terminals that were defined, but not used by the grammar. Returns # a list of all symbols. @@ -991,7 +977,6 @@ class Grammar(object): return unused_tok # ------------------------------------------------------------------------------ - # unused_rules() # # Find all grammar rules that were defined, but not used (maybe not reachable) # Returns a list of productions. @@ -1006,7 +991,6 @@ class Grammar(object): return unused_prod # ----------------------------------------------------------------------------- - # unused_precedence() # # Returns a list of tuples (term,precedence) corresponding to precedence # rules that were never used by the grammar. term is the name of the terminal @@ -1023,7 +1007,6 @@ class Grammar(object): return unused # ------------------------------------------------------------------------- - # _first() # # Compute the value of FIRST1(beta) where beta is a tuple of symbols. # @@ -1061,7 +1044,6 @@ class Grammar(object): return result # ------------------------------------------------------------------------- - # compute_first() # # Compute the value of FIRST1(X) for all symbols # ------------------------------------------------------------------------- @@ -1096,7 +1078,6 @@ class Grammar(object): return self.First # --------------------------------------------------------------------- - # compute_follow() # # Computes all of the follow sets for every non-terminal symbol. The # follow set is the set of all symbols that might follow a given @@ -1147,7 +1128,6 @@ class Grammar(object): # ----------------------------------------------------------------------------- - # build_lritems() # # This function walks the list of productions and builds a complete set of the # LR items. The LR items are stored in two ways: First, they are uniquely @@ -1197,13 +1177,10 @@ class Grammar(object): # ----------------------------------------------------------------------------- # ----------------------------------------------------------------------------- -# digraph() -# traverse() # # The following two functions are used to compute set valued functions # of the form: # -# F(x) = F'(x) U U{F(y) | x R y} # # This is used to compute the values of Read() sets as well as FOLLOW sets # in LALR(1) generation. 
@@ -1408,7 +1385,6 @@ class LRTable: # ----------------------------------------------------------------------------- # ----------------------------------------------------------------------------- - # compute_nullable_nonterminals() # # Creates a dictionary containing all of the non-terminals that might produce # an empty production. @@ -1433,7 +1409,6 @@ class LRTable: return nullable # ----------------------------------------------------------------------------- - # find_nonterminal_trans(C) # # Given a set of LR(0) items, this functions finds all of the non-terminal # transitions. These are transitions in which a dot appears immediately before @@ -1455,7 +1430,6 @@ class LRTable: return trans # ----------------------------------------------------------------------------- - # dr_relation() # # Computes the DR(p,A) relationships for non-terminal transitions. The input # is a tuple (state,N) where state is a number and N is a nonterminal symbol. @@ -1482,7 +1456,6 @@ class LRTable: return terms # ----------------------------------------------------------------------------- - # reads_relation() # # Computes the READS() relation (p,A) READS (t,C). # ----------------------------------------------------------------------------- @@ -1503,7 +1476,6 @@ class LRTable: return rel # ----------------------------------------------------------------------------- - # compute_lookback_includes() # # Determines the lookback and includes relations # @@ -1521,9 +1493,6 @@ class LRTable: # # This relation is used to determine non-terminal transitions that occur # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) - # if the following holds: - # - # B -> LAT, where T -> epsilon and p' -L-> p # # L is essentially a prefix (which may be empty), T is a suffix that must be # able to derive an empty string. State p' must lead to state p with the string L. @@ -1599,13 +1568,9 @@ class LRTable: return lookdict, includedict # ----------------------------------------------------------------------------- - # compute_read_sets() # # Given a set of LR(0) items, this function computes the read sets. # - # Inputs: C = Set of LR(0) items - # ntrans = Set of nonterminal transitions - # nullable = Set of empty transitions # # Returns a set containing the read sets # ----------------------------------------------------------------------------- @@ -1617,17 +1582,12 @@ class LRTable: return F # ----------------------------------------------------------------------------- - # compute_follow_sets() # # Given a set of LR(0) items, a set of non-terminal transitions, a readset, # and an include set, this function computes the follow sets # - # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} # # Inputs: - # ntrans = Set of nonterminal transitions - # readsets = Readset (previously computed) - # inclsets = Include sets (previously computed) # # Returns a set containing the follow sets # ----------------------------------------------------------------------------- @@ -1639,7 +1599,6 @@ class LRTable: return F # ----------------------------------------------------------------------------- - # add_lookaheads() # # Attaches the lookahead symbols to grammar rules. 
# @@ -1662,7 +1621,6 @@ class LRTable: p.lookaheads[state].append(a) # ----------------------------------------------------------------------------- - # add_lalr_lookaheads() # # This function does all of the work of adding lookahead information for use # with LALR parsing @@ -1688,7 +1646,6 @@ class LRTable: self.add_lookaheads(lookd, followsets) # ----------------------------------------------------------------------------- - # lr_parse_table() # # This function constructs the parse tables for SLR or LALR # ----------------------------------------------------------------------------- @@ -1701,7 +1658,6 @@ class LRTable: actionp = {} # Action production array (temporary) - # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items # This determines the number of states C = self.lr0_items() @@ -1873,7 +1829,6 @@ class LRTable: st += 1 # ----------------------------------------------------------------------------- -# get_caller_module_dict() # # This function returns a dictionary containing all of the symbols defined within # a caller further down the call stack. This is used to get the environment @@ -1888,7 +1843,6 @@ def get_caller_module_dict(levels): return ldict # ----------------------------------------------------------------------------- -# parse_grammar() # # This takes a raw grammar rule string and parses it into production data # ----------------------------------------------------------------------------- @@ -1927,7 +1881,6 @@ def parse_grammar(doc, file, line): return grammar # ----------------------------------------------------------------------------- -# ParserReflect() # # This class represents information extracted for building a parser including # start symbol, error function, tokens, precedence list, action functions, @@ -1984,7 +1937,6 @@ class ParserReflect(object): return ''.join(parts) # ----------------------------------------------------------------------------- - # validate_modules() # # This method checks to see if there are duplicated p_rulename() functions # in the parser module file. 
Without this function, it is really easy for @@ -2206,7 +2158,6 @@ class ParserReflect(object): self.grammar = grammar # ----------------------------------------------------------------------------- -# yacc(module) # # Build a parser # ----------------------------------------------------------------------------- -- Gitee From 6ef1cc78a059ecd1f876e6518ed1e05631d3d165 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Mon, 2 Dec 2024 19:27:31 +0800 Subject: [PATCH 05/87] code check03 --- script/local/LocalCheckSE.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/script/local/LocalCheckSE.py b/script/local/LocalCheckSE.py index 24a048cb..b32ea5c2 100644 --- a/script/local/LocalCheckSE.py +++ b/script/local/LocalCheckSE.py @@ -2541,9 +2541,9 @@ def execute_query(sql_query): ############################################################################# -def check_connection_status(isSetting): +def check_connection_status(is_setting): """ - function : checkConnectionStatus + function : check_connection_status input : Bool output : NA """ @@ -2692,7 +2692,7 @@ def checkConnection(isSetting=False): checkHostnossl() checkHostAddressno0() checkSSLConnection(isSetting) - check_connection_status(isSetting) + check_connection_status(is_setting) def checkMonitorIP(isSetting): @@ -4823,7 +4823,7 @@ def checkOtherConfigurations(isSetting=False): """ checkBackslashQuote(isSetting) checkAllowSystemTableMods(isSetting) - check_running_time(isSetting) + check_running_time(is_setting) check_users() check_phase_commit() check_autovacuum() @@ -4870,8 +4870,8 @@ def checkAllowSystemTableMods(isSetting): setAllowSystemTableMods(data) -def check_running_time(isSetting): - """ +def check_running_time(is_setting): + """" function : check_running_time input : Bool output : NA -- Gitee From 3b8692376929ca5681f1a7e79df1e5d01bd09134 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Mon, 2 Dec 2024 20:01:54 +0800 Subject: [PATCH 06/87] code check03 --- script/local/LocalCheckSE.py | 8 ++++---- script/local/parser/myLexer.py | 3 ++- script/local/parser/myYACC.py | 3 ++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/script/local/LocalCheckSE.py b/script/local/LocalCheckSE.py index b32ea5c2..98926900 100644 --- a/script/local/LocalCheckSE.py +++ b/script/local/LocalCheckSE.py @@ -2541,7 +2541,7 @@ def execute_query(sql_query): ############################################################################# -def check_connection_status(is_setting): +def check_connection_status(): """ function : check_connection_status input : Bool @@ -2692,7 +2692,7 @@ def checkConnection(isSetting=False): checkHostnossl() checkHostAddressno0() checkSSLConnection(isSetting) - check_connection_status(is_setting) + check_connection_status() def checkMonitorIP(isSetting): @@ -4823,7 +4823,7 @@ def checkOtherConfigurations(isSetting=False): """ checkBackslashQuote(isSetting) checkAllowSystemTableMods(isSetting) - check_running_time(is_setting) + check_running_time() check_users() check_phase_commit() check_autovacuum() @@ -4870,7 +4870,7 @@ def checkAllowSystemTableMods(isSetting): setAllowSystemTableMods(data) -def check_running_time(is_setting): +def check_running_time(): """" function : check_running_time input : Bool diff --git a/script/local/parser/myLexer.py b/script/local/parser/myLexer.py index aa7f7090..a33c5a38 100644 --- a/script/local/parser/myLexer.py +++ b/script/local/parser/myLexer.py @@ -16,7 +16,8 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
 # See the Mulan PSL v2 for more details.
 # ----------------------------------------------------------------------------
-# Description : LocalCheckOS.py is a utility to check OS info on local node.
+# Description : myLexer.py is a local utility that defines the token
+#                rules used by the dependency-check rule parser
 #############################################################################
 import os
 import sys
diff --git a/script/local/parser/myYACC.py b/script/local/parser/myYACC.py
index 968170c0..0d692881 100644
--- a/script/local/parser/myYACC.py
+++ b/script/local/parser/myYACC.py
@@ -16,7 +16,8 @@
 # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 # See the Mulan PSL v2 for more details.
 # ----------------------------------------------------------------------------
-# Description : CheckInstall.py is a utility to install Gauss MPP Database.
+# Description : myYACC.py is a local utility that defines the grammar
+#                rules used by the dependency-check rule parser
 #############################################################################
 import os
 import sys
-- 
Gitee


From c0c480804ea82055f13fe33fd045de5107d8d1fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com>
Date: Tue, 3 Dec 2024 08:58:16 +0000
Subject: [PATCH 07/87] code check04
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com>
---
 script/local/LocalCheckSE.py | 251 ++++++++++++++++++++---------
 1 file changed, 141 insertions(+), 110 deletions(-)

diff --git a/script/local/LocalCheckSE.py b/script/local/LocalCheckSE.py
index 98926900..ea10718c 100644
--- a/script/local/LocalCheckSE.py
+++ b/script/local/LocalCheckSE.py
@@ -2616,7 +2616,10 @@ def check_memory_usage_situation():
 
 
 def convert_memory_str_to_num(mem_str):
-    units = {'KB': 1, 'MB': 1024, 'GB': 1024, 'TB': 1024 ** 3}
+    units = {'KB': 1, 'MB': 1024, 'GB': 1024**2, 'TB': 1024**3}
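+    # The table normalizes every size to KB, so for example:
+    #   convert_memory_str_to_num('128MB') -> 128 * 1024   (KB)
+    #   convert_memory_str_to_num('8GB')   -> 8 * 1024**2  (KB)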
int(re.search(r'\d+', value.strip()).group())
-                pg_pid = execute_query("""SELECT pg_backend_pid();""")
-                peak = get_cmd_res("grep ^VmPeak /proc/" + pg_pid + "/status | awk '{ print $2 }'").strip()
-                if peak.isdigit():
-                    suggesthugepages = int(peak) / int(os_info['Hugepagesize'])
-                    if os_info['HugePages_Total'] < int(suggesthugepages + 0.5):
-                        message = " Warning reason:set vm.nr_hugepages=%d" % int(
-                            suggesthugepages + 0.5) + " in /etc/sysctl.conf and invoke sysctl -p /etc/sysctl.conf to reload it. This will allocate Huge Pages (it may require a system reboot)"
-                        g_logger.log(message)
-                if os_info['Hugepagesize'] == 2048:
-                    g_logger.log(" Warning reason:Change Huge Pages size from 2MB to 1GB if the machine is dedicated to openGauss")
+        check_and_suggest_hugepages(huge_pages)
+
+def get_os_name():
+    return get_cmd_res("uname -s")
+
+def is_supported_os(os_name):
+    return os_name in ['linux', 'Linux', 'freebsd']
+
+def get_nr_hugepages():
+    return get_cmd_res("cat /proc/sys/vm/nr_hugepages")
+
+def is_hugepages_available(nr_hugepages):
+    return nr_hugepages is not None and int(nr_hugepages) > 0
+
+def check_and_suggest_hugepages(huge_pages):
+    os_huge_info = get_os_huge_info()
+    pg_pid = execute_query("""SELECT pg_backend_pid();""")
+    peak = get_peak_memory(pg_pid)
+    if peak and peak.isdigit():
+        suggesthugepages = calculate_suggested_hugepages(peak, os_huge_info)
+        suggest_and_log_hugepages(suggesthugepages, os_huge_info)
+    check_hugepages_size(os_huge_info)
+
+def get_os_huge_info():
+    os_huge = subprocess.run("grep ^Huge /proc/meminfo", shell=True, stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE, universal_newlines=True).stdout
+    os_huge_list = os_huge.replace(' ', '').split('\n')
+    os_info = {}
+    for item in os_huge_list:
+        if item:
+            key, value = item.split(':')
+            os_info[key.strip()] = int(re.search(r'\d+', value.strip()).group())
+    return os_info
+
+def get_peak_memory(pg_pid):
+    return get_cmd_res(f"grep ^VmPeak /proc/{pg_pid}/status | awk '{{ print $2 }}'").strip()
+
+
+def calculate_suggested_hugepages(peak, os_huge_info):
+    hugepagesize = os_huge_info.get('Hugepagesize')
+    if hugepagesize is None:
+        return 0
+    try:
+        peak_value = int(peak)
+        suggested_hugepages = peak_value / hugepagesize
+        return suggested_hugepages
+    except ValueError:
+        return 0
+
+def suggest_and_log_hugepages(suggesthugepages, os_huge_info):
+    huge_pages_total = os_huge_info.get('HugePages_Total', 0)
+    if huge_pages_total < int(suggesthugepages + 0.5):
+        message = f" Warning reason: set vm.nr_hugepages={int(suggesthugepages + 0.5)} in /etc/sysctl.conf and invoke sysctl -p /etc/sysctl.conf to reload it. This will allocate Huge Pages (it may require a system reboot)"
+        g_logger.log(message)
+
+def check_hugepages_size(os_huge_info):
+    hugepagesize = os_huge_info.get('Hugepagesize')
+    if hugepagesize == 2048:
+        g_logger.log(" Warning reason: Change Huge Pages size from 2MB to 1GB if the machine is dedicated to openGauss")
 
 
 def check_io_schedule(ssd=0):
     active_schedulers = {}
-    os = {}
-    os['name'] = get_cmd_res("uname -s")
-    if os['name'] == 'darwin':
-        g_logger.log(" Warning reason:No I/O scheduler information on MacOS")
-    else:
-        storage_units_list = subprocess.run("ls /sys/block/", shell=True, stdout=subprocess.PIPE,
-                                            stderr=subprocess.PIPE, universal_newlines=True)
-        if storage_units_list.returncode != 0:
-            g_logger.log(" Warning reason:Unable to explore storage unit(s) system attributes")
-        else:
-            for unit in storage_units_list.stdout.split('\n'):
-                if unit == '.' or unit == '..'
or unit == '': - continue - if unit.startswith('sr'): + system_info = {} + system_info['name'] = get_cmd_res("uname -s") + if system_info['name'] == 'darwin': + return + storage_units_list = subprocess.run("ls /sys/block/", shell=True, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, universal_newlines=True) + if storage_units_list.returncode == 0: + for unit in storage_units_list.stdout.split('\n'): + if unit == '.' or unit == '..' or unit == '': + continue + if unit.startswith('sr'): + continue + # Scheduler + unit_schedulers = subprocess.run("cat /sys/block/%s/queue/scheduler" % unit, shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + universal_newlines=True) + if unit_schedulers.returncode == 0: + unit_schedulers = str(unit_schedulers.stdout.strip()) + if unit_schedulers == 'none': continue - # Scheduler - unit_schedulers = subprocess.run("cat /sys/block/%s/queue/scheduler" % unit, shell=True, - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - universal_newlines=True) - if unit_schedulers.returncode != 0: - g_logger.log( - " Warning reason:Unable to identify the scheduler used for the storage unit %s" % unit) - else: - unit_schedulers = str(unit_schedulers.stdout.strip()) - if unit_schedulers == 'none': - continue - for scheduler in unit_schedulers.split(): - match = re.match(r'^\[([a-z-]+)\]$', scheduler) - if match: - active_schedulers[match.group(1)] = active_schedulers.get(match.group(1), 0) + 1 - - # Detect SSD or rotational disks - rotational_storage = 0 - unit_is_rotational = 1 # Default - if ssd: + for scheduler in unit_schedulers.split(): + match = re.match(r'^\[([a-z-]+)\]$', scheduler) + if match: + active_schedulers[match.group(1)] = active_schedulers.get(match.group(1), 0) + 1 + # Detect SSD or rotational disks + rotational_storage = 0 + unit_is_rotational = 1 # Default + if ssd: + unit_is_rotational = 0 + else: + unit_is_rotational = subprocess.run("cat /sys/block/%s/queue/rotational" % unit, shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + universal_newlines=True) + if unit_is_rotational.returncode != 0: unit_is_rotational = 0 else: - unit_is_rotational = subprocess.run("cat /sys/block/%s/queue/rotational" % unit, shell=True, - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - universal_newlines=True) - if unit_is_rotational.returncode != 0: - g_logger.log( - " Warning reason:Unable to identify if the storage unit %s is rotational" % unit) - unit_is_rotational = 0 - else: - unit_is_rotational = unit_is_rotational.stdout.strip() - rotational_storage += int(unit_is_rotational) - - hypervisor = None - if os['name'] != 'darwin': - systemd = subprocess.run("systemd-detect-virt --vm", shell=True, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, universal_newlines=True) - if systemd.returncode == 0: - systemd = systemd.stdout.strip() - if re.match('\S+', systemd): - hypervisor = systemd - else: - dmesg = subprocess.run("dmesg", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - universal_newlines=True) - for line in dmesg.stdout.split('\n'): - if re.match('vmware', line, re.IGNORECASE): - hypervisor = 'VMware' - break - elif re.match('kvm', line, re.IGNORECASE): - hypervisor = 'KVM' - break - elif re.match('xen', line, re.IGNORECASE): - hypervisor = 'XEN' - break - elif re.match('vbox', line, re.IGNORECASE): - hypervisor = 'VirtualBox' - break - elif re.match('hyper-v', line, re.IGNORECASE): - hypervisor = 'Hyper-V' - break - - if hypervisor is not None and rotational_storage is not None and rotational_storage > 0: - g_logger.log( - " 
Warning reason:If openGauss runs in a virtual machine, I cannot know the underlying physical storage type. Use the --ssd arg if the VM only uses SSD storage") + unit_is_rotational = unit_is_rotational.stdout.strip() + rotational_storage += int(unit_is_rotational) + hypervisor = None + if system_info['name'] != 'darwin': + systemd = subprocess.run("systemd-detect-virt --vm", shell=True, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, universal_newlines=True) + if systemd.returncode == 0: + systemd = systemd.stdout.strip() + if re.match('\S+', systemd): + hypervisor = systemd + else: + dmesg = subprocess.run("dmesg", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + universal_newlines=True) + for line in dmesg.stdout.split('\n'): + if re.match('vmware', line, re.IGNORECASE): + hypervisor = 'VMware' + break + elif re.match('kvm', line, re.IGNORECASE): + hypervisor = 'KVM' + break + elif re.match('xen', line, re.IGNORECASE): + hypervisor = 'XEN' + break + elif re.match('vbox', line, re.IGNORECASE): + hypervisor = 'VirtualBox' + break + elif re.match('hyper-v', line, re.IGNORECASE): + hypervisor = 'Hyper-V' + break + + if hypervisor is not None and rotational_storage is not None and rotational_storage > 0: + g_logger.log( + " Warning reason:If openGauss runs in a virtual machine, I cannot know the underlying physical storage type. Use the --ssd arg if the VM only uses SSD storage") - if hypervisor is not None and 'cfq' in active_schedulers: - g_logger.log( - " Warning reason:The CFQ scheduler is inadequate on a virtual machine (because the hypervisor and/or underlying kernel is already in charge of the I/O scheduling)") + if hypervisor is not None and 'cfq' in active_schedulers: + g_logger.log( + " Warning reason:The CFQ scheduler is inadequate on a virtual machine (because the hypervisor and/or underlying kernel is already in charge of the I/O scheduling)") def check_dependencies(): -- Gitee From fe92541be565f41dcef91ced041a6db6f9024e66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Tue, 3 Dec 2024 09:19:28 +0000 Subject: [PATCH 08/87] code check06 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/LocalCheckSE.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/script/local/LocalCheckSE.py b/script/local/LocalCheckSE.py index ea10718c..6dc079e5 100644 --- a/script/local/LocalCheckSE.py +++ b/script/local/LocalCheckSE.py @@ -2616,8 +2616,10 @@ def check_memory_usage_situation(): def convert_memory_str_to_num(mem_str): - units = {'KB': 1, 'MB': 1024, 'GB': 1024**2, 'TB': 1024**3} + units = {'KB': 1, 'MB': 1024, 'GB': 1024 ** 2, 'TB': 1024 ** 3} num, unit = mem_str[:-2], mem_str[-2:] + if unit not in units: + raise ValueError(f"Invalid unit '{unit}' in memory string.") return int(float(num) * units[unit]) -- Gitee From 39396a0e0f43dd4195a405df43b232c0f5a742aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Wed, 4 Dec 2024 01:57:53 +0000 Subject: [PATCH 09/87] check08 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/LocalCheckSE.py | 60 +++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/script/local/LocalCheckSE.py 
b/script/local/LocalCheckSE.py index 6dc079e5..e42d2a15 100644 --- a/script/local/LocalCheckSE.py +++ b/script/local/LocalCheckSE.py @@ -5185,38 +5185,40 @@ def check_io_schedule(ssd=0): return storage_units_list = subprocess.run("ls /sys/block/", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) - if storage_units_list.returncode == 0: - for unit in storage_units_list.stdout.split('\n'): - if unit == '.' or unit == '..' or unit == '': - continue - if unit.startswith('sr'): + if storage_units_list.returncode !=0: + return + for unit in storage_units_list.stdout.split('\n'): + if unit == '.' or unit == '..' or unit == '': + continue + if unit.startswith('sr'): + continue + # Scheduler + unit_schedulers = subprocess.run("cat /sys/block/%s/queue/scheduler" % unit, shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + universal_newlines=True) + if unit_schedulers.returncode == 0: + unit_schedulers = str(unit_schedulers.stdout.strip()) + if unit_schedulers == 'none': continue - # Scheduler - unit_schedulers = subprocess.run("cat /sys/block/%s/queue/scheduler" % unit, shell=True, - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - universal_newlines=True) - if unit_schedulers.returncode == 0: - unit_schedulers = str(unit_schedulers.stdout.strip()) - if unit_schedulers == 'none': - continue - for scheduler in unit_schedulers.split(): - match = re.match(r'^\[([a-z-]+)\]$', scheduler) - if match: - active_schedulers[match.group(1)] = active_schedulers.get(match.group(1), 0) + 1 - # Detect SSD or rotational disks - rotational_storage = 0 - unit_is_rotational = 1 # Default - if ssd: + for scheduler in unit_schedulers.split(): + match = re.match(r'^\[([a-z-]+)\]$', scheduler) + if match: + active_schedulers[match.group(1)] = active_schedulers.get(match.group(1), 0) + 1 + # Detect SSD or rotational disks + rotational_storage = 0 + unit_is_rotational = 1 # Default + if ssd: + unit_is_rotational = 0 + else: + unit_is_rotational = subprocess.run("cat /sys/block/%s/queue/rotational" % unit, shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + universal_newlines=True) + if unit_is_rotational.returncode != 0: unit_is_rotational = 0 else: - unit_is_rotational = subprocess.run("cat /sys/block/%s/queue/rotational" % unit, shell=True, - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - universal_newlines=True) - if unit_is_rotational.returncode != 0: - unit_is_rotational = 0 - else: - unit_is_rotational = unit_is_rotational.stdout.strip() - rotational_storage += int(unit_is_rotational) + unit_is_rotational = unit_is_rotational.stdout.strip() + rotational_storage += int(unit_is_rotational) + hypervisor = None if system_info['name'] != 'darwin': systemd = subprocess.run("systemd-detect-virt --vm", shell=True, stdout=subprocess.PIPE, -- Gitee From 9a486ff03eacae7368bbeec1062a7f8287cde52c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Wed, 4 Dec 2024 03:00:46 +0000 Subject: [PATCH 10/87] code check09 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/LocalCheckSE.py | 164 ++++++++++++++++++++++++----------- 1 file changed, 112 insertions(+), 52 deletions(-) diff --git a/script/local/LocalCheckSE.py b/script/local/LocalCheckSE.py index e42d2a15..9160e357 100644 --- a/script/local/LocalCheckSE.py +++ b/script/local/LocalCheckSE.py @@ -5178,48 +5178,101 @@ def 
check_hugepages_size(os_huge_info): def check_io_schedule(ssd=0): + # Get system info + system_info = get_system_info() + if system_info['name'] == 'darwin': + return + + # Get storage units list + storage_units_list = get_storage_units_list() + if not storage_units_list: + return + + # Process each storage unit active_schedulers = {} + rotational_storage = 0 + for unit in storage_units_list: + if should_skip_unit(unit): + continue + + unit_schedulers, unit_is_rotational = get_unit_info(unit, ssd) + if unit_schedulers: + active_schedulers = update_active_schedulers(unit_schedulers, active_schedulers) + rotational_storage += unit_is_rotational + + # Check if the system is running in a hypervisor and log the appropriate warnings + hypervisor = detect_hypervisor() + log_warnings(hypervisor, rotational_storage, active_schedulers) + + +def get_system_info(): + """Get system info using the `uname -s` command.""" system_info = {} system_info['name'] = get_cmd_res("uname -s") - if system_info['name'] == 'darwin': - return + return system_info + + +def get_storage_units_list(): + """Get a list of storage units from /sys/block.""" storage_units_list = subprocess.run("ls /sys/block/", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) - if storage_units_list.returncode !=0: - return - for unit in storage_units_list.stdout.split('\n'): - if unit == '.' or unit == '..' or unit == '': - continue - if unit.startswith('sr'): - continue - # Scheduler - unit_schedulers = subprocess.run("cat /sys/block/%s/queue/scheduler" % unit, shell=True, - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - universal_newlines=True) - if unit_schedulers.returncode == 0: - unit_schedulers = str(unit_schedulers.stdout.strip()) - if unit_schedulers == 'none': - continue - for scheduler in unit_schedulers.split(): - match = re.match(r'^\[([a-z-]+)\]$', scheduler) - if match: - active_schedulers[match.group(1)] = active_schedulers.get(match.group(1), 0) + 1 - # Detect SSD or rotational disks - rotational_storage = 0 - unit_is_rotational = 1 # Default - if ssd: - unit_is_rotational = 0 - else: - unit_is_rotational = subprocess.run("cat /sys/block/%s/queue/rotational" % unit, shell=True, - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - universal_newlines=True) - if unit_is_rotational.returncode != 0: - unit_is_rotational = 0 - else: - unit_is_rotational = unit_is_rotational.stdout.strip() - rotational_storage += int(unit_is_rotational) + if storage_units_list.returncode != 0: + return None + return storage_units_list.stdout.split('\n') + + +def should_skip_unit(unit): + """Determine whether a unit should be skipped.""" + return unit == '.' or unit == '..' 
or unit == '' or unit.startswith('sr') + +def get_unit_info(unit, ssd): + """Get scheduler and rotational information for a specific unit.""" + unit_schedulers = get_unit_schedulers(unit) + if unit_schedulers is None: + return None, 0 + + unit_is_rotational = get_unit_rotational(unit, ssd) + return unit_schedulers, unit_is_rotational + + +def get_unit_schedulers(unit): + """Get the scheduler for a unit.""" + unit_schedulers = subprocess.run(f"cat /sys/block/{unit}/queue/scheduler", shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + universal_newlines=True) + if unit_schedulers.returncode != 0: + return None + return unit_schedulers.stdout.strip() + + +def get_unit_rotational(unit, ssd): + """Get the rotational info for a unit.""" + if ssd: + return 0 # If ssd is passed, assume it's non-rotational + + unit_is_rotational = subprocess.run(f"cat /sys/block/{unit}/queue/rotational", shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + universal_newlines=True) + if unit_is_rotational.returncode != 0: + return 0 + return int(unit_is_rotational.stdout.strip()) + + +def update_active_schedulers(unit_schedulers, active_schedulers): + """Update active schedulers.""" + for scheduler in unit_schedulers.split(): + match = re.match(r'^\[([a-z-]+)\]$', scheduler) + if match: + active_schedulers[match.group(1)] = active_schedulers.get(match.group(1), 0) + 1 + return active_schedulers + + +def detect_hypervisor(): + """Detect if the system is running in a hypervisor.""" hypervisor = None + system_info = get_system_info() + if system_info['name'] != 'darwin': systemd = subprocess.run("systemd-detect-virt --vm", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) @@ -5231,29 +5284,36 @@ def check_io_schedule(ssd=0): dmesg = subprocess.run("dmesg", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) for line in dmesg.stdout.split('\n'): - if re.match('vmware', line, re.IGNORECASE): - hypervisor = 'VMware' - break - elif re.match('kvm', line, re.IGNORECASE): - hypervisor = 'KVM' - break - elif re.match('xen', line, re.IGNORECASE): - hypervisor = 'XEN' - break - elif re.match('vbox', line, re.IGNORECASE): - hypervisor = 'VirtualBox' - break - elif re.match('hyper-v', line, re.IGNORECASE): - hypervisor = 'Hyper-V' + hypervisor = match_hypervisor(line) + if hypervisor: break + return hypervisor + + +def match_hypervisor(line): + """Match hypervisor based on dmesg output.""" + hypervisor_dict = { + 'vmware': 'VMware', + 'kvm': 'KVM', + 'xen': 'XEN', + 'vbox': 'VirtualBox', + 'hyper-v': 'Hyper-V' + } + for keyword, name in hypervisor_dict.items(): + if re.match(keyword, line, re.IGNORECASE): + return name + return None + - if hypervisor is not None and rotational_storage is not None and rotational_storage > 0: +def log_warnings(hypervisor, rotational_storage, active_schedulers): + """Log warnings based on the hypervisor and rotational storage information.""" + if hypervisor is not None and rotational_storage > 0: g_logger.log( - " Warning reason:If openGauss runs in a virtual machine, I cannot know the underlying physical storage type. Use the --ssd arg if the VM only uses SSD storage") + " Warning reason: If openGauss runs in a virtual machine, I cannot know the underlying physical storage type. 
Use the --ssd arg if the VM only uses SSD storage") if hypervisor is not None and 'cfq' in active_schedulers: g_logger.log( - " Warning reason:The CFQ scheduler is inadequate on a virtual machine (because the hypervisor and/or underlying kernel is already in charge of the I/O scheduling)") + " Warning reason: The CFQ scheduler is inadequate on a virtual machine (because the hypervisor and/or underlying kernel is already in charge of the I/O scheduling)") def check_dependencies(): -- Gitee From 2765d5c1bb359e6075ea5e21db95614fe02b6573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Wed, 4 Dec 2024 03:23:08 +0000 Subject: [PATCH 11/87] code check10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/LocalCheckSE.py | 116 ++++++++++++++++++++--------------- 1 file changed, 65 insertions(+), 51 deletions(-) diff --git a/script/local/LocalCheckSE.py b/script/local/LocalCheckSE.py index 9160e357..eaa64ffd 100644 --- a/script/local/LocalCheckSE.py +++ b/script/local/LocalCheckSE.py @@ -5272,24 +5272,26 @@ def detect_hypervisor(): """Detect if the system is running in a hypervisor.""" hypervisor = None system_info = get_system_info() - - if system_info['name'] != 'darwin': - systemd = subprocess.run("systemd-detect-virt --vm", shell=True, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, universal_newlines=True) - if systemd.returncode == 0: - systemd = systemd.stdout.strip() - if re.match('\S+', systemd): - hypervisor = systemd - else: - dmesg = subprocess.run("dmesg", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - universal_newlines=True) - for line in dmesg.stdout.split('\n'): - hypervisor = match_hypervisor(line) - if hypervisor: - break + if system_info['name'] == 'darwin': + return + systemd = subprocess.run("systemd-detect-virt --vm", shell=True, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, universal_newlines=True) + if systemd.returncode == 0: + systemd = systemd.stdout.strip() + if re.match('\S+', systemd): + hypervisor = systemd + else: + dmesg = subprocess.run("dmesg", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + universal_newlines=True) + for line in dmesg.stdout.split('\n'): + hypervisor = match_hypervisor(line) + if hypervisor: + break return hypervisor + + def match_hypervisor(line): """Match hypervisor based on dmesg output.""" hypervisor_dict = { @@ -5316,48 +5318,48 @@ def log_warnings(hypervisor, rotational_storage, active_schedulers): " Warning reason: The CFQ scheduler is inadequate on a virtual machine (because the hypervisor and/or underlying kernel is already in charge of the I/O scheduling)") -def check_dependencies(): - sql_query = """ - SELECT json_agg(json_build_object( - 'name', name, - 'setting', setting, - 'unit', unit, - 'category',category, - 'short_desc', short_desc, - 'extra_desc', extra_desc, - 'context', context, - 'vartype', vartype, - 'source', source, - 'min_val', min_val, - 'max_val', max_val, - 'enumvals', enumvals, - 'boot_val', boot_val, - 'reset_val', reset_val, - 'sourcefile', sourcefile, - 'sourceline', sourceline - )) FROM pg_settings; - """ - port = int(getValueFromFile('port')) +def execute_sql_query(port, sql_query): cmd = f"gsql -d postgres -p '{port}' -r -c \"{sql_query}\"" result = subprocess.run( - ['gsql', '-d', 'postgres', '-p', str(port), '-c',sql_query, '-t', '-A'], + ['gsql', '-d', 'postgres', '-p', 
str(port), '-c', sql_query, '-t', '-A'], capture_output=True, text=True ) if result.returncode != 0: raise Exception((ErrorCode.GAUSS_505["GAUSS_50502"] % "ConnectionConfiguration") + ("The cmd is : %s" % cmd)) + return json.loads(result.stdout.strip()) - ret = json.loads(result.stdout.strip()) - settings = {} - for row in ret: - settings[row['name']] = row - set_settings(settings) - set_dependency_settings(settings) + +def get_settings(): + sql_query = """ + SELECT json_agg(json_build_object( + 'name', name, + 'setting', setting, + 'unit', unit, + 'category', category, + 'short_desc', short_desc, + 'extra_desc', extra_desc, + 'context', context, + 'vartype', vartype, + 'source', source, + 'min_val', min_val, + 'max_val', max_val, + 'enumvals', enumvals, + 'boot_val', boot_val, + 'reset_val', reset_val, + 'sourcefile', sourcefile, + 'sourceline', sourceline + )) FROM pg_settings; + """ + port = int(getValueFromFile('port')) + settings_data = execute_sql_query(port, sql_query) + settings = {row['name']: row for row in settings_data} + return settings + + +def process_dependencies(rule_files): current_path = os.path.dirname(os.path.realpath(__file__)) - # Determine whether a single node or multiple nodes - local_role_value = get_local_role_value(os.environ['PGDATA']) - rule_files = determine_rule_files(local_role_value) m = MyLexer() m.build() y = MyYACC() @@ -5366,15 +5368,27 @@ def check_dependencies(): for rule_file in rule_files: with open(os.path.join(current_path, rule_file), mode='r', newline='', encoding='utf-8') as csvfile: reader = csv.reader(csvfile) - dependencies = [] - for row in reader: - dependencies.append(row[1]) - dependencies = list(set(dependencies)) + dependencies = {row[1] for row in reader} for dependency in dependencies: try: dependency_parser.parse(dependency) except Exception as e: continue + + +def check_dependencies(): + settings = get_settings() + set_settings(settings) + set_dependency_settings(settings) + + local_role_value = get_local_role_value(os.environ['PGDATA']) + rule_files = determine_rule_files(local_role_value) + + process_dependencies(rule_files) + display_dependency_info() + + +def display_dependency_info(): show_dependency_info() -- Gitee From 98eb45a36f4031dc8e185edd23adabb1ffdde4bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Wed, 4 Dec 2024 03:28:00 +0000 Subject: [PATCH 12/87] 11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/LocalCheckSE.py | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/script/local/LocalCheckSE.py b/script/local/LocalCheckSE.py index eaa64ffd..b728f44e 100644 --- a/script/local/LocalCheckSE.py +++ b/script/local/LocalCheckSE.py @@ -5365,15 +5365,29 @@ def process_dependencies(rule_files): y = MyYACC() y.build() dependency_parser = y.yacc + for rule_file in rule_files: - with open(os.path.join(current_path, rule_file), mode='r', newline='', encoding='utf-8') as csvfile: - reader = csv.reader(csvfile) - dependencies = {row[1] for row in reader} - for dependency in dependencies: - try: - dependency_parser.parse(dependency) - except Exception as e: - continue + process_rule_file(current_path, rule_file, dependency_parser) + + +def process_rule_file(current_path, rule_file, dependency_parser): + file_path = os.path.join(current_path, rule_file) + dependencies = 
read_dependencies(file_path) + parse_dependencies(dependencies, dependency_parser) + + +def read_dependencies(file_path): + with open(file_path, mode='r', newline='', encoding='utf-8') as csvfile: + reader = csv.reader(csvfile) + return {row[1] for row in reader} + + +def parse_dependencies(dependencies, dependency_parser): + for dependency in dependencies: + try: + dependency_parser.parse(dependency) + except Exception as e: + continue def check_dependencies(): -- Gitee From 8374212dcf76be1e681f0620028ef700f60a895a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Wed, 4 Dec 2024 07:14:13 +0000 Subject: [PATCH 13/87] 12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/utils.py | 66 +++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/script/local/parser/utils.py b/script/local/parser/utils.py index 539c6b52..ef9dba52 100644 --- a/script/local/parser/utils.py +++ b/script/local/parser/utils.py @@ -149,27 +149,53 @@ def show_advices(): cnt = 0 for category in advices.keys(): print_title_2(category) - if nocolor != 0: - for priority in advices[category].keys(): - for advice in advices[category][priority]: - print('[' + priority.upper() + ']' + advice) - cnt += 1 - else: - for priority in advices[category].keys(): - if priority == 'high': - print('\033[0;31;40m', end='') - elif priority == 'medium': - print('\033[0;33;40m',end='') - elif priority == 'low': - print('\033[0;34;40m',end='') - for advice in advices[category][priority]: - print('[' + priority.upper() + ']' + advice + '\033[0m') - cnt += 1 + cnt += display_advices(category) if cnt == 0: - if nocolor != 0: - print('Everything is OK') - else: - print("\033[0;32;40m Everything is OK \033[0m") + display_no_advices_message() + +def display_advices(category): + global nocolor + cnt = 0 + if nocolor != 0: + for priority in advices[category].keys(): + cnt += display_advices_with_priority(priority, advices[category][priority]) + else: + for priority in advices[category].keys(): + cnt += display_colored_advices(priority, advices[category][priority]) + return cnt + +def display_advices_with_priority(priority, advices_list): + cnt = 0 + for advice in advices_list: + print('[' + priority.upper() + ']' + advice) + cnt += 1 + return cnt + +def display_colored_advices(priority, advices_list): + cnt = 0 + color = get_color(priority) + print(color, end='') + for advice in advices_list: + print('[' + priority.upper() + ']' + advice + '\033[0m') + cnt += 1 + return cnt + +def get_color(priority): + if priority == 'high': + return '\033[0;31;40m' # Red + elif priority == 'medium': + return '\033[0;33;40m' # Yellow + elif priority == 'low': + return '\033[0;34;40m' # Blue + return '\033[0m' # Default color + +def display_no_advices_message(): + global nocolor + if nocolor != 0: + print('Everything is OK') + else: + print("\033[0;32;40m Everything is OK \033[0m") + settings = None def set_settings(sts): -- Gitee From d62af0820e20eb5367cb57223ac0c8f9e821a9b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Wed, 4 Dec 2024 07:38:36 +0000 Subject: [PATCH 14/87] 13 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- 
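A table-driven lookup like the one this patch introduces in standard_units()
keeps each unit's conversion factor in one place. A minimal, self-contained
sketch of the same idea (to_bytes and the demo values are illustrative only,
not part of the change):

from decimal import Decimal

# Factor table mirroring the unit_multipliers dict added below.
UNIT_MULTIPLIERS = {'KB': 1024, 'MB': 1024 ** 2, 'GB': 1024 ** 3}

def to_bytes(value, unit):
    # Dictionary dispatch: adding a unit is a one-line change instead of
    # another elif branch.
    return Decimal(value) * UNIT_MULTIPLIERS[unit]

print(to_bytes('4', 'GB'))  # 4294967296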
 script/local/parser/utils.py | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/script/local/parser/utils.py b/script/local/parser/utils.py
index ef9dba52..136f9555 100644
--- a/script/local/parser/utils.py
+++ b/script/local/parser/utils.py
@@ -212,26 +212,24 @@ def get_setting(name):
 # standard units
 def standard_units(value, unit=None):
     if unit is None and isinstance(value, str):
-        pattern = r'^-?\d+(\.\d+)?$'
+        pattern = r'^-?\d+(\.\d+)?$'
         if re.match(pattern, value):
             return Decimal(value)
         return value
     value = Decimal(value)
-    if unit == 'KB' or unit == 'K' or unit == 'kB':
-        return value * 1024
-    elif unit == '8KB' or unit == '8kB':
-        return value * 1024 * 8
-    elif unit == '16KB' or unit == '16kB':
-        return value * 1024 * 16
-    elif unit == 'MB' or unit == 'M' or unit == 'mB':
-        return value * 1024 * 1024
-    elif unit == 'GB' or unit == 'G' or unit == 'gB':
-        return value * 1024 * 1024 * 1024
-    elif unit == 'TB' or unit == 'T' or unit == 'tB':
-        return value * 1024 * 1024 * 1024 * 1024
-    elif unit == 'PB' or unit == 'P' or unit == 'pB':
-        return value * 1024 * 1024 * 1024 * 1024 * 1024
-    elif unit == 's':
+    unit_multipliers = {
+        'KB': 1024, 'K': 1024, 'kB': 1024,
+        '8KB': 1024 * 8, '8kB': 1024 * 8,
+        '16KB': 1024 * 16, '16kB': 1024 * 16,
+        'MB': 1024 * 1024, 'M': 1024 * 1024, 'mB': 1024 * 1024,
+        'GB': 1024 * 1024 * 1024, 'G': 1024 * 1024 * 1024, 'gB': 1024 * 1024 * 1024,
+        'TB': 1024 * 1024 * 1024 * 1024, 'T': 1024 * 1024 * 1024 * 1024, 'tB': 1024 * 1024 * 1024 * 1024,
+        'PB': 1024 * 1024 * 1024 * 1024 * 1024, 'P': 1024 * 1024 * 1024 * 1024 * 1024,
+        'pB': 1024 * 1024 * 1024 * 1024 * 1024,
+    }
+    if unit in unit_multipliers:
+        return value * unit_multipliers[unit]
+    if unit == 's':
         return str(value) + 's'
     elif unit == 'ms':
         return str(value) + 'ms'
--
Gitee


From 585c91957cbe3c6ac12334c9c628be060cfcb262 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com>
Date: Wed, 4 Dec 2024 07:46:09 +0000
Subject: [PATCH 15/87] 15
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com>
---
 script/local/parser/parsetab.py | 14 ++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/script/local/parser/parsetab.py b/script/local/parser/parsetab.py
index 2a4b8fef..9b7ddbb1 100644
--- a/script/local/parser/parsetab.py
+++ b/script/local/parser/parsetab.py
@@ -28,18 +28,20 @@ _lr_action_items = {'NOT':([0,8,18,19,29,],[5,5,5,5,5,]),'LPAREN':([0,5,8,18,19,
 
 _lr_action = {}
 for _k, _v in _lr_action_items.items():
-    for _x,_y in zip(_v[0],_v[1]):
-        if not _x in _lr_action: _lr_action[_x] = {}
-        _lr_action[_x][_k] = _y
+    for _x, _y in zip(_v[0], _v[1]):
+        if _x not in _lr_action:
+            _lr_action[_x] = {}
+        _lr_action[_x][_k] = _y
 del _lr_action_items
 
 _lr_goto_items = {'sentence':([0,],[1,]),'conditions':([0,8,29,],[2,30,30,]),'and_conditions':([0,8,18,29,],[3,3,37,3,]),'not_conditions':([0,8,18,19,29,],[4,4,4,38,4,]),'cdt':([0,5,8,18,19,29,],[6,20,6,6,6,6,]),'expr':([0,5,8,18,19,21,22,23,24,25,26,29,40,53,58,],[7,7,31,7,7,39,41,42,43,44,45,31,54,56,59,]),'term':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,40,53,58,],[9,9,9,9,9,9,9,9,9,9,9,46,47,9,9,9,9,]),'factor':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[10,10,10,10,10,10,10,10,10,10,10,10,10,10,50,51,52,10,10,10,]),'function':([17,],[35,]),'variables':([53,],[55,]),}
 
 _lr_goto = {}
 for _k, _v in _lr_goto_items.items():
- for _x, _y in zip(_v[0], _v[1]): - if not _x in _lr_goto: _lr_goto[_x] = {} - _lr_goto[_x][_k] = _y + for _x, _y in zip(_v[0], _v[1]): + if _x not in _lr_goto: + _lr_goto[_x] = {} + _lr_goto[_x][_k] = _y del _lr_goto_items _lr_productions = [ ("S' -> sentence","S'",1,None,None,None), -- Gitee From 75d889caa45d490e99c70997558952518a16b5e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Wed, 4 Dec 2024 07:51:47 +0000 Subject: [PATCH 16/87] 17 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/myYACC.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/script/local/parser/myYACC.py b/script/local/parser/myYACC.py index 0d692881..968170c0 100644 --- a/script/local/parser/myYACC.py +++ b/script/local/parser/myYACC.py @@ -16,8 +16,7 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : CheckConfig.py is a local utility to -# execute some functions about init instance +# Description : CheckInstall.py is a utility to install Gauss MPP Database. ############################################################################# import os import sys -- Gitee From db14d19427ac9038f24eea5a92ec72db65772936 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Wed, 4 Dec 2024 07:52:16 +0000 Subject: [PATCH 17/87] 18 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/myLexer.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/script/local/parser/myLexer.py b/script/local/parser/myLexer.py index a33c5a38..065d5a6d 100644 --- a/script/local/parser/myLexer.py +++ b/script/local/parser/myLexer.py @@ -16,8 +16,7 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : CheckConfig.py is a local utility to -# execute some functions about init instance +# Description : CheckInstall.py is a utility to install Gauss MPP Database. 
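# Illustrative sketch (not from the patches above): patch 15 earlier in this
# series only reformats the loops that expand PLY's generated parsetab.py
# tables. Each table entry pairs a list of parser states with a list of
# actions per token, and the loop inverts that into a state -> token -> action
# mapping. The same expansion in miniature, with made-up states and actions:
_items = {'NOT': ([0, 8], [5, 5]), 'LPAREN': ([0], [7])}
_action = {}
for _k, _v in _items.items():
    for _x, _y in zip(_v[0], _v[1]):
        if _x not in _action:
            _action[_x] = {}
        _action[_x][_k] = _y
assert _action == {0: {'NOT': 5, 'LPAREN': 7}, 8: {'NOT': 5}}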
############################################################################# import os import sys @@ -139,10 +138,10 @@ class MyLexer(): self.lexer = lex(module=self, **kwargs) # Test it output - def test(self,data): + def test(self, data): self.lexer.input(data) while True: - tok = self.lexer.token() - if not tok: - break - print(tok) + tok = self.lexer.token() + if not tok: + break + print(tok) -- Gitee From 588286d5c7c34fedac0512fbaf91f056f395fa62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Wed, 4 Dec 2024 08:43:48 +0000 Subject: [PATCH 18/87] 19 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/lex.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py index e1732454..40b15ba9 100644 --- a/script/local/parser/lex.py +++ b/script/local/parser/lex.py @@ -104,14 +104,14 @@ class Lexer: self.lexmodule = None # Module self.lineno = 1 # Current line number - def clone(self, object=None): + def clone(self, obj=None): c = copy.copy(self) - # If the object parameter has been supplied, it means we are attaching the - # lexer to a new object. In this case, we have to rebind all methods in + # If the obj parameter has been supplied, it means we are attaching the + # lexer to a new obj. In this case, we have to rebind all methods in # the lexstatere and lexstateerrorf tables. - if object: + if obj: newtab = {} for key, ritem in self.lexstatere.items(): newre = [] @@ -121,14 +121,14 @@ class Lexer: if not f or not f[0]: newfindex.append(f) continue - newfindex.append((getattr(object, f[0].__name__), f[1])) + newfindex.append((getattr(obj, f[0].__name__), f[1])) newre.append((cre, newfindex)) newtab[key] = newre c.lexstatere = newtab c.lexstateerrorf = {} for key, ef in self.lexstateerrorf.items(): - c.lexstateerrorf[key] = getattr(object, ef.__name__) - c.lexmodule = object + c.lexstateerrorf[key] = getattr(obj, ef.__name__) + c.lexmodule = obj return c # ------------------------------------------------------------ @@ -692,7 +692,7 @@ class LexerReflect(object): # # Build all of the regular expression rules from definitions in the supplied module # ----------------------------------------------------------------------------- -def lex(*, module=None, object=None, debug=False, +def lex(*, module=None, obj=None, debug=False, reflags=int(re.VERBOSE), debuglog=None, errorlog=None): global lexer @@ -710,8 +710,8 @@ def lex(*, module=None, object=None, debug=False, debuglog = Logger(sys.stderr) # Get the module dictionary used for the lexer - if object: - module = object + if obj: + module = obj # Get the module dictionary used for the parser if module: -- Gitee From b7113c8352135bd1b0cca4a011265117b29c6820 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Wed, 4 Dec 2024 09:08:37 +0000 Subject: [PATCH 19/87] 20 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/lex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py index 40b15ba9..69c17bee 100644 --- a/script/local/parser/lex.py +++ b/script/local/parser/lex.py @@ -868,7 +868,7 @@ def 
runmain(lexer=None, data=None): # when its docstring might need to be set in an alternative way # ----------------------------------------------------------------------------- -def TOKEN(r): +def token(r): def set_regex(f): if hasattr(r, '__call__'): f.regex = _get_regex(r) -- Gitee From b2bf6e4c077600dffdcf99ea278e302b73bf2cf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Wed, 4 Dec 2024 09:08:59 +0000 Subject: [PATCH 20/87] 21 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/variables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/local/parser/variables.py b/script/local/parser/variables.py index 14197536..5dc79f0a 100644 --- a/script/local/parser/variables.py +++ b/script/local/parser/variables.py @@ -20,11 +20,11 @@ ############################################################################# import sys import os +from decimal import Decimal localDirPath = os.path.dirname(os.path.realpath(__file__)) sys.path.append(sys.path[0] + "/../") from local.parser.utils import get_dependency_setting -from decimal import Decimal variable_dict = { 'udf_memory_limit' : 300, -- Gitee From d9be31bcff0486d0d0d6800c68b471b752b7da8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Wed, 4 Dec 2024 09:21:20 +0000 Subject: [PATCH 21/87] 21 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/myYACC.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/script/local/parser/myYACC.py b/script/local/parser/myYACC.py index 968170c0..49d640ee 100644 --- a/script/local/parser/myYACC.py +++ b/script/local/parser/myYACC.py @@ -16,7 +16,8 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : CheckInstall.py is a utility to install Gauss MPP Database. 
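# Illustrative sketch (not from the patches above): patch 19 renames the
# vendored lex.py decorator TOKEN to token (upstream PLY spells it TOKEN).
# The decorator attaches a regex to a lexer rule function instead of using
# the function's docstring. Hedged usage sketch, assuming the module is
# importable as local.parser.lex, the package path this series uses:
from local.parser.lex import token

number_pattern = r'\d+'

class SketchLexer:
    tokens = ('NUMBER',)

    @token(number_pattern)
    def t_NUMBER(self, t):
        # The decorator stores number_pattern on t_NUMBER.regex, which the
        # lex() builder reads when compiling the master regular expression.
        t.value = int(t.value)
        return t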
+# Description : CheckConfig.py is a local utility to +# execute some functions about init instance ############################################################################# import os import sys @@ -158,17 +159,20 @@ class MyYACC(): def p_function(p): 'function : ID LPAREN variables RPAREN' p[0] = (get_function(p[1]), p[3]) + def p_variables_comma(p): ''' variables : variables COMMA expr ''' p[1].append(p[3]) p[0] = p[1] + def p_variables_factor(p): 'variables : expr' p[0] = [p[1]] #Error rule for syntax errors + @staticmethod def p_error(p): raise Exception('Syntax error in input!') -- Gitee From 3e9071c83ad683db9ca8b95574c90c760194f9b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Thu, 5 Dec 2024 06:21:08 +0000 Subject: [PATCH 22/87] 22 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/yacc.py | 399 +++++++++++++++++++----------------- 1 file changed, 209 insertions(+), 190 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 572b28d0..7627bce6 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -23,21 +23,22 @@ import types import sys import inspect -#----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- # === User configurable parameters === # # Change these to modify the default behavior of yacc (if you wish) -#----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- -YACC_DEBUG = False # Debugging mode. If set, yacc generates a - # a 'parser.out' file in the current directory +YACC_DEBUG = False # Debugging mode. If set, yacc generates a +# a 'parser.out' file in the current directory -DEBUG_FILE = 'parser.out' # Default name of the debugging file -ERROR_COUNT = 3 # Number of symbols that must be shifted to leave recovery mode -RESULT_LIMIT = 40 # Size limit of results when running in debug mode. +DEBUG_FILE = 'parser.out' # Default name of the debugging file +ERROR_COUNT = 3 # Number of symbols that must be shifted to leave recovery mode +RESULT_LIMIT = 40 # Size limit of results when running in debug mode. MAXINT = sys.maxsize + class Logger(object): def __init__(self, f): self.f = f @@ -55,6 +56,7 @@ class Logger(object): critical = debug + # Null logger is used when no output is generated. Does nothing. class NullLogger(object): def __getattribute__(self, name): @@ -63,10 +65,12 @@ class NullLogger(object): def __call__(self, *args, **kwargs): return self + # Exception raised for yacc-related errors class YaccError(Exception): pass + # Format the result message that the parser produces when running in debug mode. 
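# Illustrative sketch (not from the patches above): format_result() and
# format_stack_entry() below trim long value reprs so parser debug logs stay
# readable, falling back to a type-and-address form. The same idea in
# miniature (short_repr is a made-up name; RESULT_LIMIT is the constant
# defined above):
def short_repr(value, limit=RESULT_LIMIT):
    text = repr(value)
    if len(text) > limit:
        # Too noisy for one log line: show the type and identity instead.
        return '<%s @ 0x%x> (%s...)' % (type(value).__name__, id(value), text[:limit])
    return text

print(short_repr(list(range(100))))  # prints the '<list @ 0x...> (...)' form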
def format_result(r): repr_str = repr(r) @@ -77,6 +81,7 @@ def format_result(r): result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) return result + # Format stack entries when the parser is running in debug mode def format_stack_entry(r): repr_str = repr(r) @@ -87,13 +92,14 @@ def format_stack_entry(r): else: return '<%s @ 0x%x>' % (type(r).__name__, id(r)) -#----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- # === LR Parsing Engine === # # The following classes are used for the LR parser itself. These are not # used during table construction and are independent of the actual LR # table generation algorithm -#----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- # This class is used to hold non-terminal grammar symbols during parsing. # It normally has the following attributes set: @@ -105,6 +111,7 @@ class YaccSymbol: def __repr__(self): return str(self) + # This class is a wrapper around the objects actually passed to each # grammar rule. Index lookup and assignment actually assign the # .value attribute of the underlying YaccSymbol object. @@ -163,6 +170,7 @@ class YaccProduction: def error(self): raise SyntaxError + # ----------------------------------------------------------------------------- # == LRParser == # @@ -219,14 +227,14 @@ class LRParser: if isinstance(debug, int) and debug: debug = Logger(sys.stderr) - lookahead = None # Current lookahead symbol - lookaheadstack = [] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - defaulted_states = self.defaulted_states # Local reference to defaulted states - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) 
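# Illustrative sketch (not from the patches above): YaccProduction above makes
# p[n] read and write the .value attribute of the underlying YaccSymbol, so
# grammar actions work with plain values while the parser keeps symbol objects
# on its stack. A miniature of that indirection, with made-up Sym/Prod names:
class SymSketch:
    def __init__(self, value):
        self.value = value

class ProdSketch:
    def __init__(self, slice_):
        self.slice = slice_
    def __getitem__(self, n):
        return self.slice[n].value
    def __setitem__(self, n, v):
        self.slice[n].value = v

# What a rule body like "expr : expr PLUS expr" effectively does:
p = ProdSketch([SymSketch(None), SymSketch(2), SymSketch('+'), SymSketch(3)])
p[0] = p[1] + p[3]
assert p.slice[0].value == 5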
+ defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery if debug: debug.info('PARSE DEBUG START') @@ -248,10 +256,10 @@ class LRParser: get_token = self.token = lexer.token # Set up the state and symbol stacks - statestack = self.statestack = [] # Stack of parsing states - symstack = self.symstack = [] # Stack of grammar symbols - pslice.stack = symstack # Put in the production - errtoken = None # Err token + statestack = self.statestack = [] # Stack of parsing states + symstack = self.symstack = [] # Stack of grammar symbols + pslice.stack = symstack # Put in the production + errtoken = None # Err token # The start state is assumed to be (0,$end) @@ -271,7 +279,7 @@ class LRParser: if state not in defaulted_states: if not lookahead: if not lookaheadstack: - lookahead = get_token() # Get the next token + lookahead = get_token() # Get the next token else: lookahead = lookaheadstack.pop() if not lookahead: @@ -315,20 +323,20 @@ class LRParser: # Get production function sym = YaccSymbol() - sym.type = pname # Production name + sym.type = pname # Production name sym.value = None if debug: if plen: debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, - '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', - goto[statestack[-1-plen]][pname]) + '[' + ','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]]) + ']', + goto[statestack[-1 - plen]][pname]) else: debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], goto[statestack[-1]][pname]) if plen: - targ = symstack[-plen-1:] + targ = symstack[-plen - 1:] targ[0] = sym if tracking: @@ -359,9 +367,9 @@ class LRParser: statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) # Save the current lookahead token - symstack.extend(targ[1:-1]) # Put the production slice back on the stack - statestack.pop() # Pop back one state (before the reduce) + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' sym.value = 'error' @@ -397,8 +405,8 @@ class LRParser: statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) # Save the current lookahead token - statestack.pop() # Pop back one state (before the reduce) + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' sym.value = 'error' @@ -439,7 +447,7 @@ class LRParser: self.errorok = False errtoken = lookahead if errtoken.type == '$end': - errtoken = None # End of file! + errtoken = None # End of file! 
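# Illustrative sketch (not from the patches above): when the engine below
# enters error recovery it hands the offending token to the grammar's p_error
# hook, then requires ERROR_COUNT cleanly shifted tokens before reporting
# again (panic-mode recovery). A hedged sketch of such a hook; upstream PLY
# additionally exposes parser.errok() to leave recovery mode early:
def p_error(p):
    if p is None:
        # The engine passes None when the error is at end of input ('$end').
        print('syntax error: unexpected end of input')
    else:
        print('syntax error at token %s (value %r)' % (p.type, p.value))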
if self.errorfunc: if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer @@ -524,6 +532,7 @@ class LRParser: # If we'r here, something really bad happened raise RuntimeError('yacc: internal parser error!!!\n') + # ----------------------------------------------------------------------------- # === Grammar Representation === # @@ -534,6 +543,7 @@ class LRParser: # regex matching identifiers _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') + # ----------------------------------------------------------------------------- # class Production: # @@ -560,6 +570,7 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') class Production(object): reduced = 0 + def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): self.name = name self.prod = tuple(prod) @@ -572,7 +583,7 @@ class Production(object): # Internal settings used during table construction - self.len = len(self.prod) # Length of the production + self.len = len(self.prod) # Length of the production # Create a list of unique production symbols used in the production self.usyms = [] @@ -612,11 +623,11 @@ class Production(object): p = LRItem(self, n) # Precompute the list of productions immediately following. try: - p.lr_after = self.Prodnames[p.prod[n+1]] + p.lr_after = self.Prodnames[p.prod[n + 1]] except (IndexError, KeyError): p.lr_after = [] try: - p.lr_before = p.prod[n-1] + p.lr_before = p.prod[n - 1] except IndexError: p.lr_before = None return p @@ -626,6 +637,7 @@ class Production(object): if self.func: self.callable = pdict[self.func] + # ----------------------------------------------------------------------------- # class LRItem # @@ -672,6 +684,7 @@ class LRItem(object): def __repr__(self): return 'LRItem(' + str(self) + ')' + # ----------------------------------------------------------------------------- # # Return the rightmost terminal from a list of symbols. Used in add_production() @@ -684,6 +697,7 @@ def rightmost_terminal(symbols, terminals): i -= 1 return None + # ----------------------------------------------------------------------------- # === GRAMMAR CLASS === # @@ -695,42 +709,42 @@ def rightmost_terminal(symbols, terminals): class GrammarError(YaccError): pass + class Grammar(object): def __init__(self, terminals): self.Productions = [None] # A list of all of the productions. The first - # entry is always reserved for the purpose of - # building an augmented grammar + # entry is always reserved for the purpose of + # building an augmented grammar - self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all - # productions of that nonterminal. + self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all + # productions of that nonterminal. - self.Prodmap = {} # A dictionary that is only used to detect duplicate - # productions. + self.Prodmap = {} # A dictionary that is only used to detect duplicate + # productions. - self.Terminals = {} # A dictionary mapping the names of terminal symbols to a - # list of the rules where they are used. + self.Terminals = {} # A dictionary mapping the names of terminal symbols to a + # list of the rules where they are used. for term in terminals: self.Terminals[term] = [] self.Terminals['error'] = [] - self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list - # of rule numbers where they are used. 
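# Illustrative sketch (not from the patches above): Grammar keeps precomputed
# FIRST/FOLLOW sets (self.First / self.Follow, declared just below). FIRST(N)
# is the set of terminals that can begin a string derived from N; the real
# compute_first() also handles epsilon productions, which this miniature
# fixpoint omits for brevity:
productions = {'E': [['T', 'PLUS', 'E'], ['T']], 'T': [['NUMBER']]}
terminals = {'PLUS', 'NUMBER'}
first = {nt: set() for nt in productions}
changed = True
while changed:
    changed = False
    for nt, rules in productions.items():
        for rule in rules:
            head = rule[0]
            new = {head} if head in terminals else first[head]
            if not new <= first[nt]:
                first[nt] |= new
                changed = True
assert first == {'E': {'NUMBER'}, 'T': {'NUMBER'}}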
- - self.First = {} # A dictionary of precomputed FIRST(x) symbols + self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list + # of rule numbers where they are used. - self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols + self.First = {} # A dictionary of precomputed FIRST(x) symbols - self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the - # form ('right',level) or ('nonassoc', level) or ('left',level) + self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols - self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. - # This is only used to provide error checking and to generate - # a warning about unused precedence rules. + self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the + # form ('right',level) or ('nonassoc', level) or ('left',level) - self.Start = None # Starting symbol for the grammar + self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. + # This is only used to provide error checking and to generate + # a warning about unused precedence rules. + self.Start = None # Starting symbol for the grammar def __len__(self): return len(self.Productions) @@ -808,7 +822,7 @@ class Grammar(object): raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) else: self.UsedPrecedence.add(precname) - del syms[-2:] # Drop %prec from the rule + del syms[-2:] # Drop %prec from the rule else: # If no %prec, precedence is determined by the rightmost terminal symbol precname = rightmost_terminal(syms, self.Terminals) @@ -1108,7 +1122,7 @@ class Grammar(object): for i, B in enumerate(p.prod): if B in self.Nonterminals: # Okay. We got a non-terminal in a production - fst = self._first(p.prod[i+1:]) + fst = self._first(p.prod[i + 1:]) hasempty = False for f in fst: if f != '' and f not in self.Follow[B]: @@ -1116,7 +1130,7 @@ class Grammar(object): didadd = True if f == '': hasempty = True - if hasempty or i == (len(p.prod)-1): + if hasempty or i == (len(p.prod) - 1): # Add elements of follow(a) to follow(b) for f in self.Follow[p.name]: if f not in self.Follow[B]: @@ -1126,7 +1140,6 @@ class Grammar(object): break return self.Follow - # ----------------------------------------------------------------------------- # # This function walks the list of productions and builds a complete set of the @@ -1153,11 +1166,11 @@ class Grammar(object): lri = LRItem(p, i) # Precompute the list of productions immediately following try: - lri.lr_after = self.Prodnames[lri.prod[i+1]] + lri.lr_after = self.Prodnames[lri.prod[i + 1]] except (IndexError, KeyError): lri.lr_after = [] try: - lri.lr_before = lri.prod[i-1] + lri.lr_before = lri.prod[i - 1] except IndexError: lri.lr_before = None @@ -1169,6 +1182,7 @@ class Grammar(object): i += 1 p.lr_items = lr_items + # ----------------------------------------------------------------------------- # === LR Generator === # @@ -1201,13 +1215,14 @@ def digraph(X, R, FP): traverse(x, N, stack, F, X, R, FP) return F + def traverse(x, N, stack, F, X, R, FP): stack.append(x) d = len(stack) N[x] = d - F[x] = FP(x) # F(X) <- F'(x) + F[x] = FP(x) # F(X) <- F'(x) - rel = R(x) # Get y's related to x + rel = R(x) # Get y's related to x for y in rel: if N[y] == 0: traverse(y, N, stack, F, X, R, FP) @@ -1224,6 +1239,7 @@ def traverse(x, N, stack, F, X, R, FP): F[stack[-1]] = F[x] element = stack.pop() + class LALRError(YaccError): pass @@ -1245,18 +1261,18 @@ class LRTable: self.log = log # 
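# Illustrative sketch (not from the patches above): digraph()/traverse() above
# implement the DeRemer-Pennello set-union closure used by the LALR lookahead
# computation: F(x) starts as FP(x) and absorbs F(y) for every y related to x,
# with the N/stack bookkeeping collapsing cycles so the recursion terminates.
# A toy run over a three-node chain, assuming digraph from this module is in
# scope:
X = ['a', 'b', 'c']
edges = {'a': ['b'], 'b': ['c'], 'c': []}
seed = {'a': [1], 'b': [2], 'c': [3]}
F = digraph(X, R=lambda x: edges[x], FP=lambda x: seed[x])
# F['a'] == [1, 2, 3]; F['b'] == [2, 3]; F['c'] == [3]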
Internal attributes - self.lr_action = {} # Action table - self.lr_goto = {} # Goto table - self.lr_productions = grammar.Productions # Copy of grammar Production array - self.lr_goto_cache = {} # Cache of computed gotos - self.lr0_cidhash = {} # Cache of closures + self.lr_action = {} # Action table + self.lr_goto = {} # Goto table + self.lr_productions = grammar.Productions # Copy of grammar Production array + self.lr_goto_cache = {} # Cache of computed gotos + self.lr0_cidhash = {} # Cache of closures - self._add_count = 0 # Internal counter used to detect cycles + self._add_count = 0 # Internal counter used to detect cycles # Diagnostic information filled in by the table generator self.sr_conflict = 0 self.rr_conflict = 0 - self.conflicts = [] # List of conflicts + self.conflicts = [] # List of conflicts self.sr_conflicts = [] self.rr_conflicts = [] @@ -1423,7 +1439,7 @@ class LRTable: for stateno, state in enumerate(C): for p in state: if p.lr_index < p.len - 1: - t = (stateno, p.prod[p.lr_index+1]) + t = (stateno, p.prod[p.lr_index + 1]) if t[1] in self.grammar.Nonterminals: if t not in trans: trans.append(t) @@ -1444,7 +1460,7 @@ class LRTable: g = self.lr0_goto(C[state], N) for p in g: if p.lr_index < p.len - 1: - a = p.prod[p.lr_index+1] + a = p.prod[p.lr_index + 1] if a in self.grammar.Terminals: if a not in terms: terms.append(a) @@ -1500,8 +1516,8 @@ class LRTable: # ----------------------------------------------------------------------------- def compute_lookback_includes(self, C, trans, nullable): - lookdict = {} # Dictionary of lookback relations - includedict = {} # Dictionary of include relations + lookdict = {} # Dictionary of lookback relations + includedict = {} # Dictionary of include relations # Make a dictionary of non-terminal transitions dtrans = {} @@ -1534,7 +1550,7 @@ class LRTable: li = lr_index + 1 while li < p.len: if p.prod[li] in self.grammar.Terminals: - break # No forget it + break # No forget it if p.prod[li] not in nullable: break li = li + 1 @@ -1542,8 +1558,8 @@ class LRTable: # Appears to be a relation between (j,t) and (state,N) includes.append((j, t)) - g = self.lr0_goto(C[j], t) # Go to next set - j = self.lr0_cidhash.get(id(g), -1) # Go to next state + g = self.lr0_goto(C[j], t) # Go to next set + j = self.lr0_cidhash.get(id(g), -1) # Go to next state # When we get here, j is the final state, now we have to locate the production for r in C[j]: @@ -1554,7 +1570,7 @@ class LRTable: i = 0 # This look is comparing a production ". A B C" with "A B C ." while i < r.lr_index: - if r.prod[i] != p.prod[i+1]: + if r.prod[i] != p.prod[i + 1]: break i = i + 1 else: @@ -1652,11 +1668,11 @@ class LRTable: def lr_parse_table(self): Productions = self.grammar.Productions Precedence = self.grammar.Precedence - goto = self.lr_goto # Goto array - action = self.lr_action # Action array - log = self.log # Logger for output + goto = self.lr_goto # Goto array + action = self.lr_action # Action array + log = self.log # Logger for output - actionp = {} # Action production array (temporary) + actionp = {} # Action production array (temporary) # This determines the number of states @@ -1667,7 +1683,7 @@ class LRTable: st = 0 for I in C: # Loop over each production in I - actlist = [] # List of actions + actlist = [] # List of actions st_action = {} st_actionp = {} st_goto = {} @@ -1679,115 +1695,115 @@ class LRTable: log.info('') for p in I: - if p.len == p.lr_index + 1: - if p.name == "S'": - # Start symbol. Accept! 
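The table-construction loop that follows fills st_action using PLY's integer encoding: a positive entry means "shift and go to that state", a negative entry means "reduce by that production number", and 0 means accept. A small illustrative fragment (next_action is an invented name) showing how such a table is read back by a driver:

def next_action(action, state, lookahead):
    t = action[state].get(lookahead)
    if t is None:
        return ('error', None)      # no entry: syntax error
    if t > 0:
        return ('shift', t)         # push lookahead, go to state t
    if t < 0:
        return ('reduce', -t)       # reduce using production number -t
    return ('accept', None)         # 0 is reserved for '$end' in the start state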
- st_action['$end'] = 0 - st_actionp['$end'] = p - else: - # We are at the end of a production. Reduce! - laheads = p.lookaheads[st] - for a in laheads: - actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) - r = st_action.get(a) - if r is not None: - # Whoa. Have a shift/reduce or reduce/reduce conflict - if r > 0: - # Need to decide on shift or reduce here - # By default we favor shifting. Need to add - # some precedence rules here. - - # Shift precedence comes from the token - sprec, slevel = Precedence.get(a, ('right', 0)) - - # Reduce precedence comes from rule being reduced (p) - rprec, rlevel = Productions[p.number].prec - - if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): - # We really need to reduce here. - st_action[a] = -p.number - st_actionp[a] = p - if not slevel and not rlevel: - log.info(' ! shift/reduce conflict for %s resolved as reduce', a) - self.sr_conflicts.append((st, a, 'reduce')) - Productions[p.number].reduced += 1 - elif (slevel == rlevel) and (rprec == 'nonassoc'): - st_action[a] = None - else: - # Hmmm. Guess we'll keep the shift - if not rlevel: - log.info(' ! shift/reduce conflict for %s resolved as shift', a) - self.sr_conflicts.append((st, a, 'shift')) - elif r < 0: - # Reduce/reduce conflict. In this case, we favor the rule - # that was defined first in the grammar file - oldp = Productions[-r] - pp = Productions[p.number] - if oldp.line > pp.line: - st_action[a] = -p.number - st_actionp[a] = p - chosenp, rejectp = pp, oldp - Productions[p.number].reduced += 1 - Productions[oldp.number].reduced -= 1 - else: - chosenp, rejectp = oldp, pp - self.rr_conflicts.append((st, chosenp, rejectp)) - log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', - a, st_actionp[a].number, st_actionp[a]) + if p.len == p.lr_index + 1: + if p.name == "S'": + # Start symbol. Accept! + st_action['$end'] = 0 + st_actionp['$end'] = p + else: + # We are at the end of a production. Reduce! + laheads = p.lookaheads[st] + for a in laheads: + actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) + r = st_action.get(a) + if r is not None: + # Whoa. Have a shift/reduce or reduce/reduce conflict + if r > 0: + # Need to decide on shift or reduce here + # By default we favor shifting. Need to add + # some precedence rules here. + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from rule being reduced (p) + rprec, rlevel = Productions[p.number].prec + + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): + # We really need to reduce here. + st_action[a] = -p.number + st_actionp[a] = p + if not slevel and not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) + Productions[p.number].reduced += 1 + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None + else: + # Hmmm. Guess we'll keep the shift + if not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) + elif r < 0: + # Reduce/reduce conflict. 
In this case, we favor the rule + # that was defined first in the grammar file + oldp = Productions[-r] + pp = Productions[p.number] + if oldp.line > pp.line: + st_action[a] = -p.number + st_actionp[a] = p + chosenp, rejectp = pp, oldp + Productions[p.number].reduced += 1 + Productions[oldp.number].reduced -= 1 else: - raise LALRError('Unknown conflict in state %d' % st) + chosenp, rejectp = oldp, pp + self.rr_conflicts.append((st, chosenp, rejectp)) + log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', + a, st_actionp[a].number, st_actionp[a]) else: - st_action[a] = -p.number - st_actionp[a] = p - Productions[p.number].reduced += 1 - else: - i = p.lr_index - a = p.prod[i+1] # Get symbol right after the "." - if a in self.grammar.Terminals: - g = self.lr0_goto(I, a) - j = self.lr0_cidhash.get(id(g), -1) - if j >= 0: - # We are in a shift state - actlist.append((a, p, 'shift and go to state %d' % j)) - r = st_action.get(a) - if r is not None: - # Whoa have a shift/reduce or shift/shift conflict - if r > 0: - if r != j: - raise LALRError('Shift/shift conflict in state %d' % st) - elif r < 0: - # Do a precedence check. - # - if precedence of reduce rule is higher, we reduce. - # - if precedence of reduce is same and left assoc, we reduce. - # - otherwise we shift - - # Shift precedence comes from the token - sprec, slevel = Precedence.get(a, ('right', 0)) - - # Reduce precedence comes from the rule that could have been reduced - rprec, rlevel = Productions[st_actionp[a].number].prec - - if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): - # We decide to shift here... highest precedence to shift - Productions[st_actionp[a].number].reduced -= 1 - st_action[a] = j - st_actionp[a] = p - if not rlevel: - log.info(' ! shift/reduce conflict for %s resolved as shift', a) - self.sr_conflicts.append((st, a, 'shift')) - elif (slevel == rlevel) and (rprec == 'nonassoc'): - st_action[a] = None - else: - # Hmmm. Guess we'll keep the reduce - if not slevel and not rlevel: - log.info(' ! shift/reduce conflict for %s resolved as reduce', a) - self.sr_conflicts.append((st, a, 'reduce')) - + raise LALRError('Unknown conflict in state %d' % st) + else: + st_action[a] = -p.number + st_actionp[a] = p + Productions[p.number].reduced += 1 + else: + i = p.lr_index + a = p.prod[i + 1] # Get symbol right after the "." + if a in self.grammar.Terminals: + g = self.lr0_goto(I, a) + j = self.lr0_cidhash.get(id(g), -1) + if j >= 0: + # We are in a shift state + actlist.append((a, p, 'shift and go to state %d' % j)) + r = st_action.get(a) + if r is not None: + # Whoa have a shift/reduce or shift/shift conflict + if r > 0: + if r != j: + raise LALRError('Shift/shift conflict in state %d' % st) + elif r < 0: + # Do a precedence check. + # - if precedence of reduce rule is higher, we reduce. + # - if precedence of reduce is same and left assoc, we reduce. + # - otherwise we shift + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from the rule that could have been reduced + rprec, rlevel = Productions[st_actionp[a].number].prec + + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): + # We decide to shift here... highest precedence to shift + Productions[st_actionp[a].number].reduced -= 1 + st_action[a] = j + st_actionp[a] = p + if not rlevel: + log.info(' ! 
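Both conflict branches in this function apply the standard yacc precedence comparison; isolated as a sketch (resolve_sr is an invented helper, not code from the patch):

def resolve_sr(token_prec, rule_prec):
    # Each argument is an (assoc, level) pair, where assoc is 'left',
    # 'right' or 'nonassoc'; undeclared tokens default to ('right', 0).
    sassoc, slevel = token_prec
    rassoc, rlevel = rule_prec
    if slevel < rlevel or (slevel == rlevel and rassoc == 'left'):
        return 'reduce'
    if slevel == rlevel and rassoc == 'nonassoc':
        return 'error'   # the table stores None and the input is rejected
    return 'shift'       # higher shift level, right-assoc ties, and
                         # undeclared precedence all favor the shift

# With PLUS declared ('left', 1) and TIMES ('left', 2), a rule at level 1
# against lookahead TIMES gives resolve_sr(('left', 2), ('left', 1)) ==
# 'shift', which is what parses 1 + 2 * 3 as 1 + (2 * 3).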
shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None else: - raise LALRError('Unknown conflict in state %d' % st) + # Hmmm. Guess we'll keep the reduce + if not slevel and not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) + else: - st_action[a] = j - st_actionp[a] = p + raise LALRError('Unknown conflict in state %d' % st) + else: + st_action[a] = j + st_actionp[a] = p # Print the actions associated with each terminal _actprint = {} @@ -1828,6 +1844,7 @@ class LRTable: goto[st] = st_goto st += 1 + # ----------------------------------------------------------------------------- # # This function returns a dictionary containing all of the symbols defined within @@ -1842,6 +1859,7 @@ def get_caller_module_dict(levels): ldict.update(f.f_locals) return ldict + # ----------------------------------------------------------------------------- # # This takes a raw grammar rule string and parses it into production data @@ -1880,6 +1898,7 @@ def parse_grammar(doc, file, line): return grammar + # ----------------------------------------------------------------------------- # # This class represents information extracted for building a parser including @@ -2072,7 +2091,7 @@ class ParserReflect(object): self.log.error('precedence items must be strings') self.error = True return - preclist.append((term, assoc, level+1)) + preclist.append((term, assoc, level + 1)) self.preclist = preclist # Get all p_functions from the grammar @@ -2145,7 +2164,7 @@ class ParserReflect(object): if n.startswith('p_') and n != 'p_error': self.log.warning('%r not defined as a function', n) if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or - (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): + (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): if v.__doc__: try: doc = v.__doc__.split(' ') @@ -2157,6 +2176,7 @@ class ParserReflect(object): self.grammar = grammar + # ----------------------------------------------------------------------------- # # Build a parser @@ -2165,7 +2185,6 @@ class ParserReflect(object): def yacc(*, debug=YACC_DEBUG, module=None, start=None, check_recursion=True, optimize=False, debugfile=DEBUG_FILE, debuglog=None, errorlog=None): - # Reference to the parsing method of the last built parser global parse -- Gitee From c3b1f05d5918a8a548296dfe01ba19e7110ab128 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Thu, 5 Dec 2024 07:55:16 +0000 Subject: [PATCH 23/87] 23 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/lex.py | 513 ++++++++++++++++++++----------------- 1 file changed, 276 insertions(+), 237 deletions(-) diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py index 69c17bee..db0707ca 100644 --- a/script/local/parser/lex.py +++ b/script/local/parser/lex.py @@ -22,7 +22,6 @@ import re import sys import types import copy -import os import inspect # This tuple contains acceptable string types @@ -107,30 +106,38 @@ class Lexer: def clone(self, obj=None): c = copy.copy(self) - # If the obj parameter has been supplied, it means we are attaching the - # lexer to a new obj. 
In this case, we have to rebind all methods in - # the lexstatere and lexstateerrorf tables. - if obj: - newtab = {} - for key, ritem in self.lexstatere.items(): - newre = [] - for cre, findex in ritem: - newfindex = [] - for f in findex: - if not f or not f[0]: - newfindex.append(f) - continue - newfindex.append((getattr(obj, f[0].__name__), f[1])) - newre.append((cre, newfindex)) - newtab[key] = newre - c.lexstatere = newtab - c.lexstateerrorf = {} - for key, ef in self.lexstateerrorf.items(): - c.lexstateerrorf[key] = getattr(obj, ef.__name__) + c.lexstatere = self._rebind_lexstatere(obj) + c.lexstateerrorf = self._rebind_lexstateerrorf(obj) c.lexmodule = obj + return c + def _rebind_lexstatere(self, obj): + newtab = {} + for key, ritem in self.lexstatere.items(): + newre = [] + for cre, findex in ritem: + newfindex = self._rebind_findex(obj, findex) + newre.append((cre, newfindex)) + newtab[key] = newre + return newtab + + def _rebind_findex(self, obj, findex): + newfindex = [] + for f in findex: + if not f or not f[0]: + newfindex.append(f) + continue + newfindex.append((getattr(obj, f[0].__name__), f[1])) + return newfindex + + def _rebind_lexstateerrorf(self, obj): + newtab = {} + for key, ef in self.lexstateerrorf.items(): + newtab[key] = getattr(obj, ef.__name__) + return newtab + # ------------------------------------------------------------ # input() - Push a new string into the lexer # ------------------------------------------------------------ @@ -198,96 +205,117 @@ class Lexer: continue # Look for a regular expression match - for lexre, lexindexfunc in self.lexre: - m = lexre.match(lexdata, lexpos) - if not m: - continue - - # Create a token for return - tok = LexToken() - tok.value = m.group() - tok.lineno = self.lineno - tok.lexpos = lexpos - - i = m.lastindex - func, tok.type = lexindexfunc[i] - - if not func: - # If no token type was set, it's an ignored token - if tok.type: - self.lexpos = m.end() - return tok - else: - lexpos = m.end() - break - - lexpos = m.end() - - # If token is processed by a function, call it - - tok.lexer = self # Set additional attributes useful in token rules - self.lexmatch = m - self.lexpos = lexpos - newtok = func(tok) - del tok.lexer - del self.lexmatch - - # Every function must return a token, if nothing, we just move to next token - if not newtok: - lexpos = self.lexpos # This is here in case user has updated lexpos. - lexignore = self.lexignore # This is here in case there was a state change - break - return newtok - else: - # No match, see if in literals - if lexdata[lexpos] in self.lexliterals: - tok = LexToken() - tok.value = lexdata[lexpos] - tok.lineno = self.lineno - tok.type = tok.value - tok.lexpos = lexpos - self.lexpos = lexpos + 1 - return tok + tok = self._process_regex_matches(lexpos) + if tok: + return tok - # No match. Call t_error() if defined. - if self.lexerrorf: - tok = LexToken() - tok.value = self.lexdata[lexpos:] - tok.lineno = self.lineno - tok.type = 'error' - tok.lexer = self - tok.lexpos = lexpos - self.lexpos = lexpos - newtok = self.lexerrorf(tok) - if lexpos == self.lexpos: - # Error method didn't change text position at all. This is an error. - raise LexError(f"Scanning error. Illegal character {lexdata[lexpos]!r}", - lexdata[lexpos:]) - lexpos = self.lexpos - if not newtok: - continue - return newtok + # No match, see if in literals + tok = self._process_literals(lexpos) + if tok: + return tok + + # No match. Call t_error() if defined. 
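With this split, each token() call now tries three lookups at the current position, in order: the compiled master regexes, then single-character literals, then the user's t_error hook. Roughly (a fragment reusing the method names introduced above, not a verbatim excerpt):

# Inside the scanning loop of token(), per position:
tok = (self._process_regex_matches(lexpos)
       or self._process_literals(lexpos)
       or self._handle_error(lexpos))
if tok is not None:
    return tok
# otherwise a LexError is raised for the illegal character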
+ tok = self._handle_error(lexpos) + if tok: + return tok - self.lexpos = lexpos - raise LexError(f"Illegal character {lexdata[lexpos]!r} at index {lexpos}", - lexdata[lexpos:]) + self.lexpos = lexpos + raise LexError(f"Illegal character {lexdata[lexpos]!r} at index {lexpos}", + lexdata[lexpos:]) if self.lexeoff: + tok = self._process_eof() + return tok + + self.lexpos = lexpos + 1 + if self.lexdata is None: + raise RuntimeError('No input string given with input()') + return None + + def _process_regex_matches(self, lexpos): + for lexre, lexindexfunc in self.lexre: + m = lexre.match(self.lexdata, lexpos) + if not m: + continue + + # Create a token for return tok = LexToken() - tok.type = 'eof' - tok.value = '' + tok.value = m.group() tok.lineno = self.lineno tok.lexpos = lexpos - tok.lexer = self + + i = m.lastindex + func, tok.type = lexindexfunc[i] + + if not func: + # If no token type was set, it's an ignored token + if tok.type: + self.lexpos = m.end() + return tok + else: + lexpos = m.end() + break + + lexpos = m.end() + + # If token is processed by a function, call it + tok.lexer = self # Set additional attributes useful in token rules + self.lexmatch = m self.lexpos = lexpos - newtok = self.lexeoff(tok) + newtok = func(tok) + del tok.lexer + del self.lexmatch + + # Every function must return a token, if nothing, we just move to next token + if not newtok: + lexpos = self.lexpos # This is here in case user has updated lexpos. + lexignore = self.lexignore # This is here in case there was a state change + break return newtok + return None - self.lexpos = lexpos + 1 - if self.lexdata is None: - raise RuntimeError('No input string given with input()') + def _process_literals(self, lexpos): + if self.lexdata[lexpos] in self.lexliterals: + tok = LexToken() + tok.value = self.lexdata[lexpos] + tok.lineno = self.lineno + tok.type = tok.value + tok.lexpos = lexpos + self.lexpos = lexpos + 1 + return tok return None + def _handle_error(self, lexpos): + if self.lexerrorf: + tok = LexToken() + tok.value = self.lexdata[lexpos:] + tok.lineno = self.lineno + tok.type = 'error' + tok.lexer = self + tok.lexpos = lexpos + self.lexpos = lexpos + newtok = self.lexerrorf(tok) + if lexpos == self.lexpos: + # Error method didn't change text position at all. This is an error. + raise LexError(f"Scanning error. Illegal character {self.lexdata[lexpos]!r}", + self.lexdata[lexpos:]) + lexpos = self.lexpos + if not newtok: + return None + return newtok + return None + + def _process_eof(self): + tok = LexToken() + tok.type = 'eof' + tok.value = '' + tok.lineno = self.lineno + tok.lexpos = self.lexpos + tok.lexer = self + self.lexpos = self.lexpos + newtok = self.lexeoff(tok) + return newtok + # Iterator interface def __iter__(self): return self @@ -329,29 +357,14 @@ def get_caller_module_dict(levels): # form the master regular expression. Given limitations in the Python re # module, it may be necessary to break the master regex into separate expressions. 
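For context on the comment above: the "master regular expression" is a single alternation of named groups, so one match() call both scans the input and, via the group name, identifies which rule fired. Python's re picks the first alternative that matches rather than the longest, which is why the reflection code orders the alternatives itself (function rules in definition order, string rules sorted longest-first). A standalone illustration with made-up rule names:

import re

rules = [('t_NUMBER', r'\d+'), ('t_ID', r'[A-Za-z_]\w*'), ('t_ws', r'[ \t]+')]
master = re.compile('|'.join('(?P<%s>%s)' % (n, p) for n, p in rules))

m = master.match('foo42 bar')
print(m.lastgroup, m.group())   # prints: t_ID foo42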
# ----------------------------------------------------------------------------- + def _form_master_re(relist, reflags, ldict, toknames): if not relist: return [], [], [] regex = '|'.join(relist) try: lexre = re.compile(regex, reflags) - - # Build the index to function map for the matching engine - lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) - lexindexnames = lexindexfunc[:] - - for f, i in lexre.groupindex.items(): - handle = ldict.get(f, None) - if type(handle) in (types.FunctionType, types.MethodType): - lexindexfunc[i] = (handle, toknames[f]) - lexindexnames[i] = f - elif handle is not None: - lexindexnames[i] = f - if f.find('ignore_') > 0: - lexindexfunc[i] = (None, None) - else: - lexindexfunc[i] = (None, toknames[f]) - + lexindexfunc, lexindexnames = _handle_groupindex(lexre, ldict, toknames) return [(lexre, lexindexfunc)], [regex], [lexindexnames] except Exception: m = (len(relist) // 2) + 1 @@ -359,6 +372,24 @@ def _form_master_re(relist, reflags, ldict, toknames): rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) return (llist+rlist), (lre+rre), (lnames+rnames) +def _handle_groupindex(lexre, ldict, toknames): + lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) + lexindexnames = lexindexfunc[:] + + for f, i in lexre.groupindex.items(): + handle = ldict.get(f, None) + if type(handle) in (types.FunctionType, types.MethodType): + lexindexfunc[i] = (handle, toknames[f]) + lexindexnames[i] = f + elif handle is not None: + lexindexnames[i] = f + if f.find('ignore_') > 0: + lexindexfunc[i] = (None, None) + else: + lexindexfunc[i] = (None, toknames[f]) + + return lexindexfunc, lexindexnames + # ----------------------------------------------------------------------------- # def _statetoken(s,names) # @@ -498,12 +529,12 @@ class LexerReflect(object): tsymbols = [f for f in self.ldict if f[:2] == 't_'] # Now build up a list of functions and a list of strings - self.toknames = {} # Mapping of symbols to token names - self.funcsym = {} # Symbols defined as functions - self.strsym = {} # Symbols defined as strings - self.ignore = {} # Ignore strings by state - self.errorf = {} # Error functions by state - self.eoff = {} # EOF functions by state + self.toknames = {} # Mapping of symbols to token names + self.funcsym = {} # Symbols defined as functions + self.strsym = {} # Symbols defined as strings + self.ignore = {} # Ignore strings by state + self.errorf = {} # Error functions by state + self.eoff = {} # EOF functions by state for s in self.stateinfo: self.funcsym[s] = [] @@ -520,33 +551,9 @@ class LexerReflect(object): self.toknames[f] = tokname if hasattr(t, '__call__'): - if tokname == 'error': - for s in states: - self.errorf[s] = t - elif tokname == 'eof': - for s in states: - self.eoff[s] = t - elif tokname == 'ignore': - line = t.__code__.co_firstlineno - file = t.__code__.co_filename - self.log.error("%s:%d: Rule %r must be defined as a string", file, line, t.__name__) - self.error = True - else: - for s in states: - self.funcsym[s].append((f, t)) + self.process_function_rule(f, t, states, tokname) elif isinstance(t, StringTypes): - if tokname == 'ignore': - for s in states: - self.ignore[s] = t - if '\\' in t: - self.log.warning("%s contains a literal backslash '\\'", f) - - elif tokname == 'error': - self.log.error("Rule %r must be defined as a function", f) - self.error = True - else: - for s in states: - self.strsym[s].append((f, t)) + self.process_string_rule(f, t, states, tokname) else: self.log.error('%s not defined as a 
function or string', f) self.error = True @@ -559,101 +566,139 @@ class LexerReflect(object): for s in self.strsym.values(): s.sort(key=lambda x: len(x[1]), reverse=True) + def process_function_rule(self, f, t, states, tokname): + if tokname == 'error': + for s in states: + self.errorf[s] = t + elif tokname == 'eof': + for s in states: + self.eoff[s] = t + elif tokname == 'ignore': + line = t.__code__.co_firstlineno + file = t.__code__.co_filename + self.log.error("%s:%d: Rule %r must be defined as a string", file, line, t.__name__) + self.error = True + else: + for s in states: + self.funcsym[s].append((f, t)) + + def process_string_rule(self, f, t, states, tokname): + if tokname == 'ignore': + for s in states: + self.ignore[s] = t + if '\\' in t: + self.log.warning("%s contains a literal backslash '\\'", f) + elif tokname == 'error': + self.log.error("Rule %r must be defined as a function", f) + self.error = True + else: + for s in states: + self.strsym[s].append((f, t)) + # Validate all of the t_rules collected def validate_rules(self): for state in self.stateinfo: - # Validate all rules defined by functions + self._validate_func_rules(state) + self._validate_str_rules(state) + self._validate_no_rules(state) + self._validate_error_function(state) + + for module in self.modules: + self.validate_module(module) + + def _validate_func_rules(self, state): + for fname, f in self.funcsym[state]: + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) + + if not self._validate_function(f, file, line): + continue - for fname, f in self.funcsym[state]: - line = f.__code__.co_firstlineno - file = f.__code__.co_filename - module = inspect.getmodule(f) - self.modules.add(module) + if not _get_regex(f): + self.log.error("%s:%d: No regular expression defined for rule %r", file, line, f.__name__) + self.error = True + continue - tokname = self.toknames[fname] - if isinstance(f, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - nargs = f.__code__.co_argcount - if nargs > reqargs: - self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) - self.error = True - continue + self._validate_regex(f, fname, file, line) - if nargs < reqargs: - self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) - self.error = True - continue + def _validate_function(self, f, file, line): + """Validates the number of arguments and logs errors if needed.""" + tokname = self.toknames[f.__name__] + reqargs = 2 if isinstance(f, types.MethodType) else 1 + nargs = f.__code__.co_argcount - if not _get_regex(f): - self.log.error("%s:%d: No regular expression defined for rule %r", file, line, f.__name__) - self.error = True - continue + if nargs > reqargs: + self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) + self.error = True + return False - try: - c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) - if c.match(''): - self.log.error("%s:%d: Regular expression for rule %r matches empty string", file, line, f.__name__) - self.error = True - except re.error as e: - self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) - if '#' in _get_regex(f): - self.log.error("%s:%d. 
Make sure '#' in rule %r is escaped with '\\#'", file, line, f.__name__) - self.error = True + if nargs < reqargs: + self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) + self.error = True + return False - # Validate all rules defined by strings - for name, r in self.strsym[state]: - tokname = self.toknames[name] - if tokname == 'error': - self.log.error("Rule %r must be defined as a function", name) - self.error = True - continue + return True - if tokname not in self.tokens and tokname.find('ignore_') < 0: - self.log.error("Rule %r defined for an unspecified token %s", name, tokname) - self.error = True - continue + def _validate_regex(self, f, fname, file, line): + """Validates the regular expression of a function.""" + try: + c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) + if c.match(''): + self.log.error("%s:%d: Regular expression for rule %r matches empty string", file, line, f.__name__) + self.error = True + except re.error as e: + self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) + if '#' in _get_regex(f): + self.log.error("%s:%d. Make sure '#' in rule %r is escaped with '\\#'", file, line, f.__name__) + self.error = True - try: - c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) - if (c.match('')): - self.log.error("Regular expression for rule %r matches empty string", name) - self.error = True - except re.error as e: - self.log.error("Invalid regular expression for rule %r. %s", name, e) - if '#' in r: - self.log.error("Make sure '#' in rule %r is escaped with '\\#'", name) - self.error = True + def _validate_str_rules(self, state): + for name, r in self.strsym[state]: + tokname = self.toknames[name] + if tokname == 'error': + self.log.error("Rule %r must be defined as a function", name) + self.error = True + continue - if not self.funcsym[state] and not self.strsym[state]: - self.log.error("No rules defined for state %r", state) + if tokname not in self.tokens and tokname.find('ignore_') < 0: + self.log.error("Rule %r defined for an unspecified token %s", name, tokname) self.error = True + continue - # Validate the error function - efunc = self.errorf.get(state, None) - if efunc: - f = efunc - line = f.__code__.co_firstlineno - file = f.__code__.co_filename - module = inspect.getmodule(f) - self.modules.add(module) - - if isinstance(f, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - nargs = f.__code__.co_argcount - if nargs > reqargs: - self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) - self.error = True + self._validate_regex_str(name, r) - if nargs < reqargs: - self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) - self.error = True + def _validate_regex_str(self, name, r): + """Validates the regular expression defined by a string.""" + try: + c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) + if c.match(''): + self.log.error("Regular expression for rule %r matches empty string", name) + self.error = True + except re.error as e: + self.log.error("Invalid regular expression for rule %r. 
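The match('') check in these validators is worth a concrete illustration: a rule whose regex can match the empty string would make token() loop forever without consuming input, so validation rejects it up front. For example (made-up rule name):

import re

c = re.compile('(?P<t_ws>[ \t]*)')   # '*' instead of '+'
print(c.match('') is not None)       # True: flagged as an error here,
                                     # since it matches without consuming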
%s", name, e)
+            if '#' in r:
+                self.log.error("Make sure '#' in rule %r is escaped with '\\#'", name)
+                self.error = True
 
-        for module in self.modules:
-            self.validate_module(module)
+    def _validate_no_rules(self, state):
+        """Logs an error if no rules are defined for a state."""
+        if not self.funcsym[state] and not self.strsym[state]:
+            self.log.error("No rules defined for state %r", state)
+            self.error = True
+
+    def _validate_error_function(self, state):
+        """Validates the error function for the state."""
+        efunc = self.errorf.get(state, None)
+        if efunc:
+            line = efunc.__code__.co_firstlineno
+            file = efunc.__code__.co_filename
+            module = inspect.getmodule(efunc)
+            self.modules.add(module)
+
+            if not self._validate_function(efunc, file, line):
+                return
 
 # -----------------------------------------------------------------------------
 #
@@ -844,19 +889,13 @@ def runmain(lexer=None, data=None):
     except IndexError:
         sys.stdout.write('Reading from standard input (type EOF to end):\n')
         data = sys.stdin.read()
+    local_input = lexer.input if lexer else input
+    local_token = lexer.token if lexer else token
 
-    if lexer:
-        _input = lexer.input
-    else:
-        _input = input
-    _input(data)
-    if lexer:
-        _token = lexer.token
-    else:
-        _token = token
+    local_input(data)
 
     while True:
-        tok = _token()
+        tok = local_token()
         if not tok:
             break
         sys.stdout.write(f'({tok.type},{tok.value!r},{tok.lineno},{tok.lexpos})\n')
-- 
Gitee

From 89d3229905588866b90a94070637a03696b43c85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com>
Date: Thu, 5 Dec 2024 08:25:45 +0000
Subject: [PATCH 24/87] 25
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com>
---
 script/local/parser/lex.py | 113 +++++++++++++++++++++----------------
 1 file changed, 64 insertions(+), 49 deletions(-)

diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py
index db0707ca..2f6a2c0f 100644
--- a/script/local/parser/lex.py
+++ b/script/local/parser/lex.py
@@ -123,6 +123,7 @@ class Lexer:
             newtab[key] = newre
         return newtab
 
-    def _rebind_findex(self, obj, findex):
+    @staticmethod
+    def _rebind_findex(obj, findex):
         newfindex = []
         for f in findex:
@@ -498,30 +499,33 @@ class LexerReflect(object):
     def get_states(self):
         self.states = self.ldict.get('states', None)
         # Build statemap
-        if self.states:
-            if not isinstance(self.states, (tuple, list)):
-                self.log.error('states must be defined as a tuple or list')
+        if not self.states:
+            return
+
+        if not isinstance(self.states, (tuple, list)):
+            self.log.error('states must be defined as a tuple or list')
+            self.error = True
+            return
+
+        for s in self.states:
+            if not isinstance(s, tuple) or len(s) != 2:
+                self.log.error("Invalid state specifier %r. 
Must be a tuple (statename,'exclusive|inclusive')", s) - self.error = True - continue - name, statetype = s - if not isinstance(name, StringTypes): - self.log.error('State name %r must be a string', name) - self.error = True - continue - if not (statetype == 'inclusive' or statetype == 'exclusive'): - self.log.error("State type for state %r must be 'inclusive' or 'exclusive'", name) - self.error = True - continue - if name in self.stateinfo: - self.log.error("State %r already defined", name) - self.error = True - continue - self.stateinfo[name] = statetype + continue + name, statetype = s + if not isinstance(name, StringTypes): + self.log.error('State name %r must be a string', name) + self.error = True + continue + if not (statetype == 'inclusive' or statetype == 'exclusive'): + self.log.error("State type for state %r must be 'inclusive' or 'exclusive'", name) + self.error = True + continue + if name in self.stateinfo: + self.log.error("State %r already defined", name) + self.error = True + continue + self.stateinfo[name] = statetype # Get all of the symbols with a t_ prefix and sort them into various @@ -745,7 +749,7 @@ def lex(*, module=None, obj=None, debug=False, ldict = None stateinfo = {'INITIAL': 'inclusive'} lexobj = Lexer() - global token, input + global token, lex_input if errorlog is None: errorlog = Logger(sys.stderr) @@ -796,27 +800,10 @@ def lex(*, module=None, obj=None, debug=False, # Get the stateinfo dictionary stateinfo = linfo.stateinfo - regexs = {} - # Build the master regular expressions - for state in stateinfo: - regex_list = [] - - # Add rules defined by functions first - for fname, f in linfo.funcsym[state]: - regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) - if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) - - # Now add all of the simple rules - for name, r in linfo.strsym[state]: - regex_list.append('(?P<%s>%s)' % (name, r)) - if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) - - regexs[state] = regex_list + # Call the helper function to build regex rules + regexs = build_regexs(linfo, stateinfo, debug, debuglog) # Build the master regular expressions - if debug: debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') @@ -856,6 +843,40 @@ def lex(*, module=None, obj=None, debug=False, lexobj.lexeoff = linfo.eoff.get('INITIAL', None) # Check state information for ignore and error rules + check_state_info(stateinfo, linfo, errorlog, lexobj) + + # Create global versions of the token() and input() functions + token = lexobj.token + lex_input = lexobj.input + lexer = lexobj + +def build_regexs(linfo, stateinfo, debug, debuglog): + """ + Helper function to build the regex dictionary from state information. + """ + regexs = {} + for state in stateinfo: + regex_list = [] + + # Add rules defined by functions first + for fname, f in linfo.funcsym[state]: + regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) + if debug: + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) + + # Now add all of the simple rules + for name, r in linfo.strsym[state]: + regex_list.append('(?P<%s>%s)' % (name, r)) + if debug: + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) + + regexs[state] = regex_list + return regexs + +def check_state_info(stateinfo, linfo, errorlog, lexobj): + """ + Helper function to check state information for error and ignore rules. 
+ """ for s, stype in stateinfo.items(): if stype == 'exclusive': if s not in linfo.errorf: @@ -868,12 +889,6 @@ def lex(*, module=None, obj=None, debug=False, if s not in linfo.ignore: linfo.ignore[s] = linfo.ignore.get('INITIAL', '') - # Create global versions of the token() and input() functions - token = lexobj.token - input = lexobj.input - lexer = lexobj - - return lexobj # ----------------------------------------------------------------------------- # -- Gitee From c147226e1f0196201f2b72adf85aa4899290e900 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Thu, 5 Dec 2024 08:35:14 +0000 Subject: [PATCH 25/87] 26 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/lex.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py index 2f6a2c0f..808f1470 100644 --- a/script/local/parser/lex.py +++ b/script/local/parser/lex.py @@ -895,7 +895,7 @@ def check_state_info(stateinfo, linfo, errorlog, lexobj): # This runs the lexer as a main program # ----------------------------------------------------------------------------- -def runmain(lexer=None, data=None): +def runmain(lexer_instance=None, data=None): if not data: try: filename = sys.argv[1] @@ -904,8 +904,10 @@ def runmain(lexer=None, data=None): except IndexError: sys.stdout.write('Reading from standard input (type EOF to end):\n') data = sys.stdin.read() - local_input = lexer.input if lexer else input - local_token = lexer.token if lexer else token + + # 修改为新的变量名称 + local_input = lexer_instance.input if lexer_instance else input + local_token = lexer_instance.token if lexer_instance else token local_input(data) @@ -915,6 +917,7 @@ def runmain(lexer=None, data=None): break sys.stdout.write(f'({tok.type},{tok.value!r},{tok.lineno},{tok.lexpos})\n') + # ----------------------------------------------------------------------------- # @TOKEN(regex) # -- Gitee From 4ec814e78574a1cb327ab93345dd2b3d58ca6721 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Thu, 5 Dec 2024 08:44:14 +0000 Subject: [PATCH 26/87] 27 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/myYACC.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/script/local/parser/myYACC.py b/script/local/parser/myYACC.py index 49d640ee..500465f5 100644 --- a/script/local/parser/myYACC.py +++ b/script/local/parser/myYACC.py @@ -16,20 +16,22 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. 
# ---------------------------------------------------------------------------- -# Description : CheckConfig.py is a local utility to -# execute some functions about init instance +# Description : Backup.py is a local utility to backup binary file +# and parameter file ############################################################################# import os import sys + + + localDirPath = os.path.dirname(os.path.realpath(__file__)) sys.path.append(sys.path[0] + "/../") -from local.parser.lex import * -from local.parser.yacc import * from local.parser.myLexer import tokens -from local.parser.variables import * -from local.parser.functions import * from local.parser.myLexer import token_dict +from local.parser.functions import get_function +from local.parser.variables import get_variable +from local.parser.yacc import yacc def exec_fn(fn): fn[0](*fn[1]) -- Gitee From b86a2506b4e65795487a437f9097e62753fb7125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Thu, 5 Dec 2024 08:48:03 +0000 Subject: [PATCH 27/87] 28 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/myYACC.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/script/local/parser/myYACC.py b/script/local/parser/myYACC.py index 500465f5..4118e9e5 100644 --- a/script/local/parser/myYACC.py +++ b/script/local/parser/myYACC.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- ############################################################################# -# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# Copyright (c) 2024 Huawei Technologies Co.,Ltd. # # openGauss is licensed under Mulan PSL v2. # You can use this software according to the terms @@ -16,8 +16,7 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : Backup.py is a local utility to backup binary file -# and parameter file +# Description : LocalCheckSE.py is a utility to check security configurations info on local node. ############################################################################# import os import sys -- Gitee From 3fce81f37deac0ede1adf24e0b96cb28238b3dcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Thu, 5 Dec 2024 08:48:21 +0000 Subject: [PATCH 28/87] 29 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/lex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py index 808f1470..2d62db4c 100644 --- a/script/local/parser/lex.py +++ b/script/local/parser/lex.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- ############################################################################# -# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# Copyright (c) 2024 Huawei Technologies Co.,Ltd. # # openGauss is licensed under Mulan PSL v2. # You can use this software according to the terms @@ -16,7 +16,7 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. 
# ----------------------------------------------------------------------------
-# Description : LocalCheckOS.py is a utility to check OS info on local node.
+# Description : LocalCheckSE.py is a utility to check security configurations info on local node.
 #############################################################################
 import re
 import sys
-- 
Gitee

From cfb89f230c8dd90f30d4e82d879a03f65edf72ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com>
Date: Thu, 5 Dec 2024 08:54:09 +0000
Subject: [PATCH 29/87] 30
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com>
---
 script/local/parser/myYACC.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/script/local/parser/myYACC.py b/script/local/parser/myYACC.py
index 4118e9e5..3c02f191 100644
--- a/script/local/parser/myYACC.py
+++ b/script/local/parser/myYACC.py
@@ -21,8 +21,6 @@
 import os
 import sys
 
-
-
 localDirPath = os.path.dirname(os.path.realpath(__file__))
 sys.path.append(sys.path[0] + "/../")
 
-- 
Gitee

From d45d64d763d27527f50e59571053fa01d3848db0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com>
Date: Fri, 6 Dec 2024 06:39:56 +0000
Subject: [PATCH 30/87] 120601
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com>
---
 script/local/parser/lex.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py
index 2d62db4c..cc67d845 100644
--- a/script/local/parser/lex.py
+++ b/script/local/parser/lex.py
@@ -741,6 +741,10 @@ class LexerReflect(object):
 #
 # Build all of the regular expression rules from definitions in the supplied module
 # -----------------------------------------------------------------------------
+def build_error_log(errorlog):
+    if errorlog is None:
+        return Logger(sys.stderr)
+    return errorlog
 
 def lex(*, module=None, obj=None, debug=False,
         reflags=int(re.VERBOSE), debuglog=None, errorlog=None):
@@ -750,10 +754,7 @@ def lex(*, module=None, obj=None, debug=False,
     stateinfo = {'INITIAL': 'inclusive'}
     lexobj = Lexer()
    global token, lex_input
-
-    if errorlog is None:
-        errorlog = Logger(sys.stderr)
-
+    errorlog = build_error_log(errorlog)
     if debug:
         if debuglog is None:
             debuglog = Logger(sys.stderr)
-- 
Gitee

From a4bf6bad1464d30702055270c48a3ab64aa11587 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com>
Date: Fri, 6 Dec 2024 06:53:56 +0000
Subject: [PATCH 31/87] 160602
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com>
---
 script/local/parser/myLexer.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/script/local/parser/myLexer.py b/script/local/parser/myLexer.py
index 065d5a6d..8ee2f75c 100644
--- a/script/local/parser/myLexer.py
+++ b/script/local/parser/myLexer.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 #############################################################################
-# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+# Copyright (c) 2024 Huawei Technologies Co.,Ltd.
 #
 # openGauss is licensed under Mulan PSL v2. 
# You can use this software according to the terms @@ -16,15 +16,16 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : CheckInstall.py is a utility to install Gauss MPP Database. +# Description : myLexer.py is a utility to check security configurations info on local node. ############################################################################# import os import sys + localDirPath = os.path.dirname(os.path.realpath(__file__)) sys.path.append(sys.path[0] + "/../") -from local.parser.lex import * from decimal import Decimal +from local.parser.lex import lex reserved = { 'NULL' : 'NULL', -- Gitee From f14acfa764251866279520c77f369b5741985af7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Fri, 6 Dec 2024 06:55:30 +0000 Subject: [PATCH 32/87] 120603 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/myYACC.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/local/parser/myYACC.py b/script/local/parser/myYACC.py index 3c02f191..362d1336 100644 --- a/script/local/parser/myYACC.py +++ b/script/local/parser/myYACC.py @@ -16,7 +16,7 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : LocalCheckSE.py is a utility to check security configurations info on local node. +# Description : myYACC.py is a utility to check security configurations info on local node. ############################################################################# import os import sys -- Gitee From e1152689e811f856014dc35bc4e9ba8ed778a85f Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Mon, 9 Dec 2024 18:37:47 +0800 Subject: [PATCH 33/87] 120901 --- script/local/LocalCheckSE.py | 8 +- script/local/parser/parser.out | 372 +++--- script/local/parser/parsetab.py | 64 +- script/local/parser/utils.py | 9 - script/local/parser/yacc.py | 1142 ++++++++--------- .../local/parser/\346\226\207\346\263\225.md" | 8 +- 6 files changed, 744 insertions(+), 859 deletions(-) diff --git a/script/local/LocalCheckSE.py b/script/local/LocalCheckSE.py index b728f44e..780c38d6 100644 --- a/script/local/LocalCheckSE.py +++ b/script/local/LocalCheckSE.py @@ -38,8 +38,8 @@ from domain_utils.cluster_file.version_info import VersionInfo from base_utils.os.net_util import NetUtil from domain_utils.domain_common.cluster_constants import ClusterConstants from datetime import datetime, timedelta -from local.parser.myYACC import MyYACC -from local.parser.myLexer import MyLexer +from local.parser.my_yacc import my_yacc +from local.parser.my_lexer import my_lexer from local.parser.utils import set_settings from local.parser.utils import set_dependency_settings from local.parser.utils import show_dependency_info @@ -5360,9 +5360,9 @@ def get_settings(): def process_dependencies(rule_files): current_path = os.path.dirname(os.path.realpath(__file__)) - m = MyLexer() + m = my_lexer() m.build() - y = MyYACC() + y = my_yacc() y.build() dependency_parser = y.yacc diff --git a/script/local/parser/parser.out b/script/local/parser/parser.out index e470295d..1cabbb24 100644 --- a/script/local/parser/parser.out +++ b/script/local/parser/parser.out @@ -24,14 +24,14 
@@ Rule 18 term -> term TIMES factor Rule 19 term -> term DIVIDE factor Rule 20 term -> term MOD factor Rule 21 term -> factor -Rule 22 factor -> NUMBER -Rule 23 factor -> STRING -Rule 24 factor -> ID +Rule 22 factor -> number +Rule 23 factor -> string +Rule 24 factor -> id Rule 25 factor -> NULL Rule 26 factor -> TRUE Rule 27 factor -> FALSE Rule 28 factor -> LPAREN expr RPAREN -Rule 29 function -> ID LPAREN variables RPAREN +Rule 29 function -> id LPAREN variables RPAREN Rule 30 variables -> variables COMMA expr Rule 31 variables -> expr @@ -44,7 +44,7 @@ EQUAL : 8 FALSE : 27 GE : 10 GT : 11 -ID : 24 29 +id : 24 29 LE : 12 LPAREN : 14 28 29 LT : 13 @@ -53,11 +53,11 @@ MOD : 20 NEQUAL : 9 NOT : 6 NULL : 25 -NUMBER : 22 +number : 22 OR : 2 PLUS : 15 RPAREN : 14 28 29 -STRING : 23 +string : 23 THEN : 1 TIMES : 18 TRUE : 26 @@ -102,9 +102,9 @@ state 0 (19) term -> . term DIVIDE factor (20) term -> . term MOD factor (21) term -> . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE @@ -112,9 +112,9 @@ state 0 NOT shift and go to state 5 LPAREN shift and go to state 8 - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -181,18 +181,18 @@ state 5 (19) term -> . term DIVIDE factor (20) term -> . term MOD factor (21) term -> . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE (28) factor -> . LPAREN expr RPAREN LPAREN shift and go to state 8 - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -250,9 +250,9 @@ state 8 (21) term -> . factor (6) not_conditions -> . NOT cdt (7) not_conditions -> . cdt - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE @@ -266,9 +266,9 @@ state 8 (14) cdt -> . LPAREN conditions RPAREN NOT shift and go to state 5 - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -331,68 +331,68 @@ state 10 state 11 - (22) factor -> NUMBER . - - TIMES reduce using rule 22 (factor -> NUMBER .) - DIVIDE reduce using rule 22 (factor -> NUMBER .) - MOD reduce using rule 22 (factor -> NUMBER .) - EQUAL reduce using rule 22 (factor -> NUMBER .) - NEQUAL reduce using rule 22 (factor -> NUMBER .) - GE reduce using rule 22 (factor -> NUMBER .) - GT reduce using rule 22 (factor -> NUMBER .) - LE reduce using rule 22 (factor -> NUMBER .) - LT reduce using rule 22 (factor -> NUMBER .) - PLUS reduce using rule 22 (factor -> NUMBER .) - MINUS reduce using rule 22 (factor -> NUMBER .) 
- RPAREN reduce using rule 22 (factor -> NUMBER .) - AND reduce using rule 22 (factor -> NUMBER .) - THEN reduce using rule 22 (factor -> NUMBER .) - OR reduce using rule 22 (factor -> NUMBER .) - COMMA reduce using rule 22 (factor -> NUMBER .) + (22) factor -> number . + + TIMES reduce using rule 22 (factor -> number .) + DIVIDE reduce using rule 22 (factor -> number .) + MOD reduce using rule 22 (factor -> number .) + EQUAL reduce using rule 22 (factor -> number .) + NEQUAL reduce using rule 22 (factor -> number .) + GE reduce using rule 22 (factor -> number .) + GT reduce using rule 22 (factor -> number .) + LE reduce using rule 22 (factor -> number .) + LT reduce using rule 22 (factor -> number .) + PLUS reduce using rule 22 (factor -> number .) + MINUS reduce using rule 22 (factor -> number .) + RPAREN reduce using rule 22 (factor -> number .) + AND reduce using rule 22 (factor -> number .) + THEN reduce using rule 22 (factor -> number .) + OR reduce using rule 22 (factor -> number .) + COMMA reduce using rule 22 (factor -> number .) state 12 - (23) factor -> STRING . - - TIMES reduce using rule 23 (factor -> STRING .) - DIVIDE reduce using rule 23 (factor -> STRING .) - MOD reduce using rule 23 (factor -> STRING .) - EQUAL reduce using rule 23 (factor -> STRING .) - NEQUAL reduce using rule 23 (factor -> STRING .) - GE reduce using rule 23 (factor -> STRING .) - GT reduce using rule 23 (factor -> STRING .) - LE reduce using rule 23 (factor -> STRING .) - LT reduce using rule 23 (factor -> STRING .) - PLUS reduce using rule 23 (factor -> STRING .) - MINUS reduce using rule 23 (factor -> STRING .) - RPAREN reduce using rule 23 (factor -> STRING .) - AND reduce using rule 23 (factor -> STRING .) - THEN reduce using rule 23 (factor -> STRING .) - OR reduce using rule 23 (factor -> STRING .) - COMMA reduce using rule 23 (factor -> STRING .) + (23) factor -> string . + + TIMES reduce using rule 23 (factor -> string .) + DIVIDE reduce using rule 23 (factor -> string .) + MOD reduce using rule 23 (factor -> string .) + EQUAL reduce using rule 23 (factor -> string .) + NEQUAL reduce using rule 23 (factor -> string .) + GE reduce using rule 23 (factor -> string .) + GT reduce using rule 23 (factor -> string .) + LE reduce using rule 23 (factor -> string .) + LT reduce using rule 23 (factor -> string .) + PLUS reduce using rule 23 (factor -> string .) + MINUS reduce using rule 23 (factor -> string .) + RPAREN reduce using rule 23 (factor -> string .) + AND reduce using rule 23 (factor -> string .) + THEN reduce using rule 23 (factor -> string .) + OR reduce using rule 23 (factor -> string .) + COMMA reduce using rule 23 (factor -> string .) state 13 - (24) factor -> ID . - - TIMES reduce using rule 24 (factor -> ID .) - DIVIDE reduce using rule 24 (factor -> ID .) - MOD reduce using rule 24 (factor -> ID .) - EQUAL reduce using rule 24 (factor -> ID .) - NEQUAL reduce using rule 24 (factor -> ID .) - GE reduce using rule 24 (factor -> ID .) - GT reduce using rule 24 (factor -> ID .) - LE reduce using rule 24 (factor -> ID .) - LT reduce using rule 24 (factor -> ID .) - PLUS reduce using rule 24 (factor -> ID .) - MINUS reduce using rule 24 (factor -> ID .) - RPAREN reduce using rule 24 (factor -> ID .) - AND reduce using rule 24 (factor -> ID .) - THEN reduce using rule 24 (factor -> ID .) - OR reduce using rule 24 (factor -> ID .) - COMMA reduce using rule 24 (factor -> ID .) + (24) factor -> id . + + TIMES reduce using rule 24 (factor -> id .) + DIVIDE reduce using rule 24 (factor -> id .) 
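The lower-cased symbols in this regenerated table (number, string, id in place of NUMBER, STRING, ID) are token names supplied by the companion lexer; productions such as rules 22-24 would come from a PLY-style docstring roughly like the following (a sketch, not the actual myYACC source):

def p_factor_terminal(p):
    """factor : number
              | string
              | id"""
    p[0] = p[1]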
+ MOD reduce using rule 24 (factor -> id .) + EQUAL reduce using rule 24 (factor -> id .) + NEQUAL reduce using rule 24 (factor -> id .) + GE reduce using rule 24 (factor -> id .) + GT reduce using rule 24 (factor -> id .) + LE reduce using rule 24 (factor -> id .) + LT reduce using rule 24 (factor -> id .) + PLUS reduce using rule 24 (factor -> id .) + MINUS reduce using rule 24 (factor -> id .) + RPAREN reduce using rule 24 (factor -> id .) + AND reduce using rule 24 (factor -> id .) + THEN reduce using rule 24 (factor -> id .) + OR reduce using rule 24 (factor -> id .) + COMMA reduce using rule 24 (factor -> id .) state 14 @@ -464,7 +464,7 @@ state 16 state 17 (1) sentence -> conditions THEN . function - (29) function -> . ID LPAREN variables RPAREN + (29) function -> . id LPAREN variables RPAREN ID shift and go to state 36 @@ -491,9 +491,9 @@ state 18 (19) term -> . term DIVIDE factor (20) term -> . term MOD factor (21) term -> . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE @@ -501,9 +501,9 @@ state 18 NOT shift and go to state 5 LPAREN shift and go to state 8 - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -534,9 +534,9 @@ state 19 (19) term -> . term DIVIDE factor (20) term -> . term MOD factor (21) term -> . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE @@ -544,9 +544,9 @@ state 19 NOT shift and go to state 5 LPAREN shift and go to state 8 - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -577,17 +577,17 @@ state 21 (19) term -> . term DIVIDE factor (20) term -> . term MOD factor (21) term -> . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE (28) factor -> . LPAREN expr RPAREN - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -607,17 +607,17 @@ state 22 (19) term -> . term DIVIDE factor (20) term -> . term MOD factor (21) term -> . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE (28) factor -> . 
LPAREN expr RPAREN - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -637,17 +637,17 @@ state 23 (19) term -> . term DIVIDE factor (20) term -> . term MOD factor (21) term -> . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE (28) factor -> . LPAREN expr RPAREN - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -667,17 +667,17 @@ state 24 (19) term -> . term DIVIDE factor (20) term -> . term MOD factor (21) term -> . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE (28) factor -> . LPAREN expr RPAREN - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -697,17 +697,17 @@ state 25 (19) term -> . term DIVIDE factor (20) term -> . term MOD factor (21) term -> . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE (28) factor -> . LPAREN expr RPAREN - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -727,17 +727,17 @@ state 26 (19) term -> . term DIVIDE factor (20) term -> . term MOD factor (21) term -> . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE (28) factor -> . LPAREN expr RPAREN - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -754,17 +754,17 @@ state 27 (19) term -> . term DIVIDE factor (20) term -> . term MOD factor (21) term -> . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE (28) factor -> . 
LPAREN expr RPAREN - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -780,17 +780,17 @@ state 28 (19) term -> . term DIVIDE factor (20) term -> . term MOD factor (21) term -> . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE (28) factor -> . LPAREN expr RPAREN - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -814,9 +814,9 @@ state 29 (21) term -> . factor (4) and_conditions -> . and_conditions AND not_conditions (5) and_conditions -> . not_conditions - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE @@ -831,9 +831,9 @@ state 29 (13) cdt -> . expr LT expr (14) cdt -> . LPAREN conditions RPAREN - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -883,17 +883,17 @@ state 31 state 32 (18) term -> term TIMES . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE (28) factor -> . LPAREN expr RPAREN - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -904,17 +904,17 @@ state 32 state 33 (19) term -> term DIVIDE . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE (28) factor -> . LPAREN expr RPAREN - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -925,17 +925,17 @@ state 33 state 34 (20) term -> term MOD . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE (28) factor -> . 
LPAREN expr RPAREN - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -952,7 +952,7 @@ state 35 state 36 - (29) function -> ID . LPAREN variables RPAREN + (29) function -> id . LPAREN variables RPAREN LPAREN shift and go to state 53 @@ -1002,17 +1002,17 @@ state 40 (19) term -> . term DIVIDE factor (20) term -> . term MOD factor (21) term -> . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE (28) factor -> . LPAREN expr RPAREN - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -1242,7 +1242,7 @@ state 52 state 53 - (29) function -> ID LPAREN . variables RPAREN + (29) function -> id LPAREN . variables RPAREN (30) variables -> . variables COMMA expr (31) variables -> . expr (15) expr -> . expr PLUS term @@ -1252,17 +1252,17 @@ state 53 (19) term -> . term DIVIDE factor (20) term -> . term MOD factor (21) term -> . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE (28) factor -> . LPAREN expr RPAREN - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 @@ -1286,7 +1286,7 @@ state 54 state 55 - (29) function -> ID LPAREN variables . RPAREN + (29) function -> id LPAREN variables . RPAREN (30) variables -> variables . COMMA expr RPAREN shift and go to state 57 @@ -1307,7 +1307,7 @@ state 56 state 57 - (29) function -> ID LPAREN variables RPAREN . + (29) function -> id LPAREN variables RPAREN . $end reduce using rule 29 (function -> ID LPAREN variables RPAREN .) @@ -1322,17 +1322,17 @@ state 58 (19) term -> . term DIVIDE factor (20) term -> . term MOD factor (21) term -> . factor - (22) factor -> . NUMBER - (23) factor -> . STRING - (24) factor -> . ID + (22) factor -> . number + (23) factor -> . string + (24) factor -> . id (25) factor -> . NULL (26) factor -> . TRUE (27) factor -> . FALSE (28) factor -> . 
LPAREN expr RPAREN - NUMBER shift and go to state 11 - STRING shift and go to state 12 - ID shift and go to state 13 + number shift and go to state 11 + string shift and go to state 12 + id shift and go to state 13 NULL shift and go to state 14 TRUE shift and go to state 15 FALSE shift and go to state 16 diff --git a/script/local/parser/parsetab.py b/script/local/parser/parsetab.py index 9b7ddbb1..38a20c9f 100644 --- a/script/local/parser/parsetab.py +++ b/script/local/parser/parsetab.py @@ -23,7 +23,7 @@ TAB_VERSION = '3.10' LR_METHOD = 'LALR' LR_SIGNATURE = 'AND COMMA DIVIDE EQUAL FALSE GE GT ID LE LPAREN LT MINUS MOD NEQUAL NOT NULL NUMBER OR PLUS RPAREN STRING THEN TIMES TRUEsentence : conditions THEN function \n conditions : conditions OR and_conditionsconditions : and_conditions\n and_conditions : and_conditions AND not_conditions\n and_conditions : not_conditionsnot_conditions : NOT cdtnot_conditions : cdt\n cdt : expr EQUAL expr\n | expr NEQUAL expr\n | expr GE expr\n | expr GT expr\n | expr LE expr\n | expr LT expr\n cdt : LPAREN conditions RPAREN\n expr : expr PLUS term\n | expr MINUS term\n expr : term\n term : term TIMES factor\n | term DIVIDE factor\n | term MOD factor\n term : factor\n factor : NUMBER\n | STRING\n factor : IDfactor : NULL\n factor : TRUE\n | FALSE\n factor : LPAREN expr RPARENfunction : ID LPAREN variables RPAREN\n variables : variables COMMA expr\n variables : expr' - + _lr_action_items = {'NOT':([0,8,18,19,29,],[5,5,5,5,5,]),'LPAREN':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,36,40,53,58,],[8,8,29,8,8,40,40,40,40,40,40,40,40,29,40,40,40,53,40,40,40,]),'NUMBER':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,]),'STRING':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,]),'ID':([0,5,8,17,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[13,13,13,36,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,]),'NULL':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,]),'TRUE':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,]),'FALSE':([0,5,8,18,19,21,22,23,24,25,26,27,28,29,32,33,34,40,53,58,],[16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,]),'$end':([1,35,57,],[0,-1,-29,]),'THEN':([2,3,4,6,9,10,11,12,13,14,15,16,20,37,38,39,41,42,43,44,45,46,47,48,49,50,51,52,],[17,-3,-5,-7,-17,-21,-22,-23,-24,-25,-26,-27,-6,-2,-4,-8,-9,-10,-11,-12,-13,-15,-16,-14,-28,-18,-19,-20,]),'OR':([2,3,4,6,9,10,11,12,13,14,15,16,20,30,37,38,39,41,42,43,44,45,46,47,48,49,50,51,52,],[18,-3,-5,-7,-17,-21,-22,-23,-24,-25,-26,-27,-6,18,-2,-4,-8,-9,-10,-11,-12,-13,-15,-16,-14,-28,-18,-19,-20,]),'RPAREN':([3,4,6,9,10,11,12,13,14,15,16,20,30,31,37,38,39,41,42,43,44,45,46,47,48,49,50,51,52,54,55,56,59,],[-3,-5,-7,-17,-21,-22,-23,-24,-25,-26,-27,-6,48,49,-2,-4,-8,-9,-10,-11,-12,-13,-15,-16,-14,-28,-18,-19,-20,49,57,-31,-30,]),'AND':([3,4,6,9,10,11,12,13,14,15,16,20,37,38,39,41,42,43,44,45,46,47,48,49,50,51,52,],[19,-5,-7,-17,-21,-22,-23,-24,-25,-26,-27,-6,19,-4,-8,-9,-10,-11,-12,-13,-15,-16,-14,-28,-18,-19,-20,]),'EQUAL':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[21,-17,-21,-22,-23,-24,-25,-26,-27,21,-15,-16,-28,-18,-19,-20,]),'NEQUAL':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[22,-17,-21,-22,-23,-24,-25,-26,-27,22,-15,-16,-28,-18,-19,-20,]),'GE':([7,9,10,11,12,13,14,15,16,31
,46,47,49,50,51,52,],[23,-17,-21,-22,-23,-24,-25,-26,-27,23,-15,-16,-28,-18,-19,-20,]),'GT':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[24,-17,-21,-22,-23,-24,-25,-26,-27,24,-15,-16,-28,-18,-19,-20,]),'LE':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[25,-17,-21,-22,-23,-24,-25,-26,-27,25,-15,-16,-28,-18,-19,-20,]),'LT':([7,9,10,11,12,13,14,15,16,31,46,47,49,50,51,52,],[26,-17,-21,-22,-23,-24,-25,-26,-27,26,-15,-16,-28,-18,-19,-20,]),'PLUS':([7,9,10,11,12,13,14,15,16,31,39,41,42,43,44,45,46,47,49,50,51,52,54,56,59,],[27,-17,-21,-22,-23,-24,-25,-26,-27,27,27,27,27,27,27,27,-15,-16,-28,-18,-19,-20,27,27,27,]),'MINUS':([7,9,10,11,12,13,14,15,16,31,39,41,42,43,44,45,46,47,49,50,51,52,54,56,59,],[28,-17,-21,-22,-23,-24,-25,-26,-27,28,28,28,28,28,28,28,-15,-16,-28,-18,-19,-20,28,28,28,]),'COMMA':([9,10,11,12,13,14,15,16,46,47,49,50,51,52,55,56,59,],[-17,-21,-22,-23,-24,-25,-26,-27,-15,-16,-28,-18,-19,-20,58,-31,-30,]),'TIMES':([9,10,11,12,13,14,15,16,46,47,49,50,51,52,],[32,-21,-22,-23,-24,-25,-26,-27,32,32,-28,-18,-19,-20,]),'DIVIDE':([9,10,11,12,13,14,15,16,46,47,49,50,51,52,],[33,-21,-22,-23,-24,-25,-26,-27,33,33,-28,-18,-19,-20,]),'MOD':([9,10,11,12,13,14,15,16,46,47,49,50,51,52,],[34,-21,-22,-23,-24,-25,-26,-27,34,34,-28,-18,-19,-20,]),} _lr_action = {} @@ -45,35 +45,35 @@ for _k, _v in _lr_goto_items.items(): del _lr_goto_items _lr_productions = [ ("S' -> sentence","S'",1,None,None,None), - ('sentence -> conditions THEN function','sentence',3,'p_conditions_relation_function','myYACC.py',15), - ('conditions -> conditions OR and_conditions','conditions',3,'p_conditions_or','myYACC.py',21), - ('conditions -> and_conditions','conditions',1,'p_conditions_and_conditions','myYACC.py',25), - ('and_conditions -> and_conditions AND not_conditions','and_conditions',3,'p_and_conditions_and','myYACC.py',30), - ('and_conditions -> not_conditions','and_conditions',1,'p_and_conditions_cdt','myYACC.py',35), - ('not_conditions -> NOT cdt','not_conditions',2,'p_not_cdt','myYACC.py',39), - ('not_conditions -> cdt','not_conditions',1,'p_not_conditions_cdt','myYACC.py',43), - ('cdt -> expr EQUAL expr','cdt',3,'p_cdt_ops','myYACC.py',48), - ('cdt -> expr NEQUAL expr','cdt',3,'p_cdt_ops','myYACC.py',49), - ('cdt -> expr GE expr','cdt',3,'p_cdt_ops','myYACC.py',50), - ('cdt -> expr GT expr','cdt',3,'p_cdt_ops','myYACC.py',51), - ('cdt -> expr LE expr','cdt',3,'p_cdt_ops','myYACC.py',52), - ('cdt -> expr LT expr','cdt',3,'p_cdt_ops','myYACC.py',53), - ('cdt -> LPAREN conditions RPAREN','cdt',3,'p_cdt_parens','myYACC.py',73), - ('expr -> expr PLUS term','expr',3,'p_expr_plus_minus','myYACC.py',78), - ('expr -> expr MINUS term','expr',3,'p_expr_plus_minus','myYACC.py',79), - ('expr -> term','expr',1,'p_expr_term','myYACC.py',87), - ('term -> term TIMES factor','term',3,'p_term_times_divide_mod','myYACC.py',92), - ('term -> term DIVIDE factor','term',3,'p_term_times_divide_mod','myYACC.py',93), - ('term -> term MOD factor','term',3,'p_term_times_divide_mod','myYACC.py',94), - ('term -> factor','term',1,'p_term_factor','myYACC.py',104), - ('factor -> NUMBER','factor',1,'p_factor_assign_simple','myYACC.py',109), - ('factor -> STRING','factor',1,'p_factor_assign_simple','myYACC.py',110), - ('factor -> ID','factor',1,'p_factor_id','myYACC.py',115), - ('factor -> NULL','factor',1,'p_factor_null','myYACC.py',119), - ('factor -> TRUE','factor',1,'p_factor_bool','myYACC.py',124), - ('factor -> FALSE','factor',1,'p_factor_bool','myYACC.py',125), - ('factor -> LPAREN expr 
RPAREN','factor',3,'p_factor_paren','myYACC.py',133), - ('function -> ID LPAREN variables RPAREN','function',4,'p_function','myYACC.py',137), - ('variables -> variables COMMA expr','variables',3,'p_variables_comma','myYACC.py',141), - ('variables -> expr','variables',1,'p_variables_factor','myYACC.py',146), + ('sentence -> conditions THEN function','sentence',3,'p_conditions_relation_function','my_yacc.py',15), + ('conditions -> conditions OR and_conditions','conditions',3,'p_conditions_or','my_yacc.py',21), + ('conditions -> and_conditions','conditions',1,'p_conditions_and_conditions','my_yacc.py',25), + ('and_conditions -> and_conditions AND not_conditions','and_conditions',3,'p_and_conditions_and','my_yacc.py',30), + ('and_conditions -> not_conditions','and_conditions',1,'p_and_conditions_cdt','my_yacc.py',35), + ('not_conditions -> NOT cdt','not_conditions',2,'p_not_cdt','my_yacc.py',39), + ('not_conditions -> cdt','not_conditions',1,'p_not_conditions_cdt','my_yacc.py',43), + ('cdt -> expr EQUAL expr','cdt',3,'p_cdt_ops','my_yacc.py',48), + ('cdt -> expr NEQUAL expr','cdt',3,'p_cdt_ops','my_yacc.py',49), + ('cdt -> expr GE expr','cdt',3,'p_cdt_ops','my_yacc.py',50), + ('cdt -> expr GT expr','cdt',3,'p_cdt_ops','my_yacc.py',51), + ('cdt -> expr LE expr','cdt',3,'p_cdt_ops','my_yacc.py',52), + ('cdt -> expr LT expr','cdt',3,'p_cdt_ops','my_yacc.py',53), + ('cdt -> LPAREN conditions RPAREN','cdt',3,'p_cdt_parens','my_yacc.py',73), + ('expr -> expr PLUS term','expr',3,'p_expr_plus_minus','my_yacc.py',78), + ('expr -> expr MINUS term','expr',3,'p_expr_plus_minus','my_yacc.py',79), + ('expr -> term','expr',1,'p_expr_term','my_yacc.py',87), + ('term -> term TIMES factor','term',3,'p_term_times_divide_mod','my_yacc.py',92), + ('term -> term DIVIDE factor','term',3,'p_term_times_divide_mod','my_yacc.py',93), + ('term -> term MOD factor','term',3,'p_term_times_divide_mod','my_yacc.py',94), + ('term -> factor','term',1,'p_term_factor','my_yacc.py',104), + ('factor -> number','factor',1,'p_factor_assign_simple','my_yacc.py',109), + ('factor -> string','factor',1,'p_factor_assign_simple','my_yacc.py',110), + ('factor -> id','factor',1,'p_factor_id','my_yacc.py',115), + ('factor -> NULL','factor',1,'p_factor_null','my_yacc.py',119), + ('factor -> TRUE','factor',1,'p_factor_bool','my_yacc.py',124), + ('factor -> FALSE','factor',1,'p_factor_bool','my_yacc.py',125), + ('factor -> LPAREN expr RPAREN','factor',3,'p_factor_paren','my_yacc.py',133), + ('function -> id LPAREN variables RPAREN','function',4,'p_function','my_yacc.py',137), + ('variables -> variables COMMA expr','variables',3,'p_variables_comma','my_yacc.py',141), + ('variables -> expr','variables',1,'p_variables_factor','my_yacc.py',146), ] diff --git a/script/local/parser/utils.py b/script/local/parser/utils.py index 136f9555..5715faf2 100644 --- a/script/local/parser/utils.py +++ b/script/local/parser/utils.py @@ -25,15 +25,6 @@ import re import sys sys.path.append(sys.path[0] + "/../") -from gspylib.common.GaussLog import GaussLog -from gspylib.common.ParameterParsecheck import Parameter -from gspylib.common.Common import DefaultValue, ClusterCommand -from gspylib.common.ErrorCode import ErrorCode -from base_utils.common.fast_popen import FastPopen -from domain_utils.cluster_file.cluster_log import ClusterLog -from base_utils.os.env_util import EnvUtil -from domain_utils.cluster_os.cluster_user import ClusterUser -from domain_utils.domain_common.cluster_constants import ClusterConstants ########## print diff --git 
a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 7627bce6..16da4989 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -222,57 +222,38 @@ class LRParser: # tracking. In this mode, symbols will record the starting/ending line number and # character index. - def parse(self, input=None, lexer=None, debug=False, tracking=False): - # If debugging has been specified as a flag, turn it into a logging object + def parse(self, put=None, lexer=None, debug=False, tracking=False): if isinstance(debug, int) and debug: debug = Logger(sys.stderr) - - lookahead = None # Current lookahead symbol - lookaheadstack = [] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - defaulted_states = self.defaulted_states # Local reference to defaulted states - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery + lookahead = None + lookaheadstack = [] + actions = self.action + goto = self.goto + prod = self.productions + defaulted_states = self.defaulted_states + pslice = YaccProduction(None) + errorcount = 0 if debug: debug.info('PARSE DEBUG START') - - # If no lexer was given, we will try to use the lex module if not lexer: from . import lex lexer = lex.lexer - - # Set up the lexer and parser objects on pslice pslice.lexer = lexer pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - # Set the token function + if put is not None: + lexer.input(put) get_token = self.token = lexer.token - - # Set up the state and symbol stacks - statestack = self.statestack = [] # Stack of parsing states - symstack = self.symstack = [] # Stack of grammar symbols - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - + statestack = self.statestack = [] + symstack = self.symstack = [] + pslice.stack = symstack + errtoken = None statestack.append(0) sym = YaccSymbol() sym.type = '$end' symstack.append(sym) state = 0 while True: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - if debug: debug.debug('State : %s', state) @@ -285,8 +266,6 @@ class LRParser: if not lookahead: lookahead = YaccSymbol() lookahead.type = '$end' - - # Check the action table ltype = lookahead.type t = actions[state].get(ltype) else: @@ -300,7 +279,6 @@ class LRParser: if t is not None: if t > 0: - # shift a symbol on the stack statestack.append(t) state = t @@ -309,19 +287,14 @@ class LRParser: symstack.append(lookahead) lookahead = None - - # Decrease error count on successful shift if errorcount: errorcount -= 1 continue if t < 0: - # reduce a symbol on the stack, emit a production p = prod[-t] pname = p.name plen = p.len - - # Get production function sym = YaccSymbol() sym.type = pname # Production name sym.value = None @@ -346,12 +319,6 @@ class LRParser: t1 = targ[-1] sym.endlineno = getattr(t1, 'endlineno', t1.lineno) sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # below as a performance optimization. 
Make sure - # changes get made in both locations. - pslice.slice = targ try: @@ -366,7 +333,6 @@ class LRParser: state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: - # If an error was set. Enter error recovery state lookaheadstack.append(lookahead) # Save the current lookahead token symstack.extend(targ[1:-1]) # Put the production slice back on the stack statestack.pop() # Pop back one state (before the reduce) @@ -386,16 +352,9 @@ class LRParser: sym.lexpos = lexer.lexpos targ = [sym] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - pslice.slice = targ try: - # Call the grammar rule with our special slice object self.state = state p.callable(pslice) if debug: @@ -404,7 +363,6 @@ class LRParser: state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: - # If an error was set. Enter error recovery state lookaheadstack.append(lookahead) # Save the current lookahead token statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] @@ -431,17 +389,6 @@ class LRParser: if debug: debug.error('Error : %s', ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. if errorcount == 0 or self.errorok: errorcount = ERROR_COUNT self.errorok = False @@ -454,9 +401,6 @@ class LRParser: self.state = state tok = self.errorfunc(errtoken) if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. The - # returned token is the next lookahead lookahead = tok errtoken = None continue @@ -476,39 +420,23 @@ class LRParser: else: errorcount = ERROR_COUNT - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - if len(statestack) <= 1 and lookahead.type != '$end': lookahead = None errtoken = None state = 0 - # Nuke the pushback stack del lookaheadstack[:] continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack if lookahead.type == '$end': - # Whoa. We're really hosed here. Bail out return if lookahead.type != 'error': sym = symstack[-1] if sym.type == 'error': - # Hmmm. Error is on top of stack, we'll just nuke input - # symbol and continue if tracking: sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) lookahead = None continue - - # Create the error symbol for the first time and make it the new lookahead symbol t = YaccSymbol() t.type = 'error' @@ -526,10 +454,7 @@ class LRParser: lookahead.lexpos = sym.lexpos statestack.pop() state = statestack[-1] - continue - - # If we'r here, something really bad happened raise RuntimeError('yacc: internal parser error!!!\n') @@ -783,23 +708,24 @@ class Grammar(object): # are valid and that %prec is used correctly. 
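+    #
+    # As an illustrative (hypothetical) example of what these checks accept, a
+    # rule can override its precedence by naming a token from the precedence
+    # table at the end of its right-hand side:
+    #
+    #     def p_expr_uminus(p):
+    #         """expr : MINUS expr %prec UMINUS"""
+    #         p[0] = -p[2]
+    #
+    # This resolves only if an entry such as ('right', 'UMINUS') appears in the
+    # module's precedence declaration; otherwise handle_precedence() below
+    # raises "Nothing known about the precedence of 'UMINUS'".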
# -----------------------------------------------------------------------------
 
-    def add_production(self, prodname, syms, func=None, file='', line=0):
-
+    def validate_prodname(self, prodname, file, line):
+        """Validate the production name."""
         if prodname in self.Terminals:
-            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
+            raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}. Already defined as a token')
         if prodname == 'error':
-            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
+            raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}. error is a reserved word')
         if not _is_identifier.match(prodname):
-            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))
+            raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}')
 
-        # Look for literal tokens
+    def handle_literal_tokens(self, syms, file, line, prodname):
+        """Handle literal tokens in the rule symbols."""
         for n, s in enumerate(syms):
             if s[0] in "'\"":
                 try:
                     c = eval(s)
-                    if (len(c) > 1):
-                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
-                                           (file, line, s, prodname))
+                    if len(c) > 1:
+                        raise GrammarError(
+                            f'{file}:{line}: Literal token {s} in rule {prodname!r} may only be a single character')
                     if c not in self.Terminals:
                         self.Terminals[c] = []
                     syms[n] = c
@@ -807,35 +733,49 @@ class Grammar(object):
                 except SyntaxError:
                     pass
             if not _is_identifier.match(s) and s != '%prec':
-                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))
+                raise GrammarError(f'{file}:{line}: Illegal name {s!r} in rule {prodname!r}')
 
-        # Determine the precedence level
+    def handle_precedence(self, syms, file, line):
+        """Handle precedence settings in the rule."""
         if '%prec' in syms:
             if syms[-1] == '%prec':
-                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
+                raise GrammarError(f'{file}:{line}: Syntax error. Nothing follows %prec')
             if syms[-2] != '%prec':
-                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
-                                   (file, line))
+                raise GrammarError(f'{file}:{line}: Syntax error. %prec can only appear at the end of a grammar rule')
             precname = syms[-1]
             prodprec = self.Precedence.get(precname)
             if not prodprec:
-                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
-            else:
-                self.UsedPrecedence.add(precname)
+                raise GrammarError(f'{file}:{line}: Nothing known about the precedence of {precname!r}')
+            self.UsedPrecedence.add(precname)
             del syms[-2:]  # Drop %prec from the rule
+            return prodprec
         else:
             # If no %prec, precedence is determined by the rightmost terminal symbol
             precname = rightmost_terminal(syms, self.Terminals)
-            prodprec = self.Precedence.get(precname, ('right', 0))
+            return self.Precedence.get(precname, ('right', 0))
+
+    def check_duplicate_rule(self, prodname, syms, file, line):
+        """Check for duplicate rule definitions."""
+        rule_map = f'{prodname} -> {syms}'
+        if rule_map in self.Prodmap:
+            m = self.Prodmap[rule_map]
+            raise GrammarError(f'{file}:{line}: Duplicate rule {rule_map}. 
Previous definition at {m.file}:{m.line}') + + def add_production(self, prodname, syms, func=None, file='', line=0): + """Main method to add a production.""" + # Validate the production name + self.validate_prodname(prodname, file, line) - # See if the rule is already in the rulemap - map = '%s -> %s' % (prodname, syms) - if map in self.Prodmap: - m = self.Prodmap[map] - raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + - 'Previous definition at %s:%d' % (m.file, m.line)) + # Handle literal tokens in the symbols + self.handle_literal_tokens(syms, file, line, prodname) - # From this point on, everything is valid. Create a new Production instance + # Handle precedence + prodprec = self.handle_precedence(syms, file, line) + + # Check for duplicate rules + self.check_duplicate_rule(prodname, syms, file, line) + + # Create a new production instance pnumber = len(self.Productions) if prodname not in self.Nonterminals: self.Nonterminals[prodname] = [] @@ -849,10 +789,10 @@ class Grammar(object): self.Nonterminals[t] = [] self.Nonterminals[t].append(pnumber) - # Create a production and add it to the list of productions + # Create and add the production p = Production(pnumber, prodname, syms, prodprec, func, file, line) self.Productions.append(p) - self.Prodmap[map] = p + self.Prodmap[f'{prodname} -> {syms}'] = p # Add to the global productions list try: @@ -918,36 +858,43 @@ class Grammar(object): for n in self.Nonterminals: terminates[n] = False - # Then propagate termination until no change: + # Propagate termination until no change + self.propagate_termination(terminates) + + # Collect symbols that do not terminate + infinite = self.collect_infinite(terminates) + + return infinite + + def propagate_termination(self, terminates): while True: some_change = False for (n, pl) in self.Prodnames.items(): - # Nonterminal n terminates iff any of its productions terminates. - for p in pl: - # Production p terminates iff all of its rhs symbols terminate. - for s in p.prod: - if not terminates[s]: - # The symbol s does not terminate, - # so production p does not terminate. - p_terminates = False - break - else: - # didn't break from the loop, - # so every symbol s terminates - # so production p terminates. - p_terminates = True - - if p_terminates: - # symbol n terminates! - if not terminates[n]: - terminates[n] = True - some_change = True - # Don't need to consider any more productions for this n. - break - + some_change |= self.check_productions_for_termination(n, pl, terminates) if not some_change: break + def check_productions_for_termination(self, n, productions, terminates): + some_change = False + for p in productions: + p_terminates = self.check_production_termination(p, terminates) + if p_terminates: + if not terminates[n]: + terminates[n] = True + some_change = True + # Don't need to consider any more productions for this nonterminal. + break + return some_change + + def check_production_termination(self, production, terminates): + for s in production.prod: + if not terminates.get(s, False): + # If any symbol does not terminate, the production does not terminate. + return False + # All symbols terminate, so production terminates. 
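+        # For example (hypothetical grammar): with B -> 'b' and A -> B B, the
+        # production for B terminates at once since 'b' is a terminal, while
+        # A -> B B only passes this check after terminates[B] has been set; the
+        # enclosing fixed-point loop re-runs the check until nothing changes.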
+ return True + + def collect_infinite(self, terminates): infinite = [] for (s, term) in terminates.items(): if not term: @@ -957,9 +904,9 @@ class Grammar(object): pass else: infinite.append(s) - return infinite + # ----------------------------------------------------------------------------- # # Find all symbols that were used the grammar, but not defined as tokens or @@ -1020,33 +967,12 @@ class Grammar(object): return unused - # ------------------------------------------------------------------------- - # - # Compute the value of FIRST1(beta) where beta is a tuple of symbols. - # - # During execution of compute_first1, the result may be incomplete. - # Afterward (e.g., when called from compute_follow()), it will be complete. - # ------------------------------------------------------------------------- def _first(self, beta): - # We are computing First(x1,x2,x3,...,xn) result = [] for x in beta: - x_produces_empty = False - - # Add all the non- symbols of First[x] to the result. - for f in self.First[x]: - if f == '': - x_produces_empty = True - else: - if f not in result: - result.append(f) - - if x_produces_empty: - # We have to consider the next x in beta, - # i.e. stay in the loop. - pass - else: + x_produces_empty = self._process_first_set(x, result) + if not x_produces_empty: # We don't have to consider any further symbols in beta. break else: @@ -1057,46 +983,51 @@ class Grammar(object): return result - # ------------------------------------------------------------------------- - # - # Compute the value of FIRST1(X) for all symbols - # ------------------------------------------------------------------------- + def _process_first_set(self, x, result): + x_produces_empty = False + # Add all the non- symbols of First[x] to the result. + for f in self.First[x]: + if f == '': + x_produces_empty = True + else: + if f not in result: + result.append(f) + return x_produces_empty + def compute_first(self): if self.First: return self.First - # Terminals: for t in self.Terminals: self.First[t] = [t] - self.First['$end'] = ['$end'] - # Nonterminals: - # Initialize to the empty set: for n in self.Nonterminals: self.First[n] = [] - # Then propagate symbols until no change: while True: some_change = False - for n in self.Nonterminals: - for p in self.Prodnames[n]: - for f in self._first(p.prod): - if f not in self.First[n]: - self.First[n].append(f) - some_change = True + some_change = self._propagate_first() if not some_change: break - return self.First - # --------------------------------------------------------------------- - # - # Computes all of the follow sets for every non-terminal symbol. The - # follow set is the set of all symbols that might follow a given - # non-terminal. See the Dragon book, 2nd Ed. p. 189. 
- # --------------------------------------------------------------------- + def _propagate_first(self): + some_change = False + for n in self.Nonterminals: + some_change |= self._update_first_set(n) + return some_change + + def _update_first_set(self, nonterminal): + some_change = False + for p in self.Prodnames[nonterminal]: + for f in self._first(p.prod): + if f not in self.First[nonterminal]: + self.First[nonterminal].append(f) + some_change = True + return some_change + def compute_follow(self, start=None): # If already computed, return the result if self.Follow: @@ -1116,43 +1047,43 @@ class Grammar(object): self.Follow[start] = ['$end'] while True: - didadd = False - for p in self.Productions[1:]: - # Here is the production set - for i, B in enumerate(p.prod): - if B in self.Nonterminals: - # Okay. We got a non-terminal in a production - fst = self._first(p.prod[i + 1:]) - hasempty = False - for f in fst: - if f != '' and f not in self.Follow[B]: - self.Follow[B].append(f) - didadd = True - if f == '': - hasempty = True - if hasempty or i == (len(p.prod) - 1): - # Add elements of follow(a) to follow(b) - for f in self.Follow[p.name]: - if f not in self.Follow[B]: - self.Follow[B].append(f) - didadd = True + didadd = self.process_productions() if not didadd: break + return self.Follow - # ----------------------------------------------------------------------------- - # - # This function walks the list of productions and builds a complete set of the - # LR items. The LR items are stored in two ways: First, they are uniquely - # numbered and placed in the list _lritems. Second, a linked list of LR items - # is built for each production. For example: - # - # E -> E PLUS E - # - # Creates the list - # - # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] - # ----------------------------------------------------------------------------- + def process_productions(self): + didadd = False + for p in self.Productions[1:]: + didadd = self.process_production(p, didadd) + return didadd + + def process_production(self, p, didadd): + for i, B in enumerate(p.prod): + if B in self.Nonterminals: + fst = self._first(p.prod[i + 1:]) + didadd = self.process_first_set(fst, B, p, i, didadd) + return didadd + + def process_first_set(self, fst, B, p, i, didadd): + hasempty = False + for f in fst: + if f != '' and f not in self.Follow[B]: + self.Follow[B].append(f) + didadd = True + if f == '': + hasempty = True + if hasempty or i == (len(p.prod) - 1): + didadd = self.add_follow_to_nonterminal(p, B, didadd) + return didadd + + def add_follow_to_nonterminal(self, p, B, didadd): + for f in self.Follow[p.name]: + if f not in self.Follow[B]: + self.Follow[B].append(f) + didadd = True + return didadd def build_lritems(self): for p in self.Productions: @@ -1160,21 +1091,7 @@ class Grammar(object): i = 0 lr_items = [] while True: - if i > len(p): - lri = None - else: - lri = LRItem(p, i) - # Precompute the list of productions immediately following - try: - lri.lr_after = self.Prodnames[lri.prod[i + 1]] - except (IndexError, KeyError): - lri.lr_after = [] - try: - lri.lr_before = lri.prod[i - 1] - except IndexError: - lri.lr_before = None - - lastlri.lr_next = lri + lri = self._process_lr_item(p, i, lastlri) if not lri: break lr_items.append(lri) @@ -1182,27 +1099,27 @@ class Grammar(object): i += 1 p.lr_items = lr_items + def _process_lr_item(self, p, i, lastlri): + """ + Process a single LR item step and return the next lri object. 
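+
+        For a production E -> E PLUS E, successive calls with i = 0, 1, 2, 3
+        yield the chained items
+        [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ],
+        and one further call (i > len(p)) returns None, which stops the loop
+        in build_lritems().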
+ """ + if i > len(p): + lri = None + else: + lri = LRItem(p, i) + # Precompute the list of productions immediately following + try: + lri.lr_after = self.Prodnames[lri.prod[i + 1]] + except (IndexError, KeyError): + lri.lr_after = [] + try: + lri.lr_before = lri.prod[i - 1] + except IndexError: + lri.lr_before = None -# ----------------------------------------------------------------------------- -# === LR Generator === -# -# The following classes and functions are used to generate LR parsing tables on -# a grammar. -# ----------------------------------------------------------------------------- + lastlri.lr_next = lri + return lri -# ----------------------------------------------------------------------------- -# -# The following two functions are used to compute set valued functions -# of the form: -# -# -# This is used to compute the values of Read() sets as well as FOLLOW sets -# in LALR(1) generation. -# -# Inputs: X - An input set -# R - A relation -# FP - Set-valued function -# ------------------------------------------------------------------------------ def digraph(X, R, FP): N = {} @@ -1297,24 +1214,26 @@ class LRTable: J = I[:] didadd = True while didadd: - didadd = False - for j in J: - for x in j.lr_after: - if getattr(x, 'lr0_added', 0) == self._add_count: - continue - # Add B --> .G to J - J.append(x.lr_next) - x.lr0_added = self._add_count - didadd = True + didadd = self._process_lr0_closure(J) return J - # Compute the LR(0) goto function goto(I,X) where I is a set - # of LR(0) items and X is a grammar symbol. This function is written - # in a way that guarantees uniqueness of the generated goto sets - # (i.e. the same goto set will never be returned as two different Python - # objects). With uniqueness, we can later do fast set comparisons using - # id(obj) instead of element-wise comparison. + def _process_lr0_closure(self, J): + """ + Process a single step of the lr0 closure algorithm. + It tries to add new LR items to the closure. + """ + didadd = False + for j in J: + for x in j.lr_after: + if getattr(x, 'lr0_added', 0) == self._add_count: + continue + # Add B --> .G to J + J.append(x.lr_next) + x.lr0_added = self._add_count + didadd = True + + return didadd def lr0_goto(self, I, x): # First we look for a previously cached entry @@ -1369,112 +1288,70 @@ class LRTable: for ii in I: for s in ii.usyms: asyms[s] = None - for x in asyms: g = self.lr0_goto(I, x) if not g or id(g) in self.lr0_cidhash: continue self.lr0_cidhash[id(g)] = len(C) C.append(g) - return C - # ----------------------------------------------------------------------------- - # ==== LALR(1) Parsing ==== - # - # LALR(1) parsing is almost exactly the same as SLR except that instead of - # relying upon Follow() sets when performing reductions, a more selective - # lookahead set that incorporates the state of the LR(0) machine is utilized. - # Thus, we mainly just have to focus on calculating the lookahead sets. - # - # The method used here is due to DeRemer and Pennelo (1982). - # - # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) - # Lookahead Sets", ACM Transactions on Programming Languages and Systems, - # Vol. 4, No. 4, Oct. 1982, pp. 615-649 - # - # Further details can also be found in: - # - # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", - # McGraw-Hill Book Company, (1985). 
-    #
-    # -----------------------------------------------------------------------------
-
-    # -----------------------------------------------------------------------------
-    #
-    # Creates a dictionary containing all of the non-terminals that might produce
-    # an empty production.
-    # -----------------------------------------------------------------------------
-
     def compute_nullable_nonterminals(self):
         nullable = set()
         num_nullable = 0
         while True:
-            for p in self.grammar.Productions[1:]:
-                if p.len == 0:
-                    nullable.add(p.name)
-                    continue
-                for t in p.prod:
-                    if t not in nullable:
-                        break
-                else:
-                    nullable.add(p.name)
-            if len(nullable) == num_nullable:
+            new_count = self._process_nullable_step(nullable)
+            if new_count == num_nullable:
                 break
             num_nullable = len(nullable)
         return nullable
 
-    # -----------------------------------------------------------------------------
-    #
-    # Given a set of LR(0) items, this functions finds all of the non-terminal
-    # transitions.  These are transitions in which a dot appears immediately before
-    # a non-terminal.   Returns a list of tuples of the form (state,N) where state
-    # is the state number and N is the nonterminal symbol.
-    #
-    # The input C is the set of LR(0) items.
-    # -----------------------------------------------------------------------------
+    def _process_nullable_step(self, nullable):
+        """Run one pass over the grammar, growing the nullable set; returns its new size."""
+        for p in self.grammar.Productions[1:]:
+            if p.len == 0:
+                nullable.add(p.name)
+                continue
+            for t in p.prod:
+                if t not in nullable:
+                    break
+            else:
+                nullable.add(p.name)
+        return len(nullable)
 
     def find_nonterminal_transitions(self, C):
         trans = []
         for stateno, state in enumerate(C):
             for p in state:
-                if p.lr_index < p.len - 1:
-                    t = (stateno, p.prod[p.lr_index + 1])
-                    if t[1] in self.grammar.Nonterminals:
-                        if t not in trans:
-                            trans.append(t)
+                self._process_transition(p, stateno, trans)
         return trans
 
-    # -----------------------------------------------------------------------------
-    #
-    # Computes the DR(p,A) relationships for non-terminal transitions.  The input
-    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
-    #
-    # Returns a list of terminals.
-    # -----------------------------------------------------------------------------
+    def _process_transition(self, p, stateno, trans):
+        """
+        Process a single transition and update the trans list.
+        This method checks if the transition should be added.
+        """
+        if p.lr_index < p.len - 1:
+            t = (stateno, p.prod[p.lr_index + 1])
+            if t[1] in self.grammar.Nonterminals:
+                if t not in trans:
+                    trans.append(t)
 
     def dr_relation(self, C, trans, nullable):
         state, N = trans
         terms = []
-
         g = self.lr0_goto(C[state], N)
         for p in g:
-            if p.lr_index < p.len - 1:
-                a = p.prod[p.lr_index + 1]
-                if a in self.grammar.Terminals:
-                    if a not in terms:
-                        terms.append(a)
-
-        # This extra bit is to handle the start state
+            self._process_relation(p, terms)
         if state == 0 and N == self.grammar.Productions[0].prod[0]:
             terms.append('$end')
         return terms
 
-    # -----------------------------------------------------------------------------
-    #
-    # Computes the READS() relation (p,A) READS (t,C). 
- # ----------------------------------------------------------------------------- + def _process_relation(self, p, terms): + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index + 1] + if a in self.grammar.Terminals: + if a not in terms: + terms.append(a) def reads_relation(self, C, trans, empty): # Look for empty transitions @@ -1520,9 +1397,7 @@ class LRTable: includedict = {} # Dictionary of include relations # Make a dictionary of non-terminal transitions - dtrans = {} - for t in trans: - dtrans[t] = 1 + dtrans = {t: 1 for t in trans} # Dictionary comprehension to simplify the creation # Loop over all transitions and compute lookbacks and includes for state, N in trans: @@ -1532,49 +1407,10 @@ class LRTable: if p.name != N: continue - # Okay, we have a name match. We now follow the production all the way - # through the state machine until we get the . on the right hand side - - lr_index = p.lr_index - j = state - while lr_index < p.len - 1: - lr_index = lr_index + 1 - t = p.prod[lr_index] - - # Check to see if this symbol and state are a non-terminal transition - if (j, t) in dtrans: - # Yes. Okay, there is some chance that this is an includes relation - # the only way to know for certain is whether the rest of the - # production derives empty - - li = lr_index + 1 - while li < p.len: - if p.prod[li] in self.grammar.Terminals: - break # No forget it - if p.prod[li] not in nullable: - break - li = li + 1 - else: - # Appears to be a relation between (j,t) and (state,N) - includes.append((j, t)) - - g = self.lr0_goto(C[j], t) # Go to next set - j = self.lr0_cidhash.get(id(g), -1) # Go to next state + # Okay, we have a name match. Follow the production all the way through the state machine + self._process_lookback_and_include(C, state, p, dtrans, includes, lookb, nullable) - # When we get here, j is the final state, now we have to locate the production - for r in C[j]: - if r.name != p.name: - continue - if r.len != p.len: - continue - i = 0 - # This look is comparing a production ". A B C" with "A B C ." - while i < r.lr_index: - if r.prod[i] != p.prod[i + 1]: - break - i = i + 1 - else: - lookb.append((j, r)) + # Store the computed relations for i in includes: if i not in includedict: includedict[i] = [] @@ -1583,6 +1419,61 @@ class LRTable: return lookdict, includedict + def _process_lookback_and_include(self, C, state, p, dtrans, includes, lookb, nullable): + """ + Process lookback and include relations for a single production. + This handles the inner `while` loop logic and `lookb` and `includes` updates. + """ + lr_index = p.lr_index + j = state + + # Process the production from the state machine + while lr_index < p.len - 1: + lr_index += 1 + t = p.prod[lr_index] + + # Check for non-terminal transitions + if (j, t) in dtrans: + # There is a chance this is an includes relation + self._process_include_relation(p, lr_index, j, t, includes, nullable) + + g = self.lr0_goto(C[j], t) # Go to next set + j = self.lr0_cidhash.get(id(g), -1) # Go to next state + + # Final state is j; now locate the production + self._process_lookback_relation(C, j, p, lookb) + + def _process_include_relation(self, p, lr_index, j, t, includes, nullable): + """ + Process the includes relation based on the production and nullable symbols. 
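+
+        A pair (j, t) is appended to `includes` only when every symbol after
+        position lr_index in the production is a nullable nonterminal, i.e.
+        when the rest of the rule can derive the empty string; the while/else
+        below encodes exactly that check.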
+ """ + li = lr_index + 1 + while li < p.len: + if p.prod[li] in self.grammar.Terminals: + break # No forget it + if p.prod[li] not in nullable: + break + li += 1 + else: + includes.append((j, t)) + + def _process_lookback_relation(self, C, j, p, lookb): + """ + Process the lookback relation by comparing the current and previous productions. + """ + for r in C[j]: + if r.name != p.name: + continue + if r.len != p.len: + continue + i = 0 + while i < r.lr_index: + if r.prod[i] != p.prod[i + 1]: + break + i += 1 + else: + lookb.append((j, r)) + # ----------------------------------------------------------------------------- # # Given a set of LR(0) items, this function computes the read sets. @@ -1614,27 +1505,23 @@ class LRTable: F = digraph(ntrans, R, FP) return F - # ----------------------------------------------------------------------------- - # - # Attaches the lookahead symbols to grammar rules. - # - # Inputs: lookbacks - Set of lookback relations - # followset - Computed follow set - # - # This function directly attaches the lookaheads to productions contained - # in the lookbacks set - # ----------------------------------------------------------------------------- - def add_lookaheads(self, lookbacks, followset): for trans, lb in lookbacks.items(): # Loop over productions in lookback for state, p in lb: - if state not in p.lookaheads: - p.lookaheads[state] = [] + self._ensure_lookaheads(p, state) # Ensure lookaheads for the production + f = followset.get(trans, []) - for a in f: - if a not in p.lookaheads[state]: - p.lookaheads[state].append(a) + self._add_lookaheads_to_production(p, state, f) # Add lookaheads from followset + + def _ensure_lookaheads(self, p, state): + if state not in p.lookaheads: + p.lookaheads[state] = [] + + def _add_lookaheads_to_production(self, p, state, followset_elements): + for a in followset_elements: + if a not in p.lookaheads[state]: + p.lookaheads[state].append(a) # ----------------------------------------------------------------------------- # @@ -1665,185 +1552,184 @@ class LRTable: # # This function constructs the parse tables for SLR or LALR # ----------------------------------------------------------------------------- - def lr_parse_table(self): - Productions = self.grammar.Productions - Precedence = self.grammar.Precedence - goto = self.lr_goto # Goto array - action = self.lr_action # Action array - log = self.log # Logger for output + def handle_shift_reduce_conflict(self, st, a, p, r, Precedence, Productions, log, j=None): + """Handle shift/reduce conflict.""" + if r > 0: + sprec, slevel = Precedence.get(a, ('right', 0)) + rprec, rlevel = Productions[p.number].prec + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): + return -p.number, p, 'reduce', None + elif (slevel == rlevel) and (rprec == 'nonassoc'): + return None, None, None, None + else: + return j, p, 'shift', None + elif r < 0: + oldp = Productions[-r] + pp = Productions[p.number] + if oldp.line > pp.line: + return -p.number, p, 'reduce', oldp + else: + return -oldp.number, oldp, 'reduce', pp + return None, None, None, None - actionp = {} # Action production array (temporary) + def log_shift_reduce_action(self, log, a, m): + """Log shift/reduce or reduce/reduce actions.""" + log.info(' %-15s %s', a, m) - # This determines the number of states + def process_state_transitions(self, st, I, st_action, Precedence, Productions, action, goto, log): + """Process state transitions and handle conflicts.""" + st_goto = {} + actlist = [] + st_actionp = {} + for p in I: + if 
p.len == p.lr_index + 1:
+                if p.name == "S'":
+                    st_action['$end'] = 0
+                    st_actionp['$end'] = p
+                else:
+                    laheads = p.lookaheads[st]
+                    for a in laheads:
+                        actlist.append((a, p, f'reduce using rule {p.number} ({p})'))
+                        r = st_action.get(a)
+                        if r is not None:
+                            shift, new_p, action_type, reject_p = self.handle_shift_reduce_conflict(st, a, p, r,
+                                                                                                    Precedence,
+                                                                                                    Productions, log)
+                            if action_type == 'reduce':
+                                st_action[a] = shift
+                                st_actionp[a] = new_p
+                                Productions[new_p.number].reduced += 1
+                            elif action_type == 'shift':
+                                # Keep the existing shift action r for this token
+                                self.log_shift_reduce_action(log, a, f'shift and go to state {r}')
+                            else:
+                                st_action[a] = None
+                        else:
+                            st_action[a] = -p.number
+                            st_actionp[a] = p
+                            Productions[p.number].reduced += 1
+            else:
+                i = p.lr_index
+                a = p.prod[i + 1]
+                if a in self.grammar.Terminals:
+                    g = self.lr0_goto(I, a)
+                    j = self.lr0_cidhash.get(id(g), -1)
+                    if j >= 0:
+                        actlist.append((a, p, f'shift and go to state {j}'))
+                        r = st_action.get(a)
+                        if r is not None:
+                            if r > 0 and r != j:
+                                raise LALRError(f'Shift/shift conflict in state {st}')
+                            elif r < 0:
+                                sprec, slevel = Precedence.get(a, ('right', 0))
+                                rprec, rlevel = Productions[st_actionp[a].number].prec
+                                if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
+                                    Productions[st_actionp[a].number].reduced -= 1
+                                    st_action[a] = j
+                                    st_actionp[a] = p
+                                elif slevel == rlevel and rprec == 'nonassoc':
+                                    st_action[a] = None
+                                else:
+                                    # Keep the existing reduce action for this token
+                                    self.log_shift_reduce_action(log, a, 'shift/reduce conflict resolved as reduce')
+                        else:
+                            st_action[a] = j
+                            st_actionp[a] = p
+        return st_action, st_actionp, st_goto, actlist
+
+    def lr_parse_table(self):
+        Productions = self.grammar.Productions
+        Precedence = self.grammar.Precedence
+        goto = self.lr_goto
+        action = self.lr_action
+        log = self.log
+        actionp = {}
 
         C = self.lr0_items()
         self.add_lalr_lookaheads(C)
-
-        # Build the parser table, state by state
         st = 0
         for I in C:
-            # Loop over each production in I
-            actlist = []              # List of actions
-            st_action = {}
-            st_actionp = {}
-            st_goto = {}
             log.info('')
-            log.info('state %d', st)
+            log.info(f'state {st}')
            log.info('')
-            for p in I:
-                log.info('    (%d) %s', p.number, p)
+            self._log_productions(I, log)  # Log productions for the current state
            log.info('')
 
-            for p in I:
-                if p.len == p.lr_index + 1:
-                    if p.name == "S'":
-                        # Start symbol. Accept!
-                        st_action['$end'] = 0
-                        st_actionp['$end'] = p
-                    else:
-                        # We are at the end of a production.  Reduce!
-                        laheads = p.lookaheads[st]
-                        for a in laheads:
-                            actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
-                            r = st_action.get(a)
-                            if r is not None:
-                                # Whoa. Have a shift/reduce or reduce/reduce conflict
-                                if r > 0:
-                                    # Need to decide on shift or reduce here
-                                    # By default we favor shifting. Need to add
-                                    # some precedence rules here.
-
-                                    # Shift precedence comes from the token
-                                    sprec, slevel = Precedence.get(a, ('right', 0))
-
-                                    # Reduce precedence comes from rule being reduced (p)
-                                    rprec, rlevel = Productions[p.number].prec
-
-                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
-                                        # We really need to reduce here.
-                                        st_action[a] = -p.number
-                                        st_actionp[a] = p
-                                        if not slevel and not rlevel:
-                                            log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
-                                            self.sr_conflicts.append((st, a, 'reduce'))
-                                        Productions[p.number].reduced += 1
-                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
-                                        st_action[a] = None
-                                    else:
-                                        # Hmmm. Guess we'll keep the shift
-                                        if not rlevel:
-                                            log.info('  ! 
shift/reduce conflict for %s resolved as shift', a) - self.sr_conflicts.append((st, a, 'shift')) - elif r < 0: - # Reduce/reduce conflict. In this case, we favor the rule - # that was defined first in the grammar file - oldp = Productions[-r] - pp = Productions[p.number] - if oldp.line > pp.line: - st_action[a] = -p.number - st_actionp[a] = p - chosenp, rejectp = pp, oldp - Productions[p.number].reduced += 1 - Productions[oldp.number].reduced -= 1 - else: - chosenp, rejectp = oldp, pp - self.rr_conflicts.append((st, chosenp, rejectp)) - log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', - a, st_actionp[a].number, st_actionp[a]) - else: - raise LALRError('Unknown conflict in state %d' % st) - else: - st_action[a] = -p.number - st_actionp[a] = p - Productions[p.number].reduced += 1 - else: - i = p.lr_index - a = p.prod[i + 1] # Get symbol right after the "." - if a in self.grammar.Terminals: - g = self.lr0_goto(I, a) - j = self.lr0_cidhash.get(id(g), -1) - if j >= 0: - # We are in a shift state - actlist.append((a, p, 'shift and go to state %d' % j)) - r = st_action.get(a) - if r is not None: - # Whoa have a shift/reduce or shift/shift conflict - if r > 0: - if r != j: - raise LALRError('Shift/shift conflict in state %d' % st) - elif r < 0: - # Do a precedence check. - # - if precedence of reduce rule is higher, we reduce. - # - if precedence of reduce is same and left assoc, we reduce. - # - otherwise we shift - - # Shift precedence comes from the token - sprec, slevel = Precedence.get(a, ('right', 0)) - - # Reduce precedence comes from the rule that could have been reduced - rprec, rlevel = Productions[st_actionp[a].number].prec - - if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): - # We decide to shift here... highest precedence to shift - Productions[st_actionp[a].number].reduced -= 1 - st_action[a] = j - st_actionp[a] = p - if not rlevel: - log.info(' ! shift/reduce conflict for %s resolved as shift', a) - self.sr_conflicts.append((st, a, 'shift')) - elif (slevel == rlevel) and (rprec == 'nonassoc'): - st_action[a] = None - else: - # Hmmm. Guess we'll keep the reduce - if not slevel and not rlevel: - log.info(' ! shift/reduce conflict for %s resolved as reduce', a) - self.sr_conflicts.append((st, a, 'reduce')) + # Process the state transitions and conflicts + st_action = {} + st_actionp = {} + st_goto = {} + st_action, st_actionp, st_goto, actlist = self.process_state_transitions(st, I, st_action, Precedence, + Productions, action, goto, log) - else: - raise LALRError('Unknown conflict in state %d' % st) - else: - st_action[a] = j - st_actionp[a] = p - - # Print the actions associated with each terminal - _actprint = {} - for a, p, m in actlist: - if a in st_action: - if p is st_actionp[a]: - log.info(' %-15s %s', a, m) - _actprint[(a, m)] = 1 - log.info('') - # Print the actions that were not used. (debugging) - not_used = 0 - for a, p, m in actlist: - if a in st_action: - if p is not st_actionp[a]: - if not (a, m) in _actprint: - log.debug(' ! 
%-15s [ %s ]', a, m) - not_used = 1 - _actprint[(a, m)] = 1 - if not_used: - log.debug('') - - # Construct the goto table for this state - - nkeys = {} - for ii in I: - for s in ii.usyms: - if s in self.grammar.Nonterminals: - nkeys[s] = None - for n in nkeys: - g = self.lr0_goto(I, n) - j = self.lr0_cidhash.get(id(g), -1) - if j >= 0: - st_goto[n] = j - log.info(' %-30s shift and go to state %d', n, j) + # Logging actions + self._log_actions(st_action, st_actionp, actlist, log) + + # Handle not used actions + self._handle_not_used_actions(st_action, st_actionp, actlist, log) + # Handle state transitions for nonterminals + self._handle_state_transitions_for_nonterminals(I, st_goto, log) + + # Save action and goto for the current state action[st] = st_action actionp[st] = st_actionp goto[st] = st_goto st += 1 + def _log_productions(self, I, log): + """ + Log the productions in a given state I. + """ + for p in I: + log.info(f' ({p.number}) {p}') + + def _log_actions(self, st_action, st_actionp, actlist, log): + """ + Log actions for a given state transition. + """ + for a, p, m in actlist: + if a in st_action: + if p is st_actionp[a]: + log.info(' %-15s %s', a, m) + + def _handle_not_used_actions(self, st_action, st_actionp, actlist, log): + """ + Handle actions that are not used and log them. + """ + _actprint = {} + not_used = False + for a, p, m in actlist: + if a in st_action: + not_used = self._check_not_used_action(a, p, st_actionp, m, _actprint, log) or not_used + if not_used: + log.debug('') + + def _check_not_used_action(self, a, p, st_actionp, m, _actprint, log): + """ + Check if the action is not used and log it. + """ + if p is not st_actionp[a]: + if (a, m) not in _actprint: + log.debug(f' ! %-15s [ {m} ]') + _actprint[(a, m)] = 1 + return True + return False + + def _handle_state_transitions_for_nonterminals(self, I, st_goto, log): + """ + Handle state transitions for nonterminals and log the corresponding transitions. + """ + nkeys = {} + for ii in I: + for s in ii.usyms: + if s in self.grammar.Nonterminals: + nkeys[s] = None + for n in nkeys: + g = self.lr0_goto(I, n) + j = self.lr0_cidhash.get(id(g), -1) + if j >= 0: + st_goto[n] = j + log.info(f' %-30s shift and go to state {j}') + # ----------------------------------------------------------------------------- # @@ -1866,30 +1752,17 @@ def get_caller_module_dict(levels): # ----------------------------------------------------------------------------- def parse_grammar(doc, file, line): grammar = [] - # Split the doc string into lines pstrings = doc.splitlines() - lastp = None dline = line + lastp = None + for ps in pstrings: dline += 1 p = ps.split() if not p: continue try: - if p[0] == '|': - # This is a continuation of a previous rule - if not lastp: - raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) - prodname = lastp - syms = p[1:] - else: - prodname = p[0] - lastp = prodname - syms = p[2:] - assign = p[1] - if assign != ':' and assign != '::=': - raise SyntaxError("%s:%d: Syntax error. 
Expected ':'" % (file, dline)) - + prodname, syms, lastp = parse_rule(p, lastp, dline, file, ps) grammar.append((file, dline, prodname, syms)) except SyntaxError: raise @@ -1899,6 +1772,23 @@ def parse_grammar(doc, file, line): return grammar +def parse_rule(p, lastp, dline, file, ps): + if p[0] == '|': + if not lastp: + raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) + prodname = lastp + syms = p[1:] + else: + prodname = p[0] + lastp = prodname + syms = p[2:] + assign = p[1] + if assign != ':' and assign != '::=': + raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) + + return prodname, syms, lastp + + # ----------------------------------------------------------------------------- # # This class represents information extracted for building a parser including @@ -1955,16 +1845,6 @@ class ParserReflect(object): pass return ''.join(parts) - # ----------------------------------------------------------------------------- - # - # This method checks to see if there are duplicated p_rulename() functions - # in the parser module file. Without this function, it is really easy for - # users to make mistakes by cutting and pasting code fragments (and it's a real - # bugger to try and figure out why the resulting parser doesn't work). Therefore, - # we just do a little regular expression pattern matching of def statements - # to try and detect duplicates. - # ----------------------------------------------------------------------------- - def validate_modules(self): # Match def p_funcname( fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') @@ -1974,20 +1854,24 @@ class ParserReflect(object): lines, linen = inspect.getsourcelines(module) except IOError: continue + self.check_function_redefinitions(lines, fre, module) + + def check_function_redefinitions(self, lines, fre, module): + counthash = {} + for linen, line in enumerate(lines, 1): + m = fre.match(line) + if m: + name = m.group(1) + prev = counthash.get(name) + if prev: + self.report_redefinition(module, linen, name, prev) + else: + counthash[name] = linen - counthash = {} - for linen, line in enumerate(lines): - linen += 1 - m = fre.match(line) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen - else: - filename = inspect.getsourcefile(module) - self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d', - filename, linen, name, prev) + def report_redefinition(self, module, linen, name, prev): + filename = inspect.getsourcefile(module) + self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d', + filename, linen, name, prev) # Get the start symbol def get_start(self): @@ -2028,11 +1912,6 @@ class ParserReflect(object): # Get the tokens map def get_tokens(self): tokens = self.pdict.get('tokens') - if not tokens: - self.log.error('No token list is defined') - self.error = True - return - if not isinstance(tokens, (list, tuple)): self.log.error('tokens must be a list or tuple') self.error = True @@ -2071,6 +1950,7 @@ class ParserReflect(object): self.log.error('precedence must be a list or tuple') self.error = True return + for level, p in enumerate(self.prec): if not isinstance(p, (list, tuple)): self.log.error('Bad precedence table') @@ -2081,19 +1961,26 @@ class ParserReflect(object): self.log.error('Malformed precedence entry %s. 
Must be (assoc, term, ..., term)', p)
                 self.error = True
                 return
+
             assoc = p[0]
             if not isinstance(assoc, str):
                 self.log.error('precedence associativity must be a string')
                 self.error = True
                 return
-            for term in p[1:]:
-                if not isinstance(term, str):
-                    self.log.error('precedence items must be strings')
-                    self.error = True
-                    return
-                preclist.append((term, assoc, level + 1))
+
+            # Extract the per-term validation into a helper method
+            self._validate_terms_and_append(p[1:], assoc, level + 1, preclist)
+
         self.preclist = preclist
 
+    def _validate_terms_and_append(self, terms, assoc, level, preclist):
+        # 'level' already carries the +1 offset applied by the caller
+        for term in terms:
+            if not isinstance(term, str):
+                self.log.error('precedence items must be strings')
+                self.error = True
+                return
+            preclist.append((term, assoc, level))
+
     # Get all p_functions from the grammar
     def get_pfunctions(self):
         p_functions = []
@@ -2155,7 +2042,6 @@ class ParserReflect(object):
 
         # Secondary validation step that looks for p_ definitions that are not functions
         # or functions that look like they might be grammar rules.
-
         for n, v in self.pdict.items():
             if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
                 continue
@@ -2163,19 +2049,27 @@
                 continue
             if n.startswith('p_') and n != 'p_error':
                 self.log.warning('%r not defined as a function', n)
-            if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
-                    (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
-                if v.__doc__:
-                    try:
-                        doc = v.__doc__.split(' ')
-                        if doc[1] == ':':
-                            self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
-                                             v.__code__.co_filename, v.__code__.co_firstlineno, n)
-                    except IndexError:
-                        pass
+
+            self._check_possible_grammar_rule(v, n)
 
         self.grammar = grammar
 
+    def _check_possible_grammar_rule(self, v, n):
+        """
+        Helper method to check whether a callable looks like a grammar rule
+        defined without the p_ prefix. Extracted from the validation loop to
+        reduce complexity.
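+        A one-argument function (or two-argument method) whose docstring's second
+        whitespace-separated token is ':' (for example "expr : expr PLUS term") is
+        reported as a possible grammar rule.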
+ """ + if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or + (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): + if v.__doc__: + try: + doc = v.__doc__.split(' ') + if doc[1] == ':': + self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', + v.__code__.co_filename, v.__code__.co_firstlineno, n) + except IndexError: + pass + # ----------------------------------------------------------------------------- # diff --git "a/script/local/parser/\346\226\207\346\263\225.md" "b/script/local/parser/\346\226\207\346\263\225.md" index da91b461..f2d08454 100644 --- "a/script/local/parser/\346\226\207\346\263\225.md" +++ "b/script/local/parser/\346\226\207\346\263\225.md" @@ -19,13 +19,13 @@ term : term TIMES factor | term DIVIDE factor | term MOD factor | factor -factor : NUMBER - | STRING - | ID +factor : number + | string + | id | NULL | TRUE | FALSE | LPAREN expr RPAREN -function : ID LPAREN variables RPAREN +function : id LPAREN variables RPAREN variables : variables COMMA expr | expr -- Gitee From 6a8405c802d6d3f8092bee4060e647c3cd06125e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Mon, 9 Dec 2024 10:48:17 +0000 Subject: [PATCH 34/87] 7456374 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/myLexer.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/script/local/parser/myLexer.py b/script/local/parser/myLexer.py index 8ee2f75c..06f8e6e1 100644 --- a/script/local/parser/myLexer.py +++ b/script/local/parser/myLexer.py @@ -16,7 +16,7 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : myLexer.py is a utility to check security configurations info on local node. +# Description : my_lexer.py is a utility to check security configurations info on local node. ############################################################################# import os import sys @@ -34,9 +34,9 @@ reserved = { } tokens = [ - 'NUMBER', - 'STRING', - 'ID', + 'number', + 'string', + 'id', 'AND', 'OR', 'EQUAL', @@ -81,7 +81,7 @@ token_dict = { 'MOD' : '%', } -class MyLexer(): +class my_lexer(): tokens = tokens @@ -106,19 +106,19 @@ class MyLexer(): t_RPAREN = r'\)' t_MOD = r'\%' - def t_NUMBER(self, t): + def t_number(self, t): r'-?[0-9]+(\.[0-9]+)?' 
t.value = Decimal(t.value)
         return t
 
-    def t_STRING(self, t):
+    def t_string(self, t):
         r'"[^"]*"'
         t.value = t.value[1:-1]
         return t
 
-    def t_ID(self, t):
+    def t_id(self, t):
         r'[a-zA-Z_][a-zA-Z_0-9]*'
-        t.type = self.reserved.get(t.value,'ID')
+        t.type = self.reserved.get(t.value, 'id')
         return t
 
     # Define a rule so we can track line numbers
-- 
Gitee

From 5bfcafd775d0d157c53e7f6731b9abc764474598 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com>
Date: Mon, 9 Dec 2024 10:48:45 +0000
Subject: [PATCH 35/87] Rename myLexer.py to my_lexer.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com>
---
 script/local/parser/{myLexer.py => my_lexer.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename script/local/parser/{myLexer.py => my_lexer.py} (100%)

diff --git a/script/local/parser/myLexer.py b/script/local/parser/my_lexer.py
similarity index 100%
rename from script/local/parser/myLexer.py
rename to script/local/parser/my_lexer.py

-- 
Gitee

From f83a7472f37a402f45796c80db07f6308cd2f43e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com>
Date: Mon, 9 Dec 2024 10:49:15 +0000
Subject: [PATCH 36/87] Rename myYACC.py to my_yacc.py and update its references
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com>
---
 script/local/parser/{myYACC.py => my_yacc.py} | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)
 rename script/local/parser/{myYACC.py => my_yacc.py} (92%)

diff --git a/script/local/parser/myYACC.py b/script/local/parser/my_yacc.py
index 362d1336..0dad94a0 100644
--- a/script/local/parser/myYACC.py
+++ b/script/local/parser/my_yacc.py
@@ -16,7 +16,7 @@
 # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 # See the Mulan PSL v2 for more details.
 # ----------------------------------------------------------------------------
-# Description : myYACC.py is a utility to check security configurations info on local node.
+# Description : my_yacc.py defines the grammar for the dependency-check rule expressions.
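+# The grammar it implements is documented in 文法.md in this directory.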
############################################################################# import os import sys @@ -24,8 +24,8 @@ import sys localDirPath = os.path.dirname(os.path.realpath(__file__)) sys.path.append(sys.path[0] + "/../") -from local.parser.myLexer import tokens -from local.parser.myLexer import token_dict +from local.parser.my_lexer import tokens +from local.parser.my_lexer import token_dict from local.parser.functions import get_function from local.parser.variables import get_variable from local.parser.yacc import yacc @@ -33,7 +33,7 @@ from local.parser.yacc import yacc def exec_fn(fn): fn[0](*fn[1]) -class MyYACC(): +class my_yacc(): tokens = tokens @@ -128,13 +128,13 @@ class MyYACC(): def p_factor_assign_simple(p): ''' - factor : NUMBER - | STRING + factor : number + | string ''' p[0] = p[1] def p_factor_id(p): - 'factor : ID' + 'factor : id' p[0] = get_variable(p[1]) def p_factor_null(p): @@ -156,7 +156,7 @@ class MyYACC(): p[0] = p[2] def p_function(p): - 'function : ID LPAREN variables RPAREN' + 'function : id LPAREN variables RPAREN' p[0] = (get_function(p[1]), p[3]) def p_variables_comma(p): @@ -176,4 +176,4 @@ class MyYACC(): raise Exception('Syntax error in input!') def build(self): - self.yacc = yacc(module=MyYACC) + self.yacc = yacc(module=my_yacc) -- Gitee From f0d5d1c70be96d5d4afe1d5732afbdfbe86d70fc Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Mon, 9 Dec 2024 18:56:32 +0800 Subject: [PATCH 37/87] =?UTF-8?q?43743783=E9=80=9F=E5=BA=A6=E5=8F=91?= =?UTF-8?q?=E8=B4=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/parser/parser.out | 8 ++++---- script/local/parser/parsetab.py | 4 ++-- "script/local/parser/\346\226\207\346\263\225.md" | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/script/local/parser/parser.out b/script/local/parser/parser.out index 1cabbb24..c6d8a5cd 100644 --- a/script/local/parser/parser.out +++ b/script/local/parser/parser.out @@ -31,7 +31,7 @@ Rule 25 factor -> NULL Rule 26 factor -> TRUE Rule 27 factor -> FALSE Rule 28 factor -> LPAREN expr RPAREN -Rule 29 function -> id LPAREN variables RPAREN +Rule 29 function -> ID LPAREN variables RPAREN Rule 30 variables -> variables COMMA expr Rule 31 variables -> expr @@ -464,7 +464,7 @@ state 16 state 17 (1) sentence -> conditions THEN . function - (29) function -> . id LPAREN variables RPAREN + (29) function -> . ID LPAREN variables RPAREN ID shift and go to state 36 @@ -1286,7 +1286,7 @@ state 54 state 55 - (29) function -> id LPAREN variables . RPAREN + (29) function -> ID LPAREN variables . RPAREN (30) variables -> variables . COMMA expr RPAREN shift and go to state 57 @@ -1307,7 +1307,7 @@ state 56 state 57 - (29) function -> id LPAREN variables RPAREN . + (29) function -> ID LPAREN variables RPAREN . $end reduce using rule 29 (function -> ID LPAREN variables RPAREN .) 
diff --git a/script/local/parser/parsetab.py b/script/local/parser/parsetab.py index 38a20c9f..766beb93 100644 --- a/script/local/parser/parsetab.py +++ b/script/local/parser/parsetab.py @@ -68,12 +68,12 @@ _lr_productions = [ ('term -> factor','term',1,'p_term_factor','my_yacc.py',104), ('factor -> number','factor',1,'p_factor_assign_simple','my_yacc.py',109), ('factor -> string','factor',1,'p_factor_assign_simple','my_yacc.py',110), - ('factor -> id','factor',1,'p_factor_id','my_yacc.py',115), + ('factor -> ID','factor',1,'p_factor_id','my_yacc.py',115), ('factor -> NULL','factor',1,'p_factor_null','my_yacc.py',119), ('factor -> TRUE','factor',1,'p_factor_bool','my_yacc.py',124), ('factor -> FALSE','factor',1,'p_factor_bool','my_yacc.py',125), ('factor -> LPAREN expr RPAREN','factor',3,'p_factor_paren','my_yacc.py',133), - ('function -> id LPAREN variables RPAREN','function',4,'p_function','my_yacc.py',137), + ('function -> ID LPAREN variables RPAREN','function',4,'p_function','my_yacc.py',137), ('variables -> variables COMMA expr','variables',3,'p_variables_comma','my_yacc.py',141), ('variables -> expr','variables',1,'p_variables_factor','my_yacc.py',146), ] diff --git "a/script/local/parser/\346\226\207\346\263\225.md" "b/script/local/parser/\346\226\207\346\263\225.md" index f2d08454..01899463 100644 --- "a/script/local/parser/\346\226\207\346\263\225.md" +++ "b/script/local/parser/\346\226\207\346\263\225.md" @@ -21,11 +21,11 @@ term : term TIMES factor | factor factor : number | string - | id + | ID | NULL | TRUE | FALSE | LPAREN expr RPAREN -function : id LPAREN variables RPAREN +function : ID LPAREN variables RPAREN variables : variables COMMA expr | expr -- Gitee From ed9aef0db5108b5d42f6202f564e6ec9822f4c65 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 10 Dec 2024 14:59:53 +0800 Subject: [PATCH 38/87] dsgyeryhdfhdfh --- script/local/LocalCheckSE.py | 8 +- script/local/parser/my_lexer.py | 2 +- script/local/parser/my_yacc.py | 2 +- script/local/parser/parser.out | 8 +- script/local/parser/parsetab.py | 62 +- script/local/parser/yacc.py | 697 ++++++++---------- .../local/parser/\346\226\207\346\263\225.md" | 4 +- 7 files changed, 342 insertions(+), 441 deletions(-) diff --git a/script/local/LocalCheckSE.py b/script/local/LocalCheckSE.py index 780c38d6..b1e43614 100644 --- a/script/local/LocalCheckSE.py +++ b/script/local/LocalCheckSE.py @@ -38,8 +38,8 @@ from domain_utils.cluster_file.version_info import VersionInfo from base_utils.os.net_util import NetUtil from domain_utils.domain_common.cluster_constants import ClusterConstants from datetime import datetime, timedelta -from local.parser.my_yacc import my_yacc -from local.parser.my_lexer import my_lexer +from local.parser.MyYacc import MyYacc +from local.parser.MyLexer import MyLexer from local.parser.utils import set_settings from local.parser.utils import set_dependency_settings from local.parser.utils import show_dependency_info @@ -5360,9 +5360,9 @@ def get_settings(): def process_dependencies(rule_files): current_path = os.path.dirname(os.path.realpath(__file__)) - m = my_lexer() + m = MyLexer() m.build() - y = my_yacc() + y = MyYacc() y.build() dependency_parser = y.yacc diff --git a/script/local/parser/my_lexer.py b/script/local/parser/my_lexer.py index 06f8e6e1..59e4296b 100644 --- a/script/local/parser/my_lexer.py +++ b/script/local/parser/my_lexer.py @@ -81,7 +81,7 @@ token_dict = { 'MOD' : '%', } -class my_lexer(): +class MyLexer(): tokens = tokens diff --git a/script/local/parser/my_yacc.py 
b/script/local/parser/my_yacc.py index 0dad94a0..9bcc6f73 100644 --- a/script/local/parser/my_yacc.py +++ b/script/local/parser/my_yacc.py @@ -33,7 +33,7 @@ from local.parser.yacc import yacc def exec_fn(fn): fn[0](*fn[1]) -class my_yacc(): +class MyYacc(): tokens = tokens diff --git a/script/local/parser/parser.out b/script/local/parser/parser.out index c6d8a5cd..1cabbb24 100644 --- a/script/local/parser/parser.out +++ b/script/local/parser/parser.out @@ -31,7 +31,7 @@ Rule 25 factor -> NULL Rule 26 factor -> TRUE Rule 27 factor -> FALSE Rule 28 factor -> LPAREN expr RPAREN -Rule 29 function -> ID LPAREN variables RPAREN +Rule 29 function -> id LPAREN variables RPAREN Rule 30 variables -> variables COMMA expr Rule 31 variables -> expr @@ -464,7 +464,7 @@ state 16 state 17 (1) sentence -> conditions THEN . function - (29) function -> . ID LPAREN variables RPAREN + (29) function -> . id LPAREN variables RPAREN ID shift and go to state 36 @@ -1286,7 +1286,7 @@ state 54 state 55 - (29) function -> ID LPAREN variables . RPAREN + (29) function -> id LPAREN variables . RPAREN (30) variables -> variables . COMMA expr RPAREN shift and go to state 57 @@ -1307,7 +1307,7 @@ state 56 state 57 - (29) function -> ID LPAREN variables RPAREN . + (29) function -> id LPAREN variables RPAREN . $end reduce using rule 29 (function -> ID LPAREN variables RPAREN .) diff --git a/script/local/parser/parsetab.py b/script/local/parser/parsetab.py index 766beb93..fc71fba5 100644 --- a/script/local/parser/parsetab.py +++ b/script/local/parser/parsetab.py @@ -45,35 +45,35 @@ for _k, _v in _lr_goto_items.items(): del _lr_goto_items _lr_productions = [ ("S' -> sentence","S'",1,None,None,None), - ('sentence -> conditions THEN function','sentence',3,'p_conditions_relation_function','my_yacc.py',15), - ('conditions -> conditions OR and_conditions','conditions',3,'p_conditions_or','my_yacc.py',21), - ('conditions -> and_conditions','conditions',1,'p_conditions_and_conditions','my_yacc.py',25), - ('and_conditions -> and_conditions AND not_conditions','and_conditions',3,'p_and_conditions_and','my_yacc.py',30), - ('and_conditions -> not_conditions','and_conditions',1,'p_and_conditions_cdt','my_yacc.py',35), - ('not_conditions -> NOT cdt','not_conditions',2,'p_not_cdt','my_yacc.py',39), - ('not_conditions -> cdt','not_conditions',1,'p_not_conditions_cdt','my_yacc.py',43), - ('cdt -> expr EQUAL expr','cdt',3,'p_cdt_ops','my_yacc.py',48), - ('cdt -> expr NEQUAL expr','cdt',3,'p_cdt_ops','my_yacc.py',49), - ('cdt -> expr GE expr','cdt',3,'p_cdt_ops','my_yacc.py',50), - ('cdt -> expr GT expr','cdt',3,'p_cdt_ops','my_yacc.py',51), - ('cdt -> expr LE expr','cdt',3,'p_cdt_ops','my_yacc.py',52), - ('cdt -> expr LT expr','cdt',3,'p_cdt_ops','my_yacc.py',53), - ('cdt -> LPAREN conditions RPAREN','cdt',3,'p_cdt_parens','my_yacc.py',73), - ('expr -> expr PLUS term','expr',3,'p_expr_plus_minus','my_yacc.py',78), - ('expr -> expr MINUS term','expr',3,'p_expr_plus_minus','my_yacc.py',79), - ('expr -> term','expr',1,'p_expr_term','my_yacc.py',87), - ('term -> term TIMES factor','term',3,'p_term_times_divide_mod','my_yacc.py',92), - ('term -> term DIVIDE factor','term',3,'p_term_times_divide_mod','my_yacc.py',93), - ('term -> term MOD factor','term',3,'p_term_times_divide_mod','my_yacc.py',94), - ('term -> factor','term',1,'p_term_factor','my_yacc.py',104), - ('factor -> number','factor',1,'p_factor_assign_simple','my_yacc.py',109), - ('factor -> string','factor',1,'p_factor_assign_simple','my_yacc.py',110), - ('factor -> 
ID','factor',1,'p_factor_id','my_yacc.py',115), - ('factor -> NULL','factor',1,'p_factor_null','my_yacc.py',119), - ('factor -> TRUE','factor',1,'p_factor_bool','my_yacc.py',124), - ('factor -> FALSE','factor',1,'p_factor_bool','my_yacc.py',125), - ('factor -> LPAREN expr RPAREN','factor',3,'p_factor_paren','my_yacc.py',133), - ('function -> ID LPAREN variables RPAREN','function',4,'p_function','my_yacc.py',137), - ('variables -> variables COMMA expr','variables',3,'p_variables_comma','my_yacc.py',141), - ('variables -> expr','variables',1,'p_variables_factor','my_yacc.py',146), + ('sentence -> conditions THEN function','sentence',3,'p_conditions_relation_function','MyYacc.py',15), + ('conditions -> conditions OR and_conditions','conditions',3,'p_conditions_or','MyYacc.py',21), + ('conditions -> and_conditions','conditions',1,'p_conditions_and_conditions','MyYacc.py',25), + ('and_conditions -> and_conditions AND not_conditions','and_conditions',3,'p_and_conditions_and','MyYacc.py',30), + ('and_conditions -> not_conditions','and_conditions',1,'p_and_conditions_cdt','MyYacc.py',35), + ('not_conditions -> NOT cdt','not_conditions',2,'p_not_cdt','MyYacc.py',39), + ('not_conditions -> cdt','not_conditions',1,'p_not_conditions_cdt','MyYacc.py',43), + ('cdt -> expr EQUAL expr','cdt',3,'p_cdt_ops','MyYacc.py',48), + ('cdt -> expr NEQUAL expr','cdt',3,'p_cdt_ops','MyYacc.py',49), + ('cdt -> expr GE expr','cdt',3,'p_cdt_ops','MyYacc.py',50), + ('cdt -> expr GT expr','cdt',3,'p_cdt_ops','MyYacc.py',51), + ('cdt -> expr LE expr','cdt',3,'p_cdt_ops','MyYacc.py',52), + ('cdt -> expr LT expr','cdt',3,'p_cdt_ops','MyYacc.py',53), + ('cdt -> LPAREN conditions RPAREN','cdt',3,'p_cdt_parens','MyYacc.py',73), + ('expr -> expr PLUS term','expr',3,'p_expr_plus_minus','MyYacc.py',78), + ('expr -> expr MINUS term','expr',3,'p_expr_plus_minus','MyYacc.py',79), + ('expr -> term','expr',1,'p_expr_term','MyYacc.py',87), + ('term -> term TIMES factor','term',3,'p_term_times_divide_mod','MyYacc.py',92), + ('term -> term DIVIDE factor','term',3,'p_term_times_divide_mod','MyYacc.py',93), + ('term -> term MOD factor','term',3,'p_term_times_divide_mod','MyYacc.py',94), + ('term -> factor','term',1,'p_term_factor','MyYacc.py',104), + ('factor -> number','factor',1,'p_factor_assign_simple','MyYacc.py',109), + ('factor -> string','factor',1,'p_factor_assign_simple','MyYacc.py',110), + ('factor -> id','factor',1,'p_factor_id','MyYacc.py',115), + ('factor -> NULL','factor',1,'p_factor_null','MyYacc.py',119), + ('factor -> TRUE','factor',1,'p_factor_bool','MyYacc.py',124), + ('factor -> FALSE','factor',1,'p_factor_bool','MyYacc.py',125), + ('factor -> LPAREN expr RPAREN','factor',3,'p_factor_paren','MyYacc.py',133), + ('function -> id LPAREN variables RPAREN','function',4,'p_function','MyYacc.py',137), + ('variables -> variables COMMA expr','variables',3,'p_variables_comma','MyYacc.py',141), + ('variables -> expr','variables',1,'p_variables_factor','MyYacc.py',146), ] diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 16da4989..8b8087a6 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -10,10 +10,10 @@ # # http://license.coscl.org.cn/MulanPSL2 # -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" bASIS, # WITHOUT WARRANTIES OF ANY KIND, -# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, -# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+# EITHER EXPRESS OR IMPLIED, INCLUDING bUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTAbILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- # Description : LocalCheckOS.py is a utility to check OS info on local node. @@ -29,10 +29,10 @@ import inspect # Change these to modify the default behavior of yacc (if you wish) # ----------------------------------------------------------------------------- -YACC_DEBUG = False # Debugging mode. If set, yacc generates a +YACC_DEbUG = False # Debugging mode. If set, yacc generates a # a 'parser.out' file in the current directory -DEBUG_FILE = 'parser.out' # Default name of the debugging file +DEbUG_FILE = 'parser.out' # Default name of the debugging file ERROR_COUNT = 3 # Number of symbols that must be shifted to leave recovery mode RESULT_LIMIT = 40 # Size limit of results when running in debug mode. @@ -167,7 +167,8 @@ class YaccProduction: endpos = getattr(self.slice[n], 'endlexpos', startpos) return startpos, endpos - def error(self): + @staticmethod + def error(): raise SyntaxError @@ -235,7 +236,7 @@ class LRParser: errorcount = 0 if debug: - debug.info('PARSE DEBUG START') + debug.info('PARSE DEbUG start') if not lexer: from . import lex lexer = lex.lexer @@ -380,7 +381,7 @@ class LRParser: if debug: debug.info('Done : Returning %s', format_result(result)) - debug.info('PARSE DEBUG END') + debug.info('PARSE DEbUG END') return result @@ -481,7 +482,7 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') # # name - Name of the production. For example 'expr' # prod - A list of symbols on the right side ['expr','PLUS','term'] -# prec - Production precedence level +# prec - Production precedencelevel # number - Production number. # func - Function that executes on reduce # file - File where production function is defined @@ -557,7 +558,7 @@ class Production(object): p.lr_before = None return p - # Bind the production function name to a callable + # bind the production function name to a callable def bind(self, pdict): if self.func: self.callable = pdict[self.func] @@ -637,72 +638,72 @@ class GrammarError(YaccError): class Grammar(object): def __init__(self, terminals): - self.Productions = [None] # A list of all of the productions. The first + self.productions = [None] # A list of all of the productions. The first # entry is always reserved for the purpose of # building an augmented grammar - self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all + self.prodnames = {} # A dictionary mapping the names of nonterminals to a list of all # productions of that nonterminal. - self.Prodmap = {} # A dictionary that is only used to detect duplicate + self.prodmap = {} # A dictionary that is only used to detect duplicate # productions. - self.Terminals = {} # A dictionary mapping the names of terminal symbols to a + self.terminals = {} # A dictionary mapping the names of terminal symbols to a # list of the rules where they are used. for term in terminals: - self.Terminals[term] = [] + self.terminals[term] = [] - self.Terminals['error'] = [] + self.terminals['error'] = [] - self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list + self.nonterminals = {} # A dictionary mapping names of nonterminals to a list # of rule numbers where they are used. 
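+        # The first/follow dictionaries below are filled in on demand by
+        # compute_first() and compute_follow().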
- self.First = {} # A dictionary of precomputed FIRST(x) symbols + self.first = {} # A dictionary of precomputed first(x) symbols - self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols + self.follow = {} # A dictionary of precomputed follow(x) symbols - self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the + self.precedence= {} # precedencerules for each terminal. Contains tuples of the # form ('right',level) or ('nonassoc', level) or ('left',level) - self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. + self.usedprecedence= set() # precedencerules that were actually used by the grammer. # This is only used to provide error checking and to generate - # a warning about unused precedence rules. + # a warning about unused precedencerules. - self.Start = None # Starting symbol for the grammar + self.start = None # starting symbol for the grammar def __len__(self): - return len(self.Productions) + return len(self.productions) def __getitem__(self, index): - return self.Productions[index] + return self.productions[index] # ----------------------------------------------------------------------------- # - # Sets the precedence for a given terminal. assoc is the associativity such as + # Sets the precedencefor a given terminal. assoc is the associativity such as # 'left','right', or 'nonassoc'. level is a numeric level. # # ----------------------------------------------------------------------------- def set_precedence(self, term, assoc, level): - assert self.Productions == [None], 'Must call set_precedence() before add_production()' - if term in self.Precedence: - raise GrammarError('Precedence already specified for terminal %r' % term) + assert self.productions == [None], 'Must call set_precedence() before add_production()' + if term in self.precedence: + raise GrammarError('precedencealready specified for terminal %r' % term) if assoc not in ['left', 'right', 'nonassoc']: raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") - self.Precedence[term] = (assoc, level) + self.precedence[term] = (assoc, level) # ----------------------------------------------------------------------------- # # Given an action function, this function assembles a production rule and - # computes its precedence level. + # computes its precedencelevel. # # The production rule is supplied as a list of symbols. For example, # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and # symbols ['expr','PLUS','term']. # - # Precedence is determined by the precedence of the right-most non-terminal - # or the precedence of a terminal specified by %prec. + # precedenceis determined by the precedenceof the right-most non-terminal + # or the precedenceof a terminal specified by %prec. # # A variety of error checks are performed to make sure production symbols # are valid and that %prec is used correctly. @@ -710,7 +711,7 @@ class Grammar(object): def validate_prodname(self, prodname, file, line): """Validate the production name.""" - if prodname in self.Terminals: + if prodname in self.terminals: raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}. Already defined as a token') if prodname == 'error': raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}. 
error is a reserved word')
@@ -726,8 +727,8 @@
                 if len(c) > 1:
                     raise GrammarError(
                         f'{file}:{line}: Literal token {s} in rule {prodname!r} may only be a single character')
-                if c not in self.Terminals:
-                    self.Terminals[c] = []
+                if c not in self.terminals:
+                    self.terminals[c] = []
                 syms[n] = c
                 continue
             except SyntaxError:
@@ -736,29 +737,29 @@
             raise GrammarError(f'{file}:{line}: Illegal name {s!r} in rule {prodname!r}')
 
     def handle_precedence(self, syms, file, line):
         """Handle precedence settings in the rule."""
         if '%prec' in syms:
             if syms[-1] == '%prec':
                 raise GrammarError(f'{file}:{line}: Syntax error. Nothing follows %%prec')
             if syms[-2] != '%prec':
                 raise GrammarError(f'{file}:{line}: Syntax error. %%prec can only appear at the end of a grammar rule')
             precname = syms[-1]
-            prodprec = self.Precedence.get(precname)
+            prodprec = self.precedence.get(precname)
             if not prodprec:
                 raise GrammarError(f'{file}:{line}: Nothing known about the precedence of {precname!r}')
-            self.UsedPrecedence.add(precname)
+            self.usedprecedence.add(precname)
             del syms[-2:]  # Drop %prec from the rule
             return prodprec
         else:
             # If no %prec, precedence is determined by the rightmost terminal symbol
-            precname = rightmost_terminal(syms, self.Terminals)
-            return self.Precedence.get(precname, ('right', 0))
+            precname = rightmost_terminal(syms, self.terminals)
+            return self.precedence.get(precname, ('right', 0))
 
     def check_duplicate_rule(self, prodname, syms, file, line):
         """Check for duplicate rule definitions."""
         rule_map = f'{prodname} -> {syms}'
-        if rule_map in self.Prodmap:
-            m = self.Prodmap[rule_map]
+        if rule_map in self.prodmap:
+            m = self.prodmap[rule_map]
             raise GrammarError(f'{file}:{line}: Duplicate rule {rule_map}. 
Previous definition at {m.file}:{m.line}') def add_production(self, prodname, syms, func=None, file='', line=0): @@ -776,29 +777,29 @@ class Grammar(object): self.check_duplicate_rule(prodname, syms, file, line) # Create a new production instance - pnumber = len(self.Productions) - if prodname not in self.Nonterminals: - self.Nonterminals[prodname] = [] + pnumber = len(self.productions) + if prodname not in self.nonterminals: + self.nonterminals[prodname] = [] - # Add the production number to Terminals and Nonterminals + # Add the production number to terminals and nonterminals for t in syms: - if t in self.Terminals: - self.Terminals[t].append(pnumber) + if t in self.terminals: + self.terminals[t].append(pnumber) else: - if t not in self.Nonterminals: - self.Nonterminals[t] = [] - self.Nonterminals[t].append(pnumber) + if t not in self.nonterminals: + self.nonterminals[t] = [] + self.nonterminals[t].append(pnumber) # Create and add the production p = Production(pnumber, prodname, syms, prodprec, func, file, line) - self.Productions.append(p) - self.Prodmap[f'{prodname} -> {syms}'] = p + self.productions.append(p) + self.prodmap[f'{prodname} -> {syms}'] = p # Add to the global productions list try: - self.Prodnames[prodname].append(p) + self.prodnames[prodname].append(p) except KeyError: - self.Prodnames[prodname] = [p] + self.prodnames[prodname] = [p] # ----------------------------------------------------------------------------- # @@ -808,12 +809,12 @@ class Grammar(object): def set_start(self, start=None): if not start: - start = self.Productions[1].name - if start not in self.Nonterminals: + start = self.productions[1].name + if start not in self.nonterminals: raise GrammarError('start symbol %s undefined' % start) - self.Productions[0] = Production(0, "S'", [start]) - self.Nonterminals[start].append(0) - self.Start = start + self.productions[0] = Production(0, "S'", [start]) + self.nonterminals[start].append(0) + self.start = start # ----------------------------------------------------------------------------- # @@ -828,13 +829,13 @@ class Grammar(object): if s in reachable: return reachable.add(s) - for p in self.Prodnames.get(s, []): + for p in self.prodnames.get(s, []): for r in p.prod: mark_reachable_from(r) reachable = set() - mark_reachable_from(self.Productions[0].prod[0]) - return [s for s in self.Nonterminals if s not in reachable] + mark_reachable_from(self.productions[0].prod[0]) + return [s for s in self.nonterminals if s not in reachable] # ----------------------------------------------------------------------------- # @@ -846,16 +847,16 @@ class Grammar(object): def infinite_cycles(self): terminates = {} - # Terminals: - for t in self.Terminals: + # terminals: + for t in self.terminals: terminates[t] = True terminates['$end'] = True - # Nonterminals: + # nonterminals: # Initialize to false: - for n in self.Nonterminals: + for n in self.nonterminals: terminates[n] = False # Propagate termination until no change @@ -869,7 +870,7 @@ class Grammar(object): def propagate_termination(self, terminates): while True: some_change = False - for (n, pl) in self.Prodnames.items(): + for (n, pl) in self.prodnames.items(): some_change |= self.check_productions_for_termination(n, pl, terminates) if not some_change: break @@ -898,7 +899,7 @@ class Grammar(object): infinite = [] for (s, term) in terminates.items(): if not term: - if s not in self.Prodnames and s not in self.Terminals and s != 'error': + if s not in self.prodnames and s not in self.terminals and s != 'error': # s is 
used-but-not-defined, and we've already warned of that,
                     # so it would be overkill to say that it's also non-terminating.
                     pass
                 else:
                     infinite.append(s)
         return infinite
 
-
-    # -----------------------------------------------------------------------------
-    #
-    # Find all symbols that were used the grammar, but not defined as tokens or
-    # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol
-    # and prod is the production where the symbol was used.
-    # -----------------------------------------------------------------------------
     def undefined_symbols(self):
         result = []
-        for p in self.Productions:
+        for p in self.productions:
             if not p:
                 continue
 
             for s in p.prod:
-                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
+                if s not in self.prodnames and s not in self.terminals and s != 'error':
                     result.append((s, p))
         return result
 
-    # -----------------------------------------------------------------------------
-    #
-    # Find all terminals that were defined, but not used by the grammar. Returns
-    # a list of all symbols.
-    # -----------------------------------------------------------------------------
     def unused_terminals(self):
         unused_tok = []
-        for s, v in self.Terminals.items():
+        for s, v in self.terminals.items():
             if s != 'error' and not v:
                 unused_tok.append(s)
         return unused_tok
 
-    # ------------------------------------------------------------------------------
-    #
-    # Find all grammar rules that were defined, but not used (maybe not reachable)
-    # Returns a list of productions.
-    # ------------------------------------------------------------------------------
-
-    def unused_rules(self):
+    def unused_rules(self):
         unused_prod = []
-        for s, v in self.Nonterminals.items():
+        for s, v in self.nonterminals.items():
             if not v:
-                p = self.Prodnames[s][0]
+                p = self.prodnames[s][0]
                 unused_prod.append(p)
         return unused_prod
 
@@ -955,20 +938,20 @@
     #
     # Returns a list of tuples (term,precedence) corresponding to precedence
    # rules that were never used by the grammar. term is the name of the terminal
     # on which precedence was applied and precedence is a string such as 'left' or
     # 'right' corresponding to the type of precedence.
     # -----------------------------------------------------------------------------
 
     def unused_precedence(self):
         unused = []
-        for termname in self.Precedence:
-            if not (termname in self.Terminals or termname in self.UsedPrecedence):
-                unused.append((termname, self.Precedence[termname][0]))
+        for termname in self.precedence:
+            if not (termname in self.terminals or termname in self.usedprecedence):
+                unused.append((termname, self.precedence[termname][0]))
 
         return unused
 
     def _first(self, beta):
-        # We are computing First(x1,x2,x3,...,xn)
+        # We are computing first(x1,x2,x3,...,xn)
         result = []
         for x in beta:
             x_produces_empty = self._process_first_set(x, result)
@@ -985,8 +968,8 @@
 
     def _process_first_set(self, x, result):
         x_produces_empty = False
-        # Add all the non-<empty> symbols of First[x] to the result.
+        # Add all the non-<empty> symbols of first[x] to the result.
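+        # '' (the empty string) marks an epsilon derivation; it is recorded via
+        # x_produces_empty below instead of being copied into the result.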
+ for f in self.first[x]: if f == '': x_produces_empty = True else: @@ -995,98 +978,98 @@ class Grammar(object): return x_produces_empty def compute_first(self): - if self.First: - return self.First - # Terminals: - for t in self.Terminals: - self.First[t] = [t] - self.First['$end'] = ['$end'] - # Nonterminals: + if self.first: + return self.first + # terminals: + for t in self.terminals: + self.first[t] = [t] + self.first['$end'] = ['$end'] + # nonterminals: # Initialize to the empty set: - for n in self.Nonterminals: - self.First[n] = [] + for n in self.nonterminals: + self.first[n] = [] # Then propagate symbols until no change: while True: some_change = False some_change = self._propagate_first() if not some_change: break - return self.First + return self.first def _propagate_first(self): some_change = False - for n in self.Nonterminals: + for n in self.nonterminals: some_change |= self._update_first_set(n) return some_change def _update_first_set(self, nonterminal): some_change = False - for p in self.Prodnames[nonterminal]: + for p in self.prodnames[nonterminal]: for f in self._first(p.prod): - if f not in self.First[nonterminal]: - self.First[nonterminal].append(f) + if f not in self.first[nonterminal]: + self.first[nonterminal].append(f) some_change = True return some_change def compute_follow(self, start=None): # If already computed, return the result - if self.Follow: - return self.Follow + if self.follow: + return self.follow # If first sets not computed yet, do that first. - if not self.First: + if not self.first: self.compute_first() # Add '$end' to the follow list of the start symbol - for k in self.Nonterminals: - self.Follow[k] = [] + for k in self.nonterminals: + self.follow[k] = [] if not start: - start = self.Productions[1].name + start = self.productions[1].name - self.Follow[start] = ['$end'] + self.follow[start] = ['$end'] while True: didadd = self.process_productions() if not didadd: break - return self.Follow + return self.follow def process_productions(self): didadd = False - for p in self.Productions[1:]: + for p in self.productions[1:]: didadd = self.process_production(p, didadd) return didadd def process_production(self, p, didadd): - for i, B in enumerate(p.prod): - if B in self.Nonterminals: + for i, b in enumerate(p.prod): + if b in self.nonterminals: fst = self._first(p.prod[i + 1:]) - didadd = self.process_first_set(fst, B, p, i, didadd) + didadd = self.process_first_set(fst, b, p, i, didadd) return didadd - def process_first_set(self, fst, B, p, i, didadd): + def process_first_set(self, fst, b, p, i, didadd): hasempty = False for f in fst: - if f != '' and f not in self.Follow[B]: - self.Follow[B].append(f) + if f != '' and f not in self.follow[b]: + self.follow[b].append(f) didadd = True if f == '': hasempty = True if hasempty or i == (len(p.prod) - 1): - didadd = self.add_follow_to_nonterminal(p, B, didadd) + didadd = self.add_follow_to_nonterminal(p, b, didadd) return didadd - def add_follow_to_nonterminal(self, p, B, didadd): - for f in self.Follow[p.name]: - if f not in self.Follow[B]: - self.Follow[B].append(f) + def add_follow_to_nonterminal(self, p, b, didadd): + for f in self.follow[p.name]: + if f not in self.follow[b]: + self.follow[b].append(f) didadd = True return didadd def build_lritems(self): - for p in self.Productions: + for p in self.productions: lastlri = p i = 0 lr_items = [] @@ -1109,7 +1092,7 @@ class Grammar(object): lri = LRItem(p, i) # Precompute the list of productions immediately following try: - lri.lr_after = 
self.Prodnames[lri.prod[i + 1]] + lri.lr_after = self.prodnames[lri.prod[i + 1]] except (IndexError, KeyError): lri.lr_after = [] try: @@ -1121,42 +1104,48 @@ class Grammar(object): return lri -def digraph(X, R, FP): - N = {} - for x in X: - N[x] = 0 +def digraph(nodes, edges, fp): + # 初始化每个节点的状态为0 + n = {} + for node in nodes: + n[node] = 0 + stack = [] - F = {} - for x in X: - if N[x] == 0: - traverse(x, N, stack, F, X, R, FP) - return F + f = {} + + # 遍历图中的每个节点 + for node in nodes: + if n[node] == 0: + traverse(node, n, stack, f, nodes, edges, fp) + + return f -def traverse(x, N, stack, F, X, R, FP): +def traverse(x, n, stack, f, x_values, r, fp): stack.append(x) d = len(stack) - N[x] = d - F[x] = FP(x) # F(X) <- F'(x) - - rel = R(x) # Get y's related to x - for y in rel: - if N[y] == 0: - traverse(y, N, stack, F, X, R, FP) - N[x] = min(N[x], N[y]) - for a in F.get(y, []): - if a not in F[x]: - F[x].append(a) - if N[x] == d: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] + n[x] = d + f[x] = fp(x) # f(x) <- f'(x) + + related = r(x) # Get y's related to x + for y in related: + if n[y] == 0: + traverse(y, n, stack, f, x_values, r, fp) + n[x] = min(n[x], n[y]) + for a in f.get(y, []): + if a not in f[x]: + f[x].append(a) + if n[x] == d: + n[stack[-1]] = MAXINT + f[stack[-1]] = f[x] element = stack.pop() while element != x: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] + n[stack[-1]] = MAXINT + f[stack[-1]] = f[x] element = stack.pop() + class LALRError(YaccError): pass @@ -1180,7 +1169,7 @@ class LRTable: # Internal attributes self.lr_action = {} # Action table self.lr_goto = {} # Goto table - self.lr_productions = grammar.Productions # Copy of grammar Production array + self.lr_productions = grammar.productions # Copy of grammar Production array self.lr_goto_cache = {} # Cache of computed gotos self.lr0_cidhash = {} # Cache of closures @@ -1194,50 +1183,45 @@ class LRTable: self.sr_conflicts = [] self.rr_conflicts = [] - # Build the tables + # build the tables self.grammar.build_lritems() self.grammar.compute_first() self.grammar.compute_follow() self.lr_parse_table() - # Bind all production function names to callable objects in pdict + # bind all production function names to callable objects in pdict def bind_callables(self, pdict): for p in self.lr_productions: p.bind(pdict) - # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. - - def lr0_closure(self, I): + def lr0_closure(self, input_items): self._add_count += 1 + closure_items = input_items[:] + did_add = True + while did_add: + did_add = self._process_lr0_closure(closure_items) + return closure_items - # Add everything in I to J - J = I[:] - didadd = True - while didadd: - didadd = self._process_lr0_closure(J) - - return J - - def _process_lr0_closure(self, J): + def _process_lr0_closure(self, closure_items): """ Process a single step of the lr0 closure algorithm. It tries to add new LR items to the closure. 
""" - didadd = False - for j in J: - for x in j.lr_after: + did_add = False + for item in closure_items: + for x in item.lr_after: if getattr(x, 'lr0_added', 0) == self._add_count: continue - # Add B --> .G to J - J.append(x.lr_next) + # Add b --> .G to closure_items + closure_items.append(x.lr_next) x.lr0_added = self._add_count - didadd = True + did_add = True - return didadd + return did_add - def lr0_goto(self, I, x): - # First we look for a previously cached entry - g = self.lr_goto_cache.get((id(I), x)) + def lr0_goto(self, input_items, x): + # first we look for a previously cached entry + g = self.lr_goto_cache.get((id(input_items), x)) if g: return g @@ -1250,7 +1234,7 @@ class LRTable: self.lr_goto_cache[x] = s gs = [] - for p in I: + for p in input_items: n = p.lr_next if n and n.lr_before == x: s1 = s.get(id(n)) @@ -1266,35 +1250,30 @@ class LRTable: s['$end'] = g else: s['$end'] = gs - self.lr_goto_cache[(id(I), x)] = g + self.lr_goto_cache[(id(input_items), x)] = g return g - # Compute the LR(0) sets of item function def lr0_items(self): - C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] + closure_set = [self.lr0_closure([self.grammar.productions[0].lr_next])] i = 0 - for I in C: - self.lr0_cidhash[id(I)] = i + for item_set in closure_set: + self.lr0_cidhash[id(item_set)] = i i += 1 - - # Loop over the items in C and each grammar symbols i = 0 - while i < len(C): - I = C[i] + while i < len(closure_set): + item_set = closure_set[i] i += 1 - - # Collect all of the symbols that could possibly be in the goto(I,X) sets - asyms = {} - for ii in I: - for s in ii.usyms: - asyms[s] = None - for x in asyms: - g = self.lr0_goto(I, x) + symbols = {} + for item in item_set: + for symbol in item.usyms: + symbols[symbol] = None + for symbol in symbols: + g = self.lr0_goto(item_set, symbol) if not g or id(g) in self.lr0_cidhash: continue - self.lr0_cidhash[id(g)] = len(C) - C.append(g) - return C + self.lr0_cidhash[id(g)] = len(closure_set) + closure_set.append(g) + return closure_set def compute_nullable_nonterminals(self): nullable = set() @@ -1306,7 +1285,7 @@ class LRTable: return nullable def _process_nullable_step(self, nullable, num_nullable): - for p in self.grammar.Productions[1:]: + for p in self.grammar.productions[1:]: if p.len == 0: nullable.add(p.name) continue @@ -1317,9 +1296,9 @@ class LRTable: nullable.add(p.name) return len(nullable) - def find_nonterminal_transitions(self, C): + def find_nonterminal_transitions(self, input_item): trans = [] - for stateno, state in enumerate(C): + for stateno, state in enumerate(input_item): for p in state: self._process_transition(p, stateno, trans) return trans @@ -1331,34 +1310,33 @@ class LRTable: """ if p.lr_index < p.len - 1: t = (stateno, p.prod[p.lr_index + 1]) - if t[1] in self.grammar.Nonterminals: + if t[1] in self.grammar.nonterminals: if t not in trans: trans.append(t) - def dr_relation(self, C, trans, nullable): - state, N = trans + def dr_relation(self, input_item, trans, nullable): + state, n = trans terms = [] - g = self.lr0_goto(C[state], N) + g = self.lr0_goto(input_item[state], n) for p in g: self._process_relation(p, terms) - if state == 0 and N == self.grammar.Productions[0].prod[0]: + if state == 0 and n == self.grammar.productions[0].prod[0]: terms.append('$end') - return terms def _process_relation(self, p, terms): if p.lr_index < p.len - 1: a = p.prod[p.lr_index + 1] - if a in self.grammar.Terminals: + if a in self.grammar.terminals: if a not in terms: terms.append(a) - def reads_relation(self, C, 
trans, empty): + def reads_relation(self, item, trans, empty): # Look for empty transitions rel = [] - state, N = trans + state, n = trans - g = self.lr0_goto(C[state], N) + g = self.lr0_goto(item[state], n) j = self.lr0_cidhash.get(id(g), -1) for p in g: if p.lr_index < p.len - 1: @@ -1368,80 +1346,39 @@ class LRTable: return rel - # ----------------------------------------------------------------------------- - # - # Determines the lookback and includes relations - # - # LOOKBACK: - # - # This relation is determined by running the LR(0) state machine forward. - # For example, starting with a production "N : . A B C", we run it forward - # to obtain "N : A B C ." We then build a relationship between this final - # state and the starting state. These relationships are stored in a dictionary - # lookdict. - # - # INCLUDES: - # - # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). - # - # This relation is used to determine non-terminal transitions that occur - # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) - # - # L is essentially a prefix (which may be empty), T is a suffix that must be - # able to derive an empty string. State p' must lead to state p with the string L. - # - # ----------------------------------------------------------------------------- - - def compute_lookback_includes(self, C, trans, nullable): - lookdict = {} # Dictionary of lookback relations - includedict = {} # Dictionary of include relations - - # Make a dictionary of non-terminal transitions - dtrans = {t: 1 for t in trans} # Dictionary comprehension to simplify the creation - - # Loop over all transitions and compute lookbacks and includes - for state, N in trans: + def compute_lookback_includes(self, item, trans, nullable): + lookdict = {} + includedict = {} + dtrans = {t: 1 for t in trans} + for state, n in trans: lookb = [] includes = [] - for p in C[state]: - if p.name != N: + for p in item[state]: + if p.name != n: continue - - # Okay, we have a name match. Follow the production all the way through the state machine - self._process_lookback_and_include(C, state, p, dtrans, includes, lookb, nullable) - - # Store the computed relations + self._process_lookback_and_include(item, state, p, dtrans, includes, lookb, nullable) for i in includes: if i not in includedict: includedict[i] = [] - includedict[i].append((state, N)) - lookdict[(state, N)] = lookb - + includedict[i].append((state, n)) + lookdict[(state, n)] = lookb return lookdict, includedict - def _process_lookback_and_include(self, C, state, p, dtrans, includes, lookb, nullable): + def _process_lookback_and_include(self, item, state, p, dtrans, includes, lookb, nullable): """ Process lookback and include relations for a single production. This handles the inner `while` loop logic and `lookb` and `includes` updates. 
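+        The production is advanced one symbol at a time through the LR(0)
+        machine via lr0_goto() until its final state is reached.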
""" lr_index = p.lr_index j = state - - # Process the production from the state machine while lr_index < p.len - 1: lr_index += 1 t = p.prod[lr_index] - - # Check for non-terminal transitions if (j, t) in dtrans: - # There is a chance this is an includes relation self._process_include_relation(p, lr_index, j, t, includes, nullable) - - g = self.lr0_goto(C[j], t) # Go to next set - j = self.lr0_cidhash.get(id(g), -1) # Go to next state - - # Final state is j; now locate the production - self._process_lookback_relation(C, j, p, lookb) + g = self.lr0_goto(item[j], t) + j = self.lr0_cidhash.get(id(g), -1) + self._process_lookback_relation(item, j, p, lookb) def _process_include_relation(self, p, lr_index, j, t, includes, nullable): """ @@ -1449,19 +1386,20 @@ class LRTable: """ li = lr_index + 1 while li < p.len: - if p.prod[li] in self.grammar.Terminals: - break # No forget it + if p.prod[li] in self.grammar.terminals: + break if p.prod[li] not in nullable: break li += 1 else: includes.append((j, t)) - def _process_lookback_relation(self, C, j, p, lookb): + @staticmethod + def _process_lookback_relation(item, j, p, lookb): """ Process the lookback relation by comparing the current and previous productions. """ - for r in C[j]: + for r in item[j]: if r.name != p.name: continue if r.len != p.len: @@ -1474,36 +1412,18 @@ class LRTable: else: lookb.append((j, r)) - # ----------------------------------------------------------------------------- - # - # Given a set of LR(0) items, this function computes the read sets. - # - # - # Returns a set containing the read sets - # ----------------------------------------------------------------------------- - - def compute_read_sets(self, C, ntrans, nullable): - FP = lambda x: self.dr_relation(C, x, nullable) - R = lambda x: self.reads_relation(C, x, nullable) - F = digraph(ntrans, R, FP) - return F - - # ----------------------------------------------------------------------------- - # - # Given a set of LR(0) items, a set of non-terminal transitions, a readset, - # and an include set, this function computes the follow sets - # - # - # Inputs: - # - # Returns a set containing the follow sets - # ----------------------------------------------------------------------------- + def compute_read_sets(self, c, ntrans, nullable): + fp = lambda x: self.dr_relation(c, x, nullable) + r = lambda x: self.reads_relation(c, x, nullable) + f = digraph(ntrans, r, fp) + return f - def compute_follow_sets(self, ntrans, readsets, inclsets): - FP = lambda x: readsets[x] - R = lambda x: inclsets.get(x, []) - F = digraph(ntrans, R, FP) - return F + @staticmethod + def compute_follow_sets(ntrans, readsets, inclsets): + fp = lambda x: readsets[x] + r = lambda x: inclsets.get(x, []) + f = digraph(ntrans, r, fp) + return f def add_lookaheads(self, lookbacks, followset): for trans, lb in lookbacks.items(): @@ -1514,11 +1434,13 @@ class LRTable: f = followset.get(trans, []) self._add_lookaheads_to_production(p, state, f) # Add lookaheads from followset - def _ensure_lookaheads(self, p, state): + @staticmethod + def _ensure_lookaheads(p, state): if state not in p.lookaheads: p.lookaheads[state] = [] - def _add_lookaheads_to_production(self, p, state, followset_elements): + @staticmethod + def _add_lookaheads_to_production(p, state, followset_elements): for a in followset_elements: if a not in p.lookaheads[state]: p.lookaheads[state].append(a) @@ -1529,34 +1451,31 @@ class LRTable: # with LALR parsing # ----------------------------------------------------------------------------- - def 
add_lalr_lookaheads(self, C): + def add_lalr_lookaheads(self, c): # Determine all of the nullable nonterminals nullable = self.compute_nullable_nonterminals() # Find all non-terminal transitions - trans = self.find_nonterminal_transitions(C) + trans = self.find_nonterminal_transitions(c) # Compute read sets - readsets = self.compute_read_sets(C, trans, nullable) + readsets = self.compute_read_sets(c, trans, nullable) # Compute lookback/includes relations - lookd, included = self.compute_lookback_includes(C, trans, nullable) + lookd, included = self.compute_lookback_includes(c, trans, nullable) - # Compute LALR FOLLOW sets + # Compute LALR follow sets followsets = self.compute_follow_sets(trans, readsets, included) # Add all of the lookaheads self.add_lookaheads(lookd, followsets) - # ----------------------------------------------------------------------------- - # - # This function constructs the parse tables for SLR or LALR - # ----------------------------------------------------------------------------- - def handle_shift_reduce_conflict(self, st, a, p, r, Precedence, Productions, log, j=None): + @staticmethod + def handle_shift_reduce_conflict(st, a, p, r, precedence, productions, log, j=None): """Handle shift/reduce conflict.""" if r > 0: - sprec, slevel = Precedence.get(a, ('right', 0)) - rprec, rlevel = Productions[p.number].prec + sprec, slevel = precedence.get(a, ('right', 0)) + rprec, rlevel = productions[p.number].prec if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): return -p.number, p, 'reduce', None elif (slevel == rlevel) and (rprec == 'nonassoc'): @@ -1564,25 +1483,26 @@ class LRTable: else: return j, p, 'shift', None elif r < 0: - oldp = Productions[-r] - pp = Productions[p.number] + oldp = productions[-r] + pp = productions[p.number] if oldp.line > pp.line: return -p.number, p, 'reduce', oldp else: return -oldp.number, oldp, 'reduce', pp return None, None, None, None - def log_shift_reduce_action(self, log, a, m): + @staticmethod + def log_shift_reduce_action(log, a, m): """Log shift/reduce or reduce/reduce actions.""" log.info(' %-15s %s', a, m) - def process_state_transitions(self, st, I, st_action, Precedence, Productions, action, goto, log): + def process_state_transitions(self, st, item, st_action, precedence, productions, action, goto, log): """Process state transitions and handle conflicts.""" st_goto = {} actlist = [] st_actionp = {} - for p in I: + for p in item: if p.len == p.lr_index + 1: if p.name == "S'": st_action['$end'] = 0 @@ -1594,12 +1514,12 @@ class LRTable: r = st_action.get(a) if r is not None: shift, new_p, action_type, reject_p = self.handle_shift_reduce_conflict(st, a, p, r, - Precedence, - Productions, log) + precedence, + productions, log) if action_type == 'reduce': st_action[a] = shift st_actionp[a] = new_p - Productions[new_p.number].reduced += 1 + productions[new_p.number].reduced += 1 elif action_type == 'shift': self.log_shift_reduce_action(self, log, a, f"shift and go to state {j}") else: @@ -1607,12 +1527,12 @@ class LRTable: else: st_action[a] = -p.number st_actionp[a] = p - Productions[p.number].reduced += 1 + productions[p.number].reduced += 1 else: i = p.lr_index a = p.prod[i + 1] - if a in self.grammar.Terminals: - g = self.lr0_goto(I, a) + if a in self.grammar.terminals: + g = self.lr0_goto(item, a) j = self.lr0_cidhash.get(id(g), -1) if j >= 0: actlist.append((a, p, f'shift and go to state {j}')) @@ -1621,10 +1541,10 @@ class LRTable: if r > 0 and r != j: raise LALRError(f'Shift/shift conflict in state {st}') 
elif r < 0: - sprec, slevel = Precedence.get(a, ('right', 0)) - rprec, rlevel = Productions[st_actionp[a].number].prec + sprec, slevel = precedence.get(a, ('right', 0)) + rprec, rlevel = productions[st_actionp[a].number].prec if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): - Productions[st_actionp[a].number].reduced -= 1 + productions[st_actionp[a].number].reduced -= 1 st_action[a] = j st_actionp[a] = p elif slevel == rlevel and rprec == 'nonassoc': @@ -1637,52 +1557,46 @@ class LRTable: return st_action, st_actionp, st_goto, actlist def lr_parse_table(self): - Productions = self.grammar.Productions - Precedence = self.grammar.Precedence + productions = self.grammar.productions + precedence= self.grammar.precedence goto = self.lr_goto action = self.lr_action log = self.log actionp = {} - C = self.lr0_items() - self.add_lalr_lookaheads(C) + item = self.lr0_items() + self.add_lalr_lookaheads(item) st = 0 - for I in C: + for i in item: log.info('') log.info(f'state {st}') log.info('') - self._log_productions(I, log) # Log productions for the current state + self._log_productions(i, log) # Log productions for the current state log.info('') # Process the state transitions and conflicts st_action = {} st_actionp = {} st_goto = {} - st_action, st_actionp, st_goto, actlist = self.process_state_transitions(st, I, st_action, Precedence, - Productions, action, goto, log) - - # Logging actions + st_action, st_actionp, st_goto, actlist = self.process_state_transitions(st, i, st_action, precedence, + productions, action, goto, log) self._log_actions(st_action, st_actionp, actlist, log) - - # Handle not used actions self._handle_not_used_actions(st_action, st_actionp, actlist, log) - - # Handle state transitions for nonterminals - self._handle_state_transitions_for_nonterminals(I, st_goto, log) - - # Save action and goto for the current state + self._handle_state_transitions_for_nonterminals(i, st_goto, log) action[st] = st_action actionp[st] = st_actionp goto[st] = st_goto st += 1 - def _log_productions(self, I, log): + @staticmethod + def _log_productions(item, log): """ Log the productions in a given state I. """ - for p in I: + for p in item: log.info(f' ({p.number}) {p}') - def _log_actions(self, st_action, st_actionp, actlist, log): + @staticmethod + def _log_actions(st_action, st_actionp, actlist, log): """ Log actions for a given state transition. """ @@ -1703,7 +1617,8 @@ class LRTable: if not_used: log.debug('') - def _check_not_used_action(self, a, p, st_actionp, m, _actprint, log): + @staticmethod + def _check_not_used_action(a, p, st_actionp, m, _actprint, log): """ Check if the action is not used and log it. """ @@ -1714,30 +1629,23 @@ class LRTable: return True return False - def _handle_state_transitions_for_nonterminals(self, I, st_goto, log): + def _handle_state_transitions_for_nonterminals(self, item, st_goto, log): """ Handle state transitions for nonterminals and log the corresponding transitions. """ nkeys = {} - for ii in I: + for ii in item: for s in ii.usyms: - if s in self.grammar.Nonterminals: + if s in self.grammar.nonterminals: nkeys[s] = None for n in nkeys: - g = self.lr0_goto(I, n) + g = self.lr0_goto(item, n) j = self.lr0_cidhash.get(id(g), -1) if j >= 0: st_goto[n] = j log.info(f' %-30s shift and go to state {j}') -# ----------------------------------------------------------------------------- -# -# This function returns a dictionary containing all of the symbols defined within -# a caller further down the call stack. 
This is used to get the environment -# associated with the yacc() call if none was provided. -# ----------------------------------------------------------------------------- - def get_caller_module_dict(levels): f = sys._getframe(levels) ldict = f.f_globals.copy() @@ -1788,13 +1696,6 @@ def parse_rule(p, lastp, dline, file, ps): return prodname, syms, lastp - -# ----------------------------------------------------------------------------- -# -# This class represents information extracted for building a parser including -# start symbol, error function, tokens, precedence list, action functions, -# etc. -# ----------------------------------------------------------------------------- class ParserReflect(object): def __init__(self, pdict, log=None): self.pdict = pdict @@ -1938,33 +1839,33 @@ class ParserReflect(object): self.log.warning('Token %r multiply defined', n) terminals.add(n) - # Get the precedence map (if any) + # Get the precedencemap (if any) def get_precedence(self): self.prec = self.pdict.get('precedence') - # Validate and parse the precedence map + # Validate and parse the precedencemap def validate_precedence(self): preclist = [] if self.prec: if not isinstance(self.prec, (list, tuple)): - self.log.error('precedence must be a list or tuple') + self.log.error('precedencemust be a list or tuple') self.error = True return for level, p in enumerate(self.prec): if not isinstance(p, (list, tuple)): - self.log.error('Bad precedence table') + self.log.error('bad precedencetable') self.error = True return if len(p) < 2: - self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p) + self.log.error('Malformed precedenceentry %s. Must be (assoc, term, ..., term)', p) self.error = True return assoc = p[0] if not isinstance(assoc, str): - self.log.error('precedence associativity must be a string') + self.log.error('precedenceassociativity must be a string') self.error = True return @@ -1976,7 +1877,7 @@ class ParserReflect(object): def _validate_terms_and_append(self, terms, assoc, level, preclist): for term in terms: if not isinstance(term, str): - self.log.error('precedence items must be strings') + self.log.error('precedenceitems must be strings') self.error = True return preclist.append((term, assoc, level + 1)) @@ -2073,11 +1974,11 @@ class ParserReflect(object): # ----------------------------------------------------------------------------- # -# Build a parser +# build a parser # ----------------------------------------------------------------------------- -def yacc(*, debug=YACC_DEBUG, module=None, start=None, - check_recursion=True, optimize=False, debugfile=DEBUG_FILE, +def yacc(*, debug=YACC_DEbUG, module=None, start=None, + check_recursion=True, optimize=False, debugfile=DEbUG_FILE, debuglog=None, errorlog=None): # Reference to the parsing method of the last built parser global parse @@ -2132,7 +2033,7 @@ def yacc(*, debug=YACC_DEBUG, module=None, start=None, # Create a grammar object grammar = Grammar(pinfo.tokens) - # Set precedence level for terminals + # Set precedencelevel for terminals for term, assoc, level in pinfo.preclist: try: grammar.set_precedence(term, assoc, level) @@ -2181,7 +2082,7 @@ def yacc(*, debug=YACC_DEBUG, module=None, start=None, debuglog.info('') debuglog.info('Grammar') debuglog.info('') - for n, p in enumerate(grammar.Productions): + for n, p in enumerate(grammar.productions): debuglog.info('Rule %-5d %s', n, p) # Find unused non-terminals @@ -2201,20 +2102,20 @@ def yacc(*, debug=YACC_DEBUG, module=None, start=None, if debug: 
debuglog.info('') - debuglog.info('Terminals, with rules where they appear') + debuglog.info('terminals, with rules where they appear') debuglog.info('') - terms = list(grammar.Terminals) + terms = list(grammar.terminals) terms.sort() for term in terms: - debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]])) + debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.terminals[term]])) debuglog.info('') - debuglog.info('Nonterminals, with rules where they appear') + debuglog.info('nonterminals, with rules where they appear') debuglog.info('') - nonterms = list(grammar.Nonterminals) + nonterms = list(grammar.nonterminals) nonterms.sort() for nonterm in nonterms: - debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]])) + debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.nonterminals[nonterm]])) debuglog.info('') if check_recursion: @@ -2229,7 +2130,7 @@ def yacc(*, debug=YACC_DEBUG, module=None, start=None, unused_prec = grammar.unused_precedence() for term, assoc in unused_prec: - errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term) + errorlog.error('precedencerule %r defined for unknown symbol %r', assoc, term) errors = True if errors: @@ -2278,7 +2179,7 @@ def yacc(*, debug=YACC_DEBUG, module=None, start=None, errorlog.warning('Rule (%s) is never reduced', rejected) warned_never.append(rejected) - # Build the parser + # build the parser lr.bind_callables(pinfo.pdict) parser = LRParser(lr, pinfo.error_func) diff --git "a/script/local/parser/\346\226\207\346\263\225.md" "b/script/local/parser/\346\226\207\346\263\225.md" index 01899463..f2d08454 100644 --- "a/script/local/parser/\346\226\207\346\263\225.md" +++ "b/script/local/parser/\346\226\207\346\263\225.md" @@ -21,11 +21,11 @@ term : term TIMES factor | factor factor : number | string - | ID + | id | NULL | TRUE | FALSE | LPAREN expr RPAREN -function : ID LPAREN variables RPAREN +function : id LPAREN variables RPAREN variables : variables COMMA expr | expr -- Gitee From 142a9c58e8dd4d3d4abb71ea276a204ca62b90d1 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 10 Dec 2024 15:09:45 +0800 Subject: [PATCH 39/87] sagfasg --- .../local/parser/{my_lexer.py => MyLexer.py} | 23 +++++++++++-------- script/local/parser/{my_yacc.py => MyYacc.py} | 8 +++---- 2 files changed, 18 insertions(+), 13 deletions(-) rename script/local/parser/{my_lexer.py => MyLexer.py} (90%) rename script/local/parser/{my_yacc.py => MyYacc.py} (95%) diff --git a/script/local/parser/my_lexer.py b/script/local/parser/MyLexer.py similarity index 90% rename from script/local/parser/my_lexer.py rename to script/local/parser/MyLexer.py index 59e4296b..efb36e63 100644 --- a/script/local/parser/my_lexer.py +++ b/script/local/parser/MyLexer.py @@ -16,7 +16,7 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : my_lexer.py is a utility to check security configurations info on local node. +# Description : MyLexer.py is a utility to check security configurations info on local node. ############################################################################# import os import sys @@ -105,24 +105,28 @@ class MyLexer(): t_LPAREN = r'\(' t_RPAREN = r'\)' t_MOD = r'\%' - - def t_number(self, t): + + @staticmethod + def t_number(t): r'-?[0-9]+(\.[0-9]+)?' 
t.value = Decimal(t.value) return t - - def t_string(self, t): + + @staticmethod + def t_string(t): r'"[^"]*"' t.value = t.value[1:-1] return t - def t_id(self, t): + @staticmethod + def t_id(t): r'[a-zA-Z_][a-zA-Z_0-9]*' - t.type = self.reserved.get(t.value,'id') + t.type = reserved.get(t.value,'id') return t # Define a rule so we can track line numbers - def t_newline(self,t): + @staticmethod + def t_newline(t): r'\n+' t.lexer.lineno += len(t.value) @@ -130,7 +134,8 @@ class MyLexer(): t_ignore = ' \t' # Error handling rule - def t_error(self,t): + @staticmethod + def t_error(t): raise Exception('Illegal character "%s"' % t.value[0]) t.lexer.skip(1) diff --git a/script/local/parser/my_yacc.py b/script/local/parser/MyYacc.py similarity index 95% rename from script/local/parser/my_yacc.py rename to script/local/parser/MyYacc.py index 9bcc6f73..41c9705e 100644 --- a/script/local/parser/my_yacc.py +++ b/script/local/parser/MyYacc.py @@ -16,7 +16,7 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : my_yacc.py is a utility to check security configurations info on local node. +# Description : MyYacc.py is a utility to check security configurations info on local node. ############################################################################# import os import sys @@ -24,8 +24,8 @@ import sys localDirPath = os.path.dirname(os.path.realpath(__file__)) sys.path.append(sys.path[0] + "/../") -from local.parser.my_lexer import tokens -from local.parser.my_lexer import token_dict +from local.parser.MyLexer import tokens +from local.parser.MyLexer import token_dict from local.parser.functions import get_function from local.parser.variables import get_variable from local.parser.yacc import yacc @@ -176,4 +176,4 @@ class MyYacc(): raise Exception('Syntax error in input!') def build(self): - self.yacc = yacc(module=my_yacc) + self.yacc = yacc(module=MyYacc) -- Gitee From 5344941db614f96c2edab74b8885f013921b9ccd Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 10 Dec 2024 17:27:26 +0800 Subject: [PATCH 40/87] aSgasg --- script/local/LocalCheckSE.py | 4 +- .../local/parser/{MyLexer.py => my_lexer.py} | 0 script/local/parser/{MyYacc.py => my_yacc.py} | 4 +- script/local/parser/parsetab.py | 62 +++---- script/local/parser/yacc.py | 155 +++++++----------- 5 files changed, 94 insertions(+), 131 deletions(-) rename script/local/parser/{MyLexer.py => my_lexer.py} (100%) rename script/local/parser/{MyYacc.py => my_yacc.py} (98%) diff --git a/script/local/LocalCheckSE.py b/script/local/LocalCheckSE.py index b1e43614..646364cd 100644 --- a/script/local/LocalCheckSE.py +++ b/script/local/LocalCheckSE.py @@ -38,8 +38,8 @@ from domain_utils.cluster_file.version_info import VersionInfo from base_utils.os.net_util import NetUtil from domain_utils.domain_common.cluster_constants import ClusterConstants from datetime import datetime, timedelta -from local.parser.MyYacc import MyYacc -from local.parser.MyLexer import MyLexer +from local.parser.my_yacc import MyYacc +from local.parser.my_lexer import MyLexer from local.parser.utils import set_settings from local.parser.utils import set_dependency_settings from local.parser.utils import show_dependency_info diff --git a/script/local/parser/MyLexer.py b/script/local/parser/my_lexer.py similarity index 100% rename from script/local/parser/MyLexer.py rename to script/local/parser/my_lexer.py diff --git 
a/script/local/parser/MyYacc.py b/script/local/parser/my_yacc.py similarity index 98% rename from script/local/parser/MyYacc.py rename to script/local/parser/my_yacc.py index 41c9705e..126888ce 100644 --- a/script/local/parser/MyYacc.py +++ b/script/local/parser/my_yacc.py @@ -24,8 +24,8 @@ import sys localDirPath = os.path.dirname(os.path.realpath(__file__)) sys.path.append(sys.path[0] + "/../") -from local.parser.MyLexer import tokens -from local.parser.MyLexer import token_dict +from local.parser.my_lexer import tokens +from local.parser.my_lexer import token_dict from local.parser.functions import get_function from local.parser.variables import get_variable from local.parser.yacc import yacc diff --git a/script/local/parser/parsetab.py b/script/local/parser/parsetab.py index fc71fba5..38a20c9f 100644 --- a/script/local/parser/parsetab.py +++ b/script/local/parser/parsetab.py @@ -45,35 +45,35 @@ for _k, _v in _lr_goto_items.items(): del _lr_goto_items _lr_productions = [ ("S' -> sentence","S'",1,None,None,None), - ('sentence -> conditions THEN function','sentence',3,'p_conditions_relation_function','MyYacc.py',15), - ('conditions -> conditions OR and_conditions','conditions',3,'p_conditions_or','MyYacc.py',21), - ('conditions -> and_conditions','conditions',1,'p_conditions_and_conditions','MyYacc.py',25), - ('and_conditions -> and_conditions AND not_conditions','and_conditions',3,'p_and_conditions_and','MyYacc.py',30), - ('and_conditions -> not_conditions','and_conditions',1,'p_and_conditions_cdt','MyYacc.py',35), - ('not_conditions -> NOT cdt','not_conditions',2,'p_not_cdt','MyYacc.py',39), - ('not_conditions -> cdt','not_conditions',1,'p_not_conditions_cdt','MyYacc.py',43), - ('cdt -> expr EQUAL expr','cdt',3,'p_cdt_ops','MyYacc.py',48), - ('cdt -> expr NEQUAL expr','cdt',3,'p_cdt_ops','MyYacc.py',49), - ('cdt -> expr GE expr','cdt',3,'p_cdt_ops','MyYacc.py',50), - ('cdt -> expr GT expr','cdt',3,'p_cdt_ops','MyYacc.py',51), - ('cdt -> expr LE expr','cdt',3,'p_cdt_ops','MyYacc.py',52), - ('cdt -> expr LT expr','cdt',3,'p_cdt_ops','MyYacc.py',53), - ('cdt -> LPAREN conditions RPAREN','cdt',3,'p_cdt_parens','MyYacc.py',73), - ('expr -> expr PLUS term','expr',3,'p_expr_plus_minus','MyYacc.py',78), - ('expr -> expr MINUS term','expr',3,'p_expr_plus_minus','MyYacc.py',79), - ('expr -> term','expr',1,'p_expr_term','MyYacc.py',87), - ('term -> term TIMES factor','term',3,'p_term_times_divide_mod','MyYacc.py',92), - ('term -> term DIVIDE factor','term',3,'p_term_times_divide_mod','MyYacc.py',93), - ('term -> term MOD factor','term',3,'p_term_times_divide_mod','MyYacc.py',94), - ('term -> factor','term',1,'p_term_factor','MyYacc.py',104), - ('factor -> number','factor',1,'p_factor_assign_simple','MyYacc.py',109), - ('factor -> string','factor',1,'p_factor_assign_simple','MyYacc.py',110), - ('factor -> id','factor',1,'p_factor_id','MyYacc.py',115), - ('factor -> NULL','factor',1,'p_factor_null','MyYacc.py',119), - ('factor -> TRUE','factor',1,'p_factor_bool','MyYacc.py',124), - ('factor -> FALSE','factor',1,'p_factor_bool','MyYacc.py',125), - ('factor -> LPAREN expr RPAREN','factor',3,'p_factor_paren','MyYacc.py',133), - ('function -> id LPAREN variables RPAREN','function',4,'p_function','MyYacc.py',137), - ('variables -> variables COMMA expr','variables',3,'p_variables_comma','MyYacc.py',141), - ('variables -> expr','variables',1,'p_variables_factor','MyYacc.py',146), + ('sentence -> conditions THEN function','sentence',3,'p_conditions_relation_function','my_yacc.py',15), + ('conditions -> 
conditions OR and_conditions','conditions',3,'p_conditions_or','my_yacc.py',21), + ('conditions -> and_conditions','conditions',1,'p_conditions_and_conditions','my_yacc.py',25), + ('and_conditions -> and_conditions AND not_conditions','and_conditions',3,'p_and_conditions_and','my_yacc.py',30), + ('and_conditions -> not_conditions','and_conditions',1,'p_and_conditions_cdt','my_yacc.py',35), + ('not_conditions -> NOT cdt','not_conditions',2,'p_not_cdt','my_yacc.py',39), + ('not_conditions -> cdt','not_conditions',1,'p_not_conditions_cdt','my_yacc.py',43), + ('cdt -> expr EQUAL expr','cdt',3,'p_cdt_ops','my_yacc.py',48), + ('cdt -> expr NEQUAL expr','cdt',3,'p_cdt_ops','my_yacc.py',49), + ('cdt -> expr GE expr','cdt',3,'p_cdt_ops','my_yacc.py',50), + ('cdt -> expr GT expr','cdt',3,'p_cdt_ops','my_yacc.py',51), + ('cdt -> expr LE expr','cdt',3,'p_cdt_ops','my_yacc.py',52), + ('cdt -> expr LT expr','cdt',3,'p_cdt_ops','my_yacc.py',53), + ('cdt -> LPAREN conditions RPAREN','cdt',3,'p_cdt_parens','my_yacc.py',73), + ('expr -> expr PLUS term','expr',3,'p_expr_plus_minus','my_yacc.py',78), + ('expr -> expr MINUS term','expr',3,'p_expr_plus_minus','my_yacc.py',79), + ('expr -> term','expr',1,'p_expr_term','my_yacc.py',87), + ('term -> term TIMES factor','term',3,'p_term_times_divide_mod','my_yacc.py',92), + ('term -> term DIVIDE factor','term',3,'p_term_times_divide_mod','my_yacc.py',93), + ('term -> term MOD factor','term',3,'p_term_times_divide_mod','my_yacc.py',94), + ('term -> factor','term',1,'p_term_factor','my_yacc.py',104), + ('factor -> number','factor',1,'p_factor_assign_simple','my_yacc.py',109), + ('factor -> string','factor',1,'p_factor_assign_simple','my_yacc.py',110), + ('factor -> id','factor',1,'p_factor_id','my_yacc.py',115), + ('factor -> NULL','factor',1,'p_factor_null','my_yacc.py',119), + ('factor -> TRUE','factor',1,'p_factor_bool','my_yacc.py',124), + ('factor -> FALSE','factor',1,'p_factor_bool','my_yacc.py',125), + ('factor -> LPAREN expr RPAREN','factor',3,'p_factor_paren','my_yacc.py',133), + ('function -> id LPAREN variables RPAREN','function',4,'p_function','my_yacc.py',137), + ('variables -> variables COMMA expr','variables',3,'p_variables_comma','my_yacc.py',141), + ('variables -> expr','variables',1,'p_variables_factor','my_yacc.py',146), ] diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 8b8087a6..1bcb07d7 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -29,10 +29,10 @@ import inspect # Change these to modify the default behavior of yacc (if you wish) # ----------------------------------------------------------------------------- -YACC_DEbUG = False # Debugging mode. If set, yacc generates a +YACC_DEBUG = False # Debugging mode. If set, yacc generates a # a 'parser.out' file in the current directory -DEbUG_FILE = 'parser.out' # Default name of the debugging file +DEBUG_FILE = 'parser.out' # Default name of the debugging file ERROR_COUNT = 3 # Number of symbols that must be shifted to leave recovery mode RESULT_LIMIT = 40 # Size limit of results when running in debug mode. @@ -215,13 +215,6 @@ class LRParser: def disable_defaulted_states(self): self.defaulted_states = {} - # parse(). - # - # This is the core parsing engine. To operate, it requires a lexer object. - # Two options are provided. The debug flag turns on debugging so that you can - # see the various rule reductions and parsing steps. tracking turns on position - # tracking. 
In this mode, symbols will record the starting/ending line number and - # character index. def parse(self, put=None, lexer=None, debug=False, tracking=False): if isinstance(debug, int) and debug: @@ -458,42 +451,9 @@ class LRParser: continue raise RuntimeError('yacc: internal parser error!!!\n') - -# ----------------------------------------------------------------------------- -# === Grammar Representation === -# -# The following functions, classes, and variables are used to represent and -# manipulate the rules that make up a grammar. -# ----------------------------------------------------------------------------- - -# regex matching identifiers _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') -# ----------------------------------------------------------------------------- -# class Production: -# -# This class stores the raw information about a single production or grammar rule. -# A grammar rule refers to a specification such as this: -# -# expr : expr PLUS term -# -# Here are the basic attributes defined on all productions -# -# name - Name of the production. For example 'expr' -# prod - A list of symbols on the right side ['expr','PLUS','term'] -# prec - Production precedencelevel -# number - Production number. -# func - Function that executes on reduce -# file - File where production function is defined -# lineno - Line number where production function is defined -# -# The following attributes are defined or optional. -# -# len - Length of the production (number of symbols on right hand side) -# usyms - Set of unique symbols found in the production -# ----------------------------------------------------------------------------- - class Production(object): reduced = 0 @@ -663,10 +623,10 @@ class Grammar(object): self.follow = {} # A dictionary of precomputed follow(x) symbols - self.precedence= {} # precedencerules for each terminal. Contains tuples of the + self.precedence = {} # precedencerules for each terminal. Contains tuples of the # form ('right',level) or ('nonassoc', level) or ('left',level) - self.usedprecedence= set() # precedencerules that were actually used by the grammer. + self.usedprecedence = set() # precedencerules that were actually used by the grammer. # This is only used to provide error checking and to generate # a warning about unused precedencerules. 
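For readers following the precedence machinery being renamed in these hunks: the `self.precedence` dictionary above is filled from a user-supplied `precedence` tuple in the grammar module via `set_precedence(term, assoc, level)`, where an entry's position in the tuple becomes its level, so later rows bind tighter. A minimal sketch of such a declaration (the token names are illustrative assumptions, not taken from this repository):

    # Illustrative PLY-style precedence table (assumed token names).
    # Entries are (associativity, token, ...); position in the tuple
    # becomes the level, so later rows bind tighter than earlier ones.
    precedence = (
        ('nonassoc', 'LT', 'GT'),     # level 1: comparisons may not chain
        ('left', 'PLUS', 'MINUS'),    # level 2
        ('left', 'TIMES', 'DIVIDE'),  # level 3: binds tightest
    )

Each row is flattened into `(term, assoc, level)` triples, and the grammar stores them in the dictionary shown above as `(assoc, level)` tuples keyed by terminal, which the table builder later consults when resolving conflicts.
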
@@ -1504,61 +1464,64 @@ class LRTable: for p in item: if p.len == p.lr_index + 1: - if p.name == "S'": - st_action['$end'] = 0 - st_actionp['$end'] = p - else: - laheads = p.lookaheads[st] - for a in laheads: - actlist.append((a, p, f'reduce using rule {p.number} ({p})')) - r = st_action.get(a) - if r is not None: - shift, new_p, action_type, reject_p = self.handle_shift_reduce_conflict(st, a, p, r, - precedence, - productions, log) - if action_type == 'reduce': - st_action[a] = shift - st_actionp[a] = new_p - productions[new_p.number].reduced += 1 - elif action_type == 'shift': - self.log_shift_reduce_action(self, log, a, f"shift and go to state {j}") - else: - st_action[a] = None - else: - st_action[a] = -p.number - st_actionp[a] = p - productions[p.number].reduced += 1 + self.handle_reduce_actions(st, p, st_action, st_actionp, precedence, productions, actlist, log) else: - i = p.lr_index - a = p.prod[i + 1] - if a in self.grammar.terminals: - g = self.lr0_goto(item, a) - j = self.lr0_cidhash.get(id(g), -1) - if j >= 0: - actlist.append((a, p, f'shift and go to state {j}')) - r = st_action.get(a) - if r is not None: - if r > 0 and r != j: - raise LALRError(f'Shift/shift conflict in state {st}') - elif r < 0: - sprec, slevel = precedence.get(a, ('right', 0)) - rprec, rlevel = productions[st_actionp[a].number].prec - if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): - productions[st_actionp[a].number].reduced -= 1 - st_action[a] = j - st_actionp[a] = p - elif slevel == rlevel and rprec == 'nonassoc': - st_action[a] = None - else: - self.log_shift_reduce_action(self, log, a, "shift") - else: - st_action[a] = j - st_actionp[a] = p + self.handle_shift_actions(st, p, st_action, st_actionp, precedence, productions, actlist, log, item) + return st_action, st_actionp, st_goto, actlist + def handle_reduce_actions(self, st, p, st_action, st_actionp, precedence, productions, actlist, log): + """Handle reduce actions.""" + if p.name == "S'": + st_action['$end'] = 0 + st_actionp['$end'] = p + else: + laheads = p.lookaheads[st] + for a in laheads: + actlist.append((a, p, f'reduce using rule {p.number} ({p})')) + r = st_action.get(a) + if r is not None: + self.handle_shift_reduce_conflict(st, a, p, r, precedence, productions, log) + else: + st_action[a] = -p.number + st_actionp[a] = p + productions[p.number].reduced += 1 + + def handle_shift_actions(self, st, p, st_action, st_actionp, precedence, productions, actlist, log, item): + """Handle shift actions.""" + i = p.lr_index + a = p.prod[i + 1] + if a in self.grammar.terminals: + g = self.lr0_goto(item, a) + j = self.lr0_cidhash.get(id(g), -1) + if j >= 0: + actlist.append((a, p, f'shift and go to state {j}')) + r = st_action.get(a) + if r is not None: + self.handle_shift_shift_conflict(st, a, r, j, precedence, productions, st_action, st_actionp, log, p) + else: + st_action[a] = j + st_actionp[a] = p + + def handle_shift_shift_conflict(self, st, a, r, j, precedence, productions, st_action, st_actionp, log, p): + """Handle shift/shift conflicts.""" + if r > 0 and r != j: + raise LALRError(f'Shift/shift conflict in state {st}') + elif r < 0: + sprec, slevel = precedence.get(a, ('right', 0)) + rprec, rlevel = productions[st_actionp[a].number].prec + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): + productions[st_actionp[a].number].reduced -= 1 + st_action[a] = j + st_actionp[a] = p + elif slevel == rlevel and rprec == 'nonassoc': + st_action[a] = None + else: + self.log_shift_reduce_action(self, log, a, "shift") + 
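To see how `handle_shift_shift_conflict` above breaks the classic shift-versus-reduce tie, here is a small standalone sketch that mirrors (rather than calls) its precedence test; the grammar fragment and the level values are assumed for illustration only:

    # Standalone sketch of the precedence comparison used above.
    # Assume: expr -> expr MINUS expr, with precedence = (('left', 'MINUS'),)
    # After parsing "1 - 2" with another "-" as lookahead, shift and reduce
    # are both legal, and the (slevel, rlevel, assoc) test decides.
    sprec, slevel = 'left', 1   # precedence entry for the lookahead token
    rprec, rlevel = 'left', 1   # precedence entry for the completed rule

    if (slevel > rlevel) or (slevel == rlevel and rprec == 'right'):
        decision = 'shift'      # stronger or right-associative token wins
    elif slevel == rlevel and rprec == 'nonassoc':
        decision = 'error'      # nonassoc forbids chaining entirely
    else:
        decision = 'reduce'     # equal level, left-assoc: 1-2-3 == (1-2)-3

    print(decision)             # prints 'reduce'

The same comparison, with the shift and reduce roles swapped, appears in `handle_shift_reduce_conflict` earlier in this file.
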
def lr_parse_table(self):
         productions = self.grammar.productions
-        precedence= self.grammar.precedence
+        precedence = self.grammar.precedence
         goto = self.lr_goto
         action = self.lr_action
         log = self.log
@@ -1977,8 +1940,8 @@ class ParserReflect(object):
 # build a parser
 # -----------------------------------------------------------------------------
 
-def yacc(*, debug=YACC_DEbUG, module=None, start=None,
-         check_recursion=True, optimize=False, debugfile=DEbUG_FILE,
+def yacc(*, debug=YACC_DEBUG, module=None, start=None,
+         check_recursion=True, optimize=False, debugfile=DEBUG_FILE,
          debuglog=None, errorlog=None):
     # Reference to the parsing method of the last built parser
     global parse
-- 
Gitee


From 1bd4c4673b53ca3570b5fc82400d651cdfb6b5bb Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Tue, 10 Dec 2024 18:37:40 +0800
Subject: [PATCH 41/87] ewtwet

---
 script/local/parser/yacc.py | 299 ++++++++++++++++++------------------
 1 file changed, 146 insertions(+), 153 deletions(-)

diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py
index 1bcb07d7..7783eb16 100644
--- a/script/local/parser/yacc.py
+++ b/script/local/parser/yacc.py
@@ -682,20 +682,27 @@ class Grammar(object):
         """Handle literal tokens in the rule symbols."""
         for n, s in enumerate(syms):
             if s[0] in "'\"":
-                try:
-                    c = eval(s)
-                    if len(c) > 1:
-                        raise GrammarError(
-                            f'{file}:{line}: Literal token {s} in rule {prodname!r} may only be a single character')
-                    if c not in self.terminals:
-                        self.terminals[c] = []
+                c = self.process_literal_token(s, file, line, prodname)
+                if c is not None:
                     syms[n] = c
                     continue
-                except SyntaxError:
-                    pass
             if not _is_identifier.match(s) and s != '%prec':
                 raise GrammarError(f'{file}:{line}: Illegal name {s!r} in rule {prodname!r}')
 
+    def process_literal_token(self, s, file, line, prodname):
+        """Process a literal (quoted) token appearing in a rule's symbol list."""
+        try:
+            c = eval(s)
+            if len(c) > 1:
+                raise GrammarError(
+                    f'{file}:{line}: Literal token {s} in rule {prodname!r} may only be a single character')
+            if c not in self.terminals:
+                self.terminals[c] = []
+            return c
+        except SyntaxError:
+            pass
+        return None
+
     def handle_precedence(self, syms, file, line):
         """Handle precedence settings in the rule."""
         if '%prec' in syms:
@@ -1866,7 +1873,6 @@ class ParserReflect(object):
             p_function[3]))
         self.pfuncs = p_functions
 
-    # Validate all of the p_functions
     def validate_pfunctions(self):
         grammar = []
         # Check for non-empty symbols
@@ -1892,20 +1898,9 @@ class ParserReflect(object):
                 self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
                                  file, line, func.__name__)
             else:
-                try:
-                    parsed_g = parse_grammar(doc, file, line)
-                    for g in parsed_g:
-                        grammar.append((name, g))
-                except SyntaxError as e:
-                    self.log.error(str(e))
-                    self.error = True
-
-                # Looks like a valid grammar rule
-                # Mark the file in which defined.
+                self.process_grammar_rule(doc, file, line, name, grammar)
             self.modules.add(module)
 
-        # Secondary validation step that looks for p_ definitions that are not functions
-        # or functions that look like they might be grammar rules.
         for n, v in self.pdict.items():
             if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
                 continue
@@ -1918,82 +1913,123 @@ class ParserReflect(object):
 
         self.grammar = grammar
 
+    # Validate all of the p_functions
+    def process_grammar_rule(self, doc, file, line, name, grammar):
+        # Parse the docstring and collect the grammar rules it declares
+        parsed_g = self.parse_grammar_with_error_handling(doc, file, line)
+        if parsed_g is not None:
+            for g in parsed_g:
+                grammar.append((name, g))
+
+    def parse_grammar_with_error_handling(self, doc, file, line):
+        try:
+            return parse_grammar(doc, file, line)
+        except SyntaxError as e:
+            self.log.error(str(e))
+            self.error = True
+            return None
+
     def _check_possible_grammar_rule(self, v, n):
         """
        Helper function to check if a function might be a possible grammar rule.
        This is extracted from the loop to reduce complexity.
         """
-        if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
-                (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
-            if v.__doc__:
-                try:
-                    doc = v.__doc__.split(' ')
-                    if doc[1] == ':':
-                        self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
-                                         v.__code__.co_filename, v.__code__.co_firstlineno, n)
-                except IndexError:
-                    pass
+        if not self._is_possible_grammar_function(v):
+            return
+        if self._has_doc(v):
+            self._check_doc_for_grammar_rule(v, n)
 
-# -----------------------------------------------------------------------------
-#
-# build a parser
-# -----------------------------------------------------------------------------
+    def _is_possible_grammar_function(self, v):
+        """Check if v is a possible grammar function based on argument count."""
+        return (
+            (isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
+            (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)
+        )
+
+    def _has_doc(self, v):
+        """Check if v has a docstring."""
+        return v.__doc__ is not None
+
+    def _check_doc_for_grammar_rule(self, v, n):
+        """Check if the docstring of v follows the expected grammar rule format."""
+        try:
+            doc = v.__doc__.split(' ')
+            if doc[1] == ':':
+                self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
+                                 v.__code__.co_filename, v.__code__.co_firstlineno, n)
+        except IndexError:
+            pass
 
 def yacc(*, debug=YACC_DEBUG, module=None, start=None,
          check_recursion=True, optimize=False, debugfile=DEBUG_FILE,
          debuglog=None, errorlog=None):
-    # Reference to the parsing method of the last built parser
     global parse
 
+    # Initialize errorlog if None
     if errorlog is None:
         errorlog = Logger(sys.stderr)
 
     # Get the module dictionary used for the parser
-    if module:
-        _items = [(k, getattr(module, k)) for k in dir(module)]
-        pdict = dict(_items)
-        # If no __file__ or __package__ attributes are available, try to obtain them
-        # from the __module__ instead
-        if '__file__' not in pdict:
-            pdict['__file__'] = sys.modules[pdict['__module__']].__file__
-        if '__package__' not in pdict and '__module__' in pdict:
-            if hasattr(sys.modules[pdict['__module__']], '__package__'):
-                pdict['__package__'] = sys.modules[pdict['__module__']].__package__
-    else:
-        pdict = get_caller_module_dict(2)
+    pdict = get_module_dict(module)
 
-    # Set start symbol if it's specified directly using an argument
+    # Set start symbol if specified
    if start is not None:
        pdict['start'] = start
 
-    # Collect parser information from the dictionary
+    # Collect parser information
    pinfo = ParserReflect(pdict, log=errorlog)
    pinfo.get_all()
 
-    if pinfo.error:
+    # Handle errors
+    if pinfo.error or pinfo.validate_all():
raise YaccError('Unable to build parser') - if debuglog is None: - if debug: - try: - debuglog = Logger(open(debugfile, 'w')) - except IOError as e: - errorlog.warning("Couldn't open %r. %s" % (debugfile, e)) - debuglog = NullLogger() - else: - debuglog = NullLogger() + # Log warnings for missing error function + if not pinfo.error_func: + errorlog.warning('no p_error() function is defined') - errors = False + # Create a grammar object and add productions + grammar = create_grammar(pinfo, errorlog) + + # Set start symbol for grammar + set_start_symbol(start, pinfo, grammar, errorlog) - # Validate the parser information - if pinfo.validate_all(): + # Verify the grammar structure + errors = verify_grammar(grammar, errorlog) + + if errors: raise YaccError('Unable to build parser') - if not pinfo.error_func: - errorlog.warning('no p_error() function is defined') + # Check for recursion and conflicts + check_recursion_and_conflicts(grammar, errorlog, check_recursion) + + # Run the LRTable on the grammar and return the parser + lr = LRTable(grammar, debuglog) + report_conflicts(lr, debuglog, errorlog, debug) + return build_parser(lr, pinfo) - # Create a grammar object + +def get_module_dict(module): + if module: + return get_module_dict_from_module(module) + return get_caller_module_dict(2) + + +def get_module_dict_from_module(module): + _items = [(k, getattr(module, k)) for k in dir(module)] + pdict = dict(_items) + + # Ensure that __file__ and __package__ are set if not present + if '__file__' not in pdict: + pdict['__file__'] = sys.modules[pdict['__module__']].__file__ + if '__package__' not in pdict and '__module__' in pdict: + if hasattr(sys.modules[pdict['__module__']], '__package__'): + pdict['__package__'] = sys.modules[pdict['__module__']].__package__ + return pdict + + +def create_grammar(pinfo, errorlog): grammar = Grammar(pinfo.tokens) # Set precedencelevel for terminals @@ -2010,9 +2046,11 @@ def yacc(*, debug=YACC_DEBUG, module=None, start=None, grammar.add_production(prodname, syms, funcname, file, line) except GrammarError as e: errorlog.error('%s', e) - errors = True - # Set the grammar start symbols + return grammar + + +def set_start_symbol(start, pinfo, grammar, errorlog): try: if start is None: grammar.set_start(pinfo.start) @@ -2020,67 +2058,47 @@ def yacc(*, debug=YACC_DEBUG, module=None, start=None, grammar.set_start(start) except GrammarError as e: errorlog.error(str(e)) - errors = True - if errors: - raise YaccError('Unable to build parser') - # Verify the grammar structure +def verify_grammar(grammar, errorlog): + errors = False + + # Verify undefined symbols undefined_symbols = grammar.undefined_symbols() for sym, prod in undefined_symbols: errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) errors = True + # Check unused terminals unused_terminals = grammar.unused_terminals() if unused_terminals: - debuglog.info('') - debuglog.info('Unused terminals:') - debuglog.info('') - for term in unused_terminals: - errorlog.warning('Token %r defined, but not used', term) - debuglog.info(' %s', term) - - # Print out all productions to the debug log - if debug: - debuglog.info('') - debuglog.info('Grammar') - debuglog.info('') - for n, p in enumerate(grammar.productions): - debuglog.info('Rule %-5d %s', n, p) + report_unused_terminals(unused_terminals, errorlog) - # Find unused non-terminals + # Check unused non-terminals unused_rules = grammar.unused_rules() - for prod in unused_rules: - errorlog.warning('%s:%d: Rule %r 
defined, but not used', prod.file, prod.line, prod.name) + report_unused_rules(unused_rules, errorlog) - if len(unused_terminals) == 1: - errorlog.warning('There is 1 unused token') if len(unused_terminals) > 1: errorlog.warning('There are %d unused tokens', len(unused_terminals)) - - if len(unused_rules) == 1: - errorlog.warning('There is 1 unused rule') if len(unused_rules) > 1: errorlog.warning('There are %d unused rules', len(unused_rules)) - if debug: - debuglog.info('') - debuglog.info('terminals, with rules where they appear') - debuglog.info('') - terms = list(grammar.terminals) - terms.sort() - for term in terms: - debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.terminals[term]])) + # Log recursion or other errors + return errors + + +def report_unused_terminals(unused_terminals, errorlog): + errorlog.warning('Unused terminals:') + for term in unused_terminals: + errorlog.warning('Token %r defined, but not used', term) + + +def report_unused_rules(unused_rules, errorlog): + for prod in unused_rules: + errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) - debuglog.info('') - debuglog.info('nonterminals, with rules where they appear') - debuglog.info('') - nonterms = list(grammar.nonterminals) - nonterms.sort() - for nonterm in nonterms: - debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.nonterminals[nonterm]])) - debuglog.info('') +def check_recursion_and_conflicts(grammar, errorlog, check_recursion): if check_recursion: unreachable = grammar.find_unreachable() for u in unreachable: @@ -2089,62 +2107,37 @@ def yacc(*, debug=YACC_DEBUG, module=None, start=None, infinite = grammar.infinite_cycles() for inf in infinite: errorlog.error('Infinite recursion detected for symbol %r', inf) - errors = True unused_prec = grammar.unused_precedence() for term, assoc in unused_prec: errorlog.error('precedencerule %r defined for unknown symbol %r', assoc, term) - errors = True - if errors: - raise YaccError('Unable to build parser') - - # Run the LRTable on the grammar - lr = LRTable(grammar, debuglog) +def report_conflicts(lr, debuglog, errorlog, debug): if debug: num_sr = len(lr.sr_conflicts) - - # Report shift/reduce and reduce/reduce conflicts - if num_sr == 1: - errorlog.warning('1 shift/reduce conflict') - elif num_sr > 1: + if num_sr > 0: errorlog.warning('%d shift/reduce conflicts', num_sr) num_rr = len(lr.rr_conflicts) - if num_rr == 1: - errorlog.warning('1 reduce/reduce conflict') - elif num_rr > 1: + if num_rr > 0: errorlog.warning('%d reduce/reduce conflicts', num_rr) - # Write out conflicts to the output file - if debug and (lr.sr_conflicts or lr.rr_conflicts): - debuglog.warning('') - debuglog.warning('Conflicts:') - debuglog.warning('') - - for state, tok, resolution in lr.sr_conflicts: - debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) - already_reported = set() - for state, rule, rejected in lr.rr_conflicts: - if (state, id(rule), id(rejected)) in already_reported: - continue - debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) - debuglog.warning('rejected rule (%s) in state %d', rejected, state) - errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) - errorlog.warning('rejected rule (%s) in state %d', rejected, state) - already_reported.add((state, id(rule), id(rejected))) - - warned_never = [] - for state, rule, rejected in lr.rr_conflicts: - if not rejected.reduced 
and (rejected not in warned_never): - debuglog.warning('Rule (%s) is never reduced', rejected) - errorlog.warning('Rule (%s) is never reduced', rejected) - warned_never.append(rejected) - - # build the parser + # Report conflicts to debug log + if lr.sr_conflicts or lr.rr_conflicts: + debuglog.warning('') + debuglog.warning('Conflicts:') + for state, tok, resolution in lr.sr_conflicts: + debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) + for state, rule, rejected in lr.rr_conflicts: + debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + debuglog.warning('rejected rule (%s) in state %d', rejected, state) + errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + + +def build_parser(lr, pinfo): lr.bind_callables(pinfo.pdict) parser = LRParser(lr, pinfo.error_func) - + global parse parse = parser.parse return parser -- Gitee From dafe90e6f293f94c76095abba694271fa8684466 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Wed, 11 Dec 2024 02:04:53 +0000 Subject: [PATCH 42/87] 1211 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/yacc.py | 222 +++++++++++++++++------------------- 1 file changed, 103 insertions(+), 119 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 7783eb16..74ff1b9f 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -215,10 +215,8 @@ class LRParser: def disable_defaulted_states(self): self.defaulted_states = {} - def parse(self, put=None, lexer=None, debug=False, tracking=False): - if isinstance(debug, int) and debug: - debug = Logger(sys.stderr) + debug, lexer = self._initialize_parser(debug, lexer) lookahead = None lookaheadstack = [] actions = self.action @@ -227,12 +225,8 @@ class LRParser: defaulted_states = self.defaulted_states pslice = YaccProduction(None) errorcount = 0 - if debug: debug.info('PARSE DEbUG start') - if not lexer: - from . import lex - lexer = lex.lexer pslice.lexer = lexer pslice.parser = self if put is not None: @@ -248,43 +242,14 @@ class LRParser: symstack.append(sym) state = 0 while True: - if debug: - debug.debug('State : %s', state) - - if state not in defaulted_states: - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - ltype = lookahead.type - t = actions[state].get(ltype) - else: - t = defaulted_states[state] - if debug: - debug.debug('Defaulted state %s: Reduce using %d', state, -t) - - if debug: - debug.debug('Stack : %s', - ('%s . 
%s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - + lookahead, lookaheadstack, state, t = self.parse_step(state, lookahead, lookaheadstack, statestack, + symstack, actions, defaulted_states, debug, get_token) if t is not None: if t > 0: - statestack.append(t) - state = t - - if debug: - debug.debug('Action : Shift and goto state %s', t) - - symstack.append(lookahead) - lookahead = None - if errorcount: - errorcount -= 1 + # Call the new shift_and_goto function + state, symstack, lookahead, errorcount = self.shift_and_goto(t, statestack, symstack, lookahead, + debug, errorcount) continue - if t < 0: p = prod[-t] pname = p.name @@ -292,7 +257,6 @@ class LRParser: sym = YaccSymbol() sym.type = pname # Production name sym.value = None - if debug: if plen: debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, @@ -301,20 +265,11 @@ class LRParser: else: debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], goto[statestack[-1]][pname]) - if plen: targ = symstack[-plen - 1:] targ[0] = sym - - if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1, 'endlineno', t1.lineno) - sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + self.update_tracking_info(tracking, targ, sym) pslice.slice = targ - try: # Call the grammar rule with our special slice object del symstack[-plen:] @@ -336,18 +291,13 @@ class LRParser: lookahead = sym errorcount = ERROR_COUNT self.errorok = False - continue - else: - if tracking: sym.lineno = lexer.lineno sym.lexpos = lexer.lexpos - targ = [sym] pslice.slice = targ - try: self.state = state p.callable(pslice) @@ -365,21 +315,15 @@ class LRParser: lookahead = sym errorcount = ERROR_COUNT self.errorok = False - continue - if t == 0: n = symstack[-1] result = getattr(n, 'value', None) - if debug: debug.info('Done : Returning %s', format_result(result)) debug.info('PARSE DEbUG END') - return result - if t is None: - if debug: debug.error('Error : %s', ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) @@ -399,18 +343,7 @@ class LRParser: errtoken = None continue else: - if errtoken: - if hasattr(errtoken, 'lineno'): - lineno = lookahead.lineno - else: - lineno = 0 - if lineno: - sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) - else: - sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) - else: - sys.stderr.write('yacc: Parse error in input. 
EOF\n') - return + self.handle_syntax_error(errtoken, lookahead) else: errorcount = ERROR_COUNT @@ -422,25 +355,8 @@ class LRParser: continue if lookahead.type == '$end': return - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - if tracking: - sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) - sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) - lookahead = None - continue - t = YaccSymbol() - t.type = 'error' - - if hasattr(lookahead, 'lineno'): - t.lineno = t.endlineno = lookahead.lineno - if hasattr(lookahead, 'lexpos'): - t.lexpos = t.endlexpos = lookahead.lexpos - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t + lookahead = self.handle_error(lookahead, symstack, lookaheadstack, tracking) else: sym = symstack.pop() if tracking: @@ -451,6 +367,96 @@ class LRParser: continue raise RuntimeError('yacc: internal parser error!!!\n') + def _initialize_parser(self, debug, lexer): + if isinstance(debug, int) and debug: + debug = Logger(sys.stderr) + if not lexer: + from . import lex + lexer = lex.lexer + return debug, lexer + + def parse_step(self, state, lookahead, lookaheadstack, statestack, symstack, actions, defaulted_states, debug, + get_token): + if debug: + debug.debug('State : %s', state) + + if state not in defaulted_states: + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + if debug: + debug.debug('Defaulted state %s: Reduce using %d', state, -t) + + if debug: + debug.debug('Stack : %s', + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + + return lookahead, lookaheadstack, state, t + + def shift_and_goto(self, t, statestack, symstack, lookahead, debug, errorcount): + """Handle the shift and goto action during parsing.""" + statestack.append(t) # Shift the state + state = t + if debug: + debug.debug('Action : Shift and goto state %s', t) + symstack.append(lookahead) # Push the symbol onto the symbol stack + lookahead = None # Reset the lookahead token + if errorcount: + errorcount -= 1 # Decrement error count if there was a previous error + return state, symstack, lookahead, errorcount + + def update_tracking_info(self, tracking, targ, sym): + if tracking: + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + + def handle_error(self, lookahead, symstack, lookaheadstack, tracking): + sym = symstack[-1] + if sym.type == 'error': + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + lookahead = None + return lookahead + t = YaccSymbol() + t.type = 'error' + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + return lookahead + + def handle_syntax_error(self, errtoken, lookahead): + if errtoken: + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 + if lineno: + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) + else: + sys.stderr.write('yacc: Syntax error, token=%s' % 
errtoken.type) + else: + sys.stderr.write('yacc: Parse error in input. EOF\n') + return + + _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') @@ -524,30 +530,6 @@ class Production(object): self.callable = pdict[self.func] -# ----------------------------------------------------------------------------- -# class LRItem -# -# This class represents a specific stage of parsing a production rule. For -# example: -# -# expr : expr . PLUS term -# -# In the above, the "." represents the current location of the parse. Here -# basic attributes: -# -# name - Name of the production. For example 'expr' -# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] -# number - Production number. -# -# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term' -# then lr_next refers to 'expr -> expr PLUS . term' -# lr_index - LR item index (location of the ".") in the prod list. -# lookaheads - LALR lookahead symbols for this item -# len - Length of the production (number of symbols on right hand side) -# lr_after - List of all productions that immediately follow -# lr_before - Grammar symbol immediately before -# ----------------------------------------------------------------------------- - class LRItem(object): def __init__(self, p, n): self.name = p.name @@ -1112,7 +1094,6 @@ def traverse(x, n, stack, f, x_values, r, fp): element = stack.pop() - class LALRError(YaccError): pass @@ -1505,7 +1486,8 @@ class LRTable: actlist.append((a, p, f'shift and go to state {j}')) r = st_action.get(a) if r is not None: - self.handle_shift_shift_conflict(st, a, r, j, precedence, productions, st_action, st_actionp, log, p) + self.handle_shift_shift_conflict(st, a, r, j, precedence, productions, st_action, st_actionp, log, + p) else: st_action[a] = j st_actionp[a] = p @@ -1666,6 +1648,7 @@ def parse_rule(p, lastp, dline, file, ps): return prodname, syms, lastp + class ParserReflect(object): def __init__(self, pdict, log=None): self.pdict = pdict @@ -1961,6 +1944,7 @@ class ParserReflect(object): except IndexError: pass + def yacc(*, debug=YACC_DEBUG, module=None, start=None, check_recursion=True, optimize=False, debugfile=DEBUG_FILE, debuglog=None, errorlog=None): -- Gitee From 10359ff2fe882d5c7c9ab575a02d26df0e8ade2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=86=9B=E9=B9=8F?= <10406393+jun-peng-liu@user.noreply.gitee.com> Date: Thu, 12 Dec 2024 08:15:10 +0000 Subject: [PATCH 43/87] ASGASGAS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘军鹏 <10406393+jun-peng-liu@user.noreply.gitee.com> --- script/local/parser/my_yacc.py | 2303 +++++++++++++++++++++++++++++--- 1 file changed, 2141 insertions(+), 162 deletions(-) diff --git a/script/local/parser/my_yacc.py b/script/local/parser/my_yacc.py index 126888ce..11390a1a 100644 --- a/script/local/parser/my_yacc.py +++ b/script/local/parser/my_yacc.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- ############################################################################# -# Copyright (c) 2024 Huawei Technologies Co.,Ltd. +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. # # openGauss is licensed under Mulan PSL v2. 
# You can use this software according to the terms @@ -10,170 +10,2149 @@ # # http://license.coscl.org.cn/MulanPSL2 # -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" bASIS, # WITHOUT WARRANTIES OF ANY KIND, -# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, -# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# EITHER EXPRESS OR IMPLIED, INCLUDING bUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTAbILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : MyYacc.py is a utility to check security configurations info on local node. +# Description : LocalCheckOS.py is a utility to check OS info on local node. ############################################################################# -import os +import re +import types import sys +import inspect -localDirPath = os.path.dirname(os.path.realpath(__file__)) - -sys.path.append(sys.path[0] + "/../") -from local.parser.my_lexer import tokens -from local.parser.my_lexer import token_dict -from local.parser.functions import get_function -from local.parser.variables import get_variable -from local.parser.yacc import yacc - -def exec_fn(fn): - fn[0](*fn[1]) - -class MyYacc(): - - tokens = tokens - - def p_conditions_relation_function(p): - '''sentence : conditions THEN function - ''' - if p[1]: - exec_fn(p[3]) - - def p_conditions_or(p): - 'conditions : conditions OR and_conditions' - p[0] = p[1] or p[3] - - def p_conditions_and_conditions(p): - 'conditions : and_conditions' - p[0] = p[1] - - def p_and_conditions_and(p): - ''' - and_conditions : and_conditions AND not_conditions - ''' - p[0] = p[1] and p[3] - - def p_and_conditions_cdt(p): - 'and_conditions : not_conditions' - p[0] = p[1] - - def p_not_cdt(p): - 'not_conditions : NOT cdt' - p[0] = not p[2] - - def p_not_conditions_cdt(p): - 'not_conditions : cdt' - p[0] = p[1] - - def p_cdt_ops(p): - ''' - cdt : expr EQUAL expr - | expr NEQUAL expr - | expr GE expr - | expr GT expr - | expr LE expr - | expr LT expr - ''' - if p[2] == token_dict['EQUAL']: - p[0] = (p[1] == p[3]) - if p[2] == token_dict['NEQUAL']: - p[0] = (p[1] != p[3]) - if p[2] == token_dict['GE']: - p[0] = (p[1] >= p[3]) - if p[2] == token_dict['GT']: - p[0] = (p[1] > p[3]) - if p[2] == token_dict['LE']: - p[0] = (p[1] <= p[3]) - if p[2] == token_dict['LT']: - p[0] = (p[1] < p[3]) - - def p_cdt_parens(p): - 'cdt : LPAREN conditions RPAREN' - p[0] = p[2] - - def p_expr_plus_minus(p): - ''' - expr : expr PLUS term - | expr MINUS term - ''' - if p[2] == token_dict['PLUS']: - p[0] = p[1] + p[3] - if p[2] == token_dict['MINUS']: - p[0] = p[1] - p[3] - - def p_expr_term(p): - 'expr : term' - p[0] = p[1] - - def p_term_times_divide_mod(p): - ''' - term : term TIMES factor - | term DIVIDE factor - | term MOD factor - ''' - if p[2] == token_dict['TIMES']: - p[0] = p[1] * p[3] - if p[2] == token_dict['DIVIDE']: - p[0] = p[1] / p[3] - if p[2] == token_dict['MOD']: - p[0] = p[1] % p[3] - - def p_term_factor(p): - 'term : factor' - p[0] = p[1] - - def p_factor_assign_simple(p): - ''' - factor : number - | string - ''' - p[0] = p[1] - - def p_factor_id(p): - 'factor : id' - p[0] = get_variable(p[1]) - - def p_factor_null(p): - 'factor : NULL' - p[0] = None - - def p_factor_bool(p): - ''' - factor : TRUE - | FALSE - ''' - if p[1] == token_dict['TRUE']: - p[0] = True - elif p[1] == token_dict['FALSE']: - p[0] = False - - def p_factor_paren(p): - 'factor : LPAREN 
expr RPAREN' - p[0] = p[2] - - def p_function(p): - 'function : id LPAREN variables RPAREN' - p[0] = (get_function(p[1]), p[3]) - - def p_variables_comma(p): - ''' - variables : variables COMMA expr - ''' - p[1].append(p[3]) - p[0] = p[1] - - def p_variables_factor(p): - 'variables : expr' - p[0] = [p[1]] - - #Error rule for syntax errors - @staticmethod - def p_error(p): - raise Exception('Syntax error in input!') - - def build(self): - self.yacc = yacc(module=MyYacc) +# ----------------------------------------------------------------------------- +# === User configurable parameters === +# +# Change these to modify the default behavior of yacc (if you wish) +# ----------------------------------------------------------------------------- + +YACC_DEBUG = False # Debugging mode. If set, yacc generates a +# a 'parser.out' file in the current directory + +DEBUG_FILE = 'parser.out' # Default name of the debugging file +ERROR_COUNT = 3 # Number of symbols that must be shifted to leave recovery mode +RESULT_LIMIT = 40 # Size limit of results when running in debug mode. + +MAXINT = sys.maxsize + + +class Logger(object): + def __init__(self, f): + self.f = f + + def debug(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') + + info = debug + + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') + + critical = debug + + +# Null logger is used when no output is generated. Does nothing. +class NullLogger(object): + def __getattribute__(self, name): + return self + + def __call__(self, *args, **kwargs): + return self + + +# Exception raised for yacc-related errors +class YaccError(Exception): + pass + + +# Format the result message that the parser produces when running in debug mode. +def format_result(r): + repr_str = repr(r) + if '\n' in repr_str: + repr_str = repr(repr_str) + if len(repr_str) > RESULT_LIMIT: + repr_str = repr_str[:RESULT_LIMIT] + ' ...' + result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) + return result + + +# Format stack entries when the parser is running in debug mode +def format_stack_entry(r): + repr_str = repr(r) + if '\n' in repr_str: + repr_str = repr(repr_str) + if len(repr_str) < 16: + return repr_str + else: + return '<%s @ 0x%x>' % (type(r).__name__, id(r)) + + +# ----------------------------------------------------------------------------- +# === LR Parsing Engine === +# +# The following classes are used for the LR parser itself. These are not +# used during table construction and are independent of the actual LR +# table generation algorithm +# ----------------------------------------------------------------------------- + +# This class is used to hold non-terminal grammar symbols during parsing. +# It normally has the following attributes set: + +class YaccSymbol: + def __str__(self): + return self.type + + def __repr__(self): + return str(self) + + +# This class is a wrapper around the objects actually passed to each +# grammar rule. Index lookup and assignment actually assign the +# .value attribute of the underlying YaccSymbol object. +# The lineno() method returns the line number of a given +# item (or 0 if not defined). The linespan() method returns +# a tuple of (startline,endline) representing the range of lines +# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) +# representing the range of positional information for a symbol. 
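# For illustration only (rule and token names invented, not taken from this
# patch): a grammar rule callable receives the wrapper described above and
# reads or writes the .value of the underlying symbols through it.
def p_binary_plus(p):
    'expr : expr PLUS term'
    p[0] = p[1] + p[3]                 # index access goes through .value
    start_ln, end_ln = p.linespan(1)   # line range covered by the first expr
    start_px, end_px = p.lexspan(1)    # lexer positions (filled in when the
                                       # parser runs with tracking=True)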
+ +class YaccProduction: + def __init__(self, s, stack=None): + self.slice = s + self.stack = stack + self.lexer = None + self.parser = None + + def __getitem__(self, n): + if isinstance(n, slice): + return [s.value for s in self.slice[n]] + elif n >= 0: + return self.slice[n].value + else: + return self.stack[n].value + + def __setitem__(self, n, v): + self.slice[n].value = v + + def __getslice__(self, i, j): + return [s.value for s in self.slice[i:j]] + + def __len__(self): + return len(self.slice) + + def lineno(self, n): + return getattr(self.slice[n], 'lineno', 0) + + def set_lineno(self, n, lineno): + self.slice[n].lineno = lineno + + def linespan(self, n): + startline = getattr(self.slice[n], 'lineno', 0) + endline = getattr(self.slice[n], 'endlineno', startline) + return startline, endline + + def lexpos(self, n): + return getattr(self.slice[n], 'lexpos', 0) + + def set_lexpos(self, n, lexpos): + self.slice[n].lexpos = lexpos + + def lexspan(self, n): + startpos = getattr(self.slice[n], 'lexpos', 0) + endpos = getattr(self.slice[n], 'endlexpos', startpos) + return startpos, endpos + + @staticmethod + def error(): + raise SyntaxError + + +# ----------------------------------------------------------------------------- +# == LRParser == +# +# The LR Parsing engine. +# ----------------------------------------------------------------------------- + +class LRParser: + def __init__(self, lrtab, errorf): + self.productions = lrtab.lr_productions + self.action = lrtab.lr_action + self.goto = lrtab.lr_goto + self.errorfunc = errorf + self.set_defaulted_states() + self.errorok = True + + def errok(self): + self.errorok = True + + def restart(self): + del self.statestack[:] + del self.symstack[:] + sym = YaccSymbol() + sym.type = '$end' + self.symstack.append(sym) + self.statestack.append(0) + + # Defaulted state support. + # This method identifies parser states where there is only one possible reduction action. + # For such states, the parser can make a choose to make a rule reduction without consuming + # the next look-ahead token. This delayed invocation of the tokenizer can be useful in + # certain kinds of advanced parsing situations where the lexer and parser interact with + # each other or change states (i.e., manipulation of scope, lexer states, etc.). 
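    # A toy rendering of the idea (all action values invented): negative
    # entries encode "reduce by rule n", positive entries are shifts, and
    # 0 is the accept action.
    #
    #     action = {
    #         0: {'NUMBER': 3, 'LPAREN': 4},  # two shifts: not defaulted
    #         1: {'PLUS': -2},                # lone reduction: defaulted
    #         2: {'$end': 0},                 # accept is not a reduction
    #     }
    #
    # set_defaulted_states() below would record {1: -2}, so in state 1 the
    # parser reduces by rule 2 without asking the lexer for the next token.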
+ # + def set_defaulted_states(self): + self.defaulted_states = {} + for state, actions in self.action.items(): + rules = list(actions.values()) + if len(rules) == 1 and rules[0] < 0: + self.defaulted_states[state] = rules[0] + + def disable_defaulted_states(self): + self.defaulted_states = {} + + def parse(self, put=None, lexer=None, debug=False, tracking=False): + debug, lexer = self._initialize_parser(debug, lexer) + lookahead = None + lookaheadstack = [] + actions = self.action + goto = self.goto + prod = self.productions + defaulted_states = self.defaulted_states + pslice = YaccProduction(None) + errorcount = 0 + pslice.lexer = lexer + pslice.parser = self + if put is not None: + lexer.input(put) + get_token = self.token = lexer.token + statestack = self.statestack = [] + symstack = self.symstack = [] + pslice.stack = symstack + errtoken = None + statestack.append(0) + sym = YaccSymbol() + sym.type = '$end' + symstack.append(sym) + state = 0 + while True: + lookahead, lookaheadstack, state, t = self.parse_step(state, lookahead, lookaheadstack, statestack, + symstack, actions, defaulted_states, debug, get_token) + if t is not None: + if t > 0: + # Call the new shift_and_goto function + state, symstack, lookahead, errorcount = self.shift_and_goto(t, statestack, symstack, lookahead, + debug, errorcount) + continue + if t < 0: + p = prod[-t] + pname = p.name + plen = p.len + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + if debug: + self.log_reduce_action(debug, p, plen, symstack, statestack, goto) + if plen: + targ = symstack[-plen - 1:] + targ[0] = sym + self.update_tracking_info(tracking, targ, sym) + pslice.slice = targ + try: + # Call the grammar rule with our special slice object + del symstack[-plen:] + self.state = state + p.callable(pslice) + del statestack[-plen:] + self.log_debug_info(debug, pslice) + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = ERROR_COUNT + self.errorok = False + continue + else: + self.update_tracking_info(tracking, sym, lexer, pslice) + try: + self.state = state + p.callable(pslice) + self.log_debug_info(debug, pslice) + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = ERROR_COUNT + self.errorok = False + continue + if t == 0: + n = symstack[-1] + result = getattr(n, 'value', None) + self.log_parse_debug_info() + return result + if t is None: + if debug: + debug.error('Error : %s', + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + if errorcount == 0 or self.errorok: + errorcount = ERROR_COUNT + self.errorok = False + errtoken = lookahead + if errtoken.type == '$end': + errtoken = None # End of file! 
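                    # The branch below follows the usual yacc error protocol:
                    # a user-supplied p_error(tok) may call parser.errok() and
                    # return a replacement token to resume parsing at once;
                    # returning nothing keeps the parser in error-recovery
                    # mode until ERROR_COUNT symbols have been shifted.
                    # A typical handler (hypothetical names):
                    #
                    #     def p_error(tok):
                    #         if tok is None:   # ran off the end of the input
                    #             print('syntax error at EOF')
                    #         else:
                    #             print('syntax error near %r' % tok.value)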
+ if self.errorfunc: + if errtoken and not hasattr(errtoken, 'lexer'): + errtoken.lexer = lexer + self.state = state + tok = self.errorfunc(errtoken) + if self.errorok: + lookahead = tok + errtoken = None + continue + else: + self.handle_syntax_error(errtoken, lookahead) + + else: + errorcount = ERROR_COUNT + if len(statestack) <= 1 and lookahead.type != '$end': + lookahead = None + errtoken = None + state = 0 + del lookaheadstack[:] + continue + if lookahead.type == '$end': + return + if lookahead.type != 'error': + lookahead = self.handle_error(lookahead, symstack, lookaheadstack, tracking) + else: + state = self.pop_and_update_state(symstack, statestack, tracking, lookahead) + continue + raise RuntimeError('yacc: internal parser error!!!\n') + + @staticmethod + def _initialize_parser(debug, lexer): + if isinstance(debug, int) and debug: + debug = Logger(sys.stderr) + if not lexer: + from . import lex + lexer = lex.lexer + return debug, lexer + + @staticmethod + def parse_step(state, lookahead, lookaheadstack, statestack, symstack, actions, defaulted_states, debug, + get_token): + if debug: + debug.debug('State : %s', state) + + if state not in defaulted_states: + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + if debug: + debug.debug('Defaulted state %s: Reduce using %d', state, -t) + + if debug: + debug.debug('Stack : %s', + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + + return lookahead, lookaheadstack, state, t + + @staticmethod + def shift_and_goto(t, statestack, symstack, lookahead, debug, errorcount): + """Handle the shift and goto action during parsing.""" + statestack.append(t) # Shift the state + state = t + if debug: + debug.debug('Action : Shift and goto state %s', t) + symstack.append(lookahead) # Push the symbol onto the symbol stack + lookahead = None # Reset the lookahead token + if errorcount: + errorcount -= 1 # Decrement error count if there was a previous error + return state, symstack, lookahead, errorcount + + @staticmethod + def update_tracking_info(tracking, targ, sym): + if tracking: + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + + @staticmethod + def handle_error(lookahead, symstack, lookaheadstack, tracking): + sym = symstack[-1] + if sym.type == 'error': + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + lookahead = None + return lookahead + t = YaccSymbol() + t.type = 'error' + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + return lookahead + + @staticmethod + def handle_syntax_error(errtoken, lookahead): + if errtoken: + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 + if lineno: + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) + else: + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) + else: + sys.stderr.write('yacc: Parse error in input. 
EOF\n') + return + + @staticmethod + def log_reduce_action(debug, p, plen, symstack, statestack, goto): + if plen: + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, + '[' + ','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]]) + ']', + goto[statestack[-1 - plen]][p.name]) + else: + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], + goto[statestack[-1]][p.name]) + + @staticmethod + def log_debug_info(debug, pslice): + if debug: + debug.info('Result : %s', format_result(pslice[0])) + + @staticmethod + def update_tracking_sym(tracking, sym, lexer, pslice): + if tracking: + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + targ = [sym] + pslice.slice = targ + + @staticmethod + def log_parse_debug_info(debug, result): + if debug: + debug.info('Done : Returning %s', format_result(result)) + debug.info('PARSE DEbUG END') + + @staticmethod + def pop_and_update_state(symstack, statestack, tracking, lookahead): + # 弹出符号栈的顶部元素 + sym = symstack.pop() + + # 如果需要跟踪信息,更新lookahead的行号和位置 + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos + + # 弹出状态栈的顶部元素,并更新当前状态 + statestack.pop() + state = statestack[-1] + + # 返回更新后的状态 + return state + + +_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') + + +class Production(object): + reduced = 0 + + def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): + self.name = name + self.prod = tuple(prod) + self.number = number + self.func = func + self.callable = None + self.file = file + self.line = line + self.prec = precedence + + # Internal settings used during table construction + + self.len = len(self.prod) # Length of the production + + # Create a list of unique production symbols used in the production + self.usyms = [] + for s in self.prod: + if s not in self.usyms: + self.usyms.append(s) + + # List of all LR items for the production + self.lr_items = [] + self.lr_next = None + + # Create a string representation + if self.prod: + self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) + else: + self.str = '%s -> ' % self.name + + def __str__(self): + return self.str + + def __repr__(self): + return 'Production(' + str(self) + ')' + + def __len__(self): + return len(self.prod) + + def __nonzero__(self): + return 1 + + def __getitem__(self, index): + return self.prod[index] + + # Return the nth lr_item from the production (or None if at the end) + def lr_item(self, n): + if n > len(self.prod): + return None + p = LRItem(self, n) + # Precompute the list of productions immediately following. + try: + p.lr_after = self.Prodnames[p.prod[n + 1]] + except (IndexError, KeyError): + p.lr_after = [] + try: + p.lr_before = p.prod[n - 1] + except IndexError: + p.lr_before = None + return p + + # bind the production function name to a callable + def bind(self, pdict): + if self.func: + self.callable = pdict[self.func] + + +class LRItem(object): + def __init__(self, p, n): + self.name = p.name + self.prod = list(p.prod) + self.number = p.number + self.lr_index = n + self.lookaheads = {} + self.prod.insert(n, '.') + self.prod = tuple(self.prod) + self.len = len(self.prod) + self.usyms = p.usyms + + def __str__(self): + if self.prod: + s = '%s -> %s' % (self.name, ' '.join(self.prod)) + else: + s = '%s -> ' % self.name + return s + + def __repr__(self): + return 'LRItem(' + str(self) + ')' + + +# ----------------------------------------------------------------------------- +# +# Return the rightmost terminal from a list of symbols. 
Used in add_production() +# ----------------------------------------------------------------------------- +def rightmost_terminal(symbols, terminals): + i = len(symbols) - 1 + while i >= 0: + if symbols[i] in terminals: + return symbols[i] + i -= 1 + return None + + +# ----------------------------------------------------------------------------- +# === GRAMMAR CLASS === +# +# The following class represents the contents of the specified grammar along +# with various computed properties such as first sets, follow sets, LR items, etc. +# This data is used for critical parts of the table generation process later. +# ----------------------------------------------------------------------------- + +class GrammarError(YaccError): + pass + + +class Grammar(object): + def __init__(self, terminals): + self.productions = [None] # A list of all of the productions. The first + # entry is always reserved for the purpose of + # building an augmented grammar + + self.prodnames = {} # A dictionary mapping the names of nonterminals to a list of all + # productions of that nonterminal. + + self.prodmap = {} # A dictionary that is only used to detect duplicate + # productions. + + self.terminals = {} # A dictionary mapping the names of terminal symbols to a + # list of the rules where they are used. + + for term in terminals: + self.terminals[term] = [] + + self.terminals['error'] = [] + + self.nonterminals = {} # A dictionary mapping names of nonterminals to a list + # of rule numbers where they are used. + + self.first = {} # A dictionary of precomputed first(x) symbols + + self.follow = {} # A dictionary of precomputed follow(x) symbols + + self.precedence = {} # precedencerules for each terminal. Contains tuples of the + # form ('right',level) or ('nonassoc', level) or ('left',level) + + self.usedprecedence = set() # precedencerules that were actually used by the grammer. + # This is only used to provide error checking and to generate + # a warning about unused precedencerules. + + self.start = None # starting symbol for the grammar + + def __len__(self): + return len(self.productions) + + def __getitem__(self, index): + return self.productions[index] + + # ----------------------------------------------------------------------------- + # + # Sets the precedencefor a given terminal. assoc is the associativity such as + # 'left','right', or 'nonassoc'. level is a numeric level. + # + # ----------------------------------------------------------------------------- + + def set_precedence(self, term, assoc, level): + assert self.productions == [None], 'Must call set_precedence() before add_production()' + if term in self.precedence: + raise GrammarError('precedencealready specified for terminal %r' % term) + if assoc not in ['left', 'right', 'nonassoc']: + raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") + self.precedence[term] = (assoc, level) + + # ----------------------------------------------------------------------------- + # + # Given an action function, this function assembles a production rule and + # computes its precedencelevel. + # + # The production rule is supplied as a list of symbols. For example, + # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and + # symbols ['expr','PLUS','term']. + # + # precedenceis determined by the precedenceof the right-most non-terminal + # or the precedenceof a terminal specified by %prec. 
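    # For concreteness (token names and the UMINUS pseudo-token invented):
    #
    #     precedence = (
    #         ('left', 'PLUS', 'MINUS'),    # lowest binding strength
    #         ('left', 'TIMES', 'DIVIDE'),
    #         ('right', 'UMINUS'),          # referenced only via %prec
    #     )
    #
    #     def p_expr_uminus(p):
    #         'expr : MINUS expr %prec UMINUS'
    #         p[0] = -p[2]
    #
    # Without %prec this rule would inherit the low precedence of MINUS, its
    # rightmost terminal; %prec rebinds it to the UMINUS level instead.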
+ # + # A variety of error checks are performed to make sure production symbols + # are valid and that %prec is used correctly. + # ----------------------------------------------------------------------------- + + def validate_prodname(self, prodname, file, line): + """Validate the production name.""" + if prodname in self.terminals: + raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}. Already defined as a token') + if prodname == 'error': + raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}. error is a reserved word') + if not _is_identifier.match(prodname): + raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}') + + def handle_literal_tokens(self, syms, file, line, prodname): + """Handle literal tokens in the rule symbols.""" + for n, s in enumerate(syms): + if s[0] in "'\"": + c = self.proccess_literal_token(s, file, line, prodname) + if c is not None: + syms[n] = c + continue + if not _is_identifier.match(s) and s != '%prec': + raise GrammarError(f'{file}:{line}: Illegal name {s!r} in rule {prodname!r}') + + def proccess_literal_token(self, s, file, line, prodname): + """处理文字(literal)token.""" + try: + c = eval(s) + if len(c) > 1: + raise GrammarError( + f'{file}:{line}: Literal token {s} in rule {prodname!r} may only be a single character') + if c not in self.terminals: + self.terminals[c] = [] + return c + except SyntaxError: + pass + return None + + def handle_precedence(self, syms, file, line): + """Handle precedencesettings in the rule.""" + if '%prec' in syms: + if syms[-1] == '%prec': + raise GrammarError(f'{file}:{line}: Syntax error. Nothing follows %%prec') + if syms[-2] != '%prec': + raise GrammarError(f'{file}:{line}: Syntax error. %%prec can only appear at the end of a grammar rule') + precname = syms[-1] + prodprec = self.precedence.get(precname) + if not prodprec: + raise GrammarError(f'{file}:{line}: Nothing known about the precedenceof {precname!r}') + self.usedprecedence.add(precname) + del syms[-2:] # Drop %prec from the rule + return prodprec + else: + # If no %prec, precedenceis determined by the rightmost terminal symbol + precname = rightmost_terminal(syms, self.terminals) + return self.precedence.get(precname, ('right', 0)) + + def check_duplicate_rule(self, prodname, syms, file, line): + """Check for duplicate rule definitions.""" + rule_map = f'{prodname} -> {syms}' + if rule_map in self.prodmap: + m = self.prodmap[rule_map] + raise GrammarError(f'{file}:{line}: Duplicate rule {rule_map}. 
Previous definition at {m.file}:{m.line}') + + def add_production(self, prodname, syms, func=None, file='', line=0): + """Main method to add a production.""" + # Validate the production name + self.validate_prodname(prodname, file, line) + + # Handle literal tokens in the symbols + self.handle_literal_tokens(syms, file, line, prodname) + + # Handle precedence + prodprec = self.handle_precedence(syms, file, line) + + # Check for duplicate rules + self.check_duplicate_rule(prodname, syms, file, line) + + # Create a new production instance + pnumber = len(self.productions) + if prodname not in self.nonterminals: + self.nonterminals[prodname] = [] + + # Add the production number to terminals and nonterminals + for t in syms: + if t in self.terminals: + self.terminals[t].append(pnumber) + else: + if t not in self.nonterminals: + self.nonterminals[t] = [] + self.nonterminals[t].append(pnumber) + + # Create and add the production + p = Production(pnumber, prodname, syms, prodprec, func, file, line) + self.productions.append(p) + self.prodmap[f'{prodname} -> {syms}'] = p + + # Add to the global productions list + try: + self.prodnames[prodname].append(p) + except KeyError: + self.prodnames[prodname] = [p] + + # ----------------------------------------------------------------------------- + # + # Sets the starting symbol and creates the augmented grammar. Production + # rule 0 is S' -> start where start is the start symbol. + # ----------------------------------------------------------------------------- + + def set_start(self, start=None): + if not start: + start = self.productions[1].name + if start not in self.nonterminals: + raise GrammarError('start symbol %s undefined' % start) + self.productions[0] = Production(0, "S'", [start]) + self.nonterminals[start].append(0) + self.start = start + + # ----------------------------------------------------------------------------- + # + # Find all of the nonterminal symbols that can't be reached from the starting + # symbol. Returns a list of nonterminals that can't be reached. + # ----------------------------------------------------------------------------- + + def find_unreachable(self): + + # Mark all symbols that are reachable from a symbol s + def mark_reachable_from(s): + if s in reachable: + return + reachable.add(s) + for p in self.prodnames.get(s, []): + for r in p.prod: + mark_reachable_from(r) + + reachable = set() + mark_reachable_from(self.productions[0].prod[0]) + return [s for s in self.nonterminals if s not in reachable] + + # ----------------------------------------------------------------------------- + # + # This function looks at the various parsing rules and tries to detect + # infinite recursion cycles (grammar rules where there is no possible way + # to derive a string of only terminals). 
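    # Sketch of the fixed point (toy grammar, names invented): given the lone
    # rule  a : a PLUS a , terminates['a'] starts out False and can never be
    # flipped, because every production of 'a' mentions 'a' itself:
    #
    #     terminates = {'PLUS': True, 'a': False}
    #     prods = {'a': [['a', 'PLUS', 'a']]}
    #     changed = True
    #     while changed:
    #         changed = False
    #         for n, bodies in prods.items():
    #             if not terminates[n] and any(all(terminates[s] for s in rhs)
    #                                          for rhs in bodies):
    #                 terminates[n] = changed = True
    #     # [s for s, ok in terminates.items() if not ok]  ->  ['a']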
+ # ----------------------------------------------------------------------------- + + def infinite_cycles(self): + terminates = {} + + # terminals: + for t in self.terminals: + terminates[t] = True + + terminates['$end'] = True + + # nonterminals: + + # Initialize to false: + for n in self.nonterminals: + terminates[n] = False + + # Propagate termination until no change + self.propagate_termination(terminates) + + # Collect symbols that do not terminate + infinite = self.collect_infinite(terminates) + + return infinite + + def propagate_termination(self, terminates): + while True: + some_change = False + for (n, pl) in self.prodnames.items(): + some_change |= self.check_productions_for_termination(n, pl, terminates) + if not some_change: + break + + def check_productions_for_termination(self, n, productions, terminates): + some_change = False + for p in productions: + p_terminates = self.check_production_termination(p, terminates) + if p_terminates: + if not terminates[n]: + terminates[n] = True + some_change = True + # Don't need to consider any more productions for this nonterminal. + break + return some_change + + def check_production_termination(self, production, terminates): + for s in production.prod: + if not terminates.get(s, False): + # If any symbol does not terminate, the production does not terminate. + return False + # All symbols terminate, so production terminates. + return True + + def collect_infinite(self, terminates): + infinite = [] + for (s, term) in terminates.items(): + if not term: + if s not in self.prodnames and s not in self.terminals and s != 'error': + # s is used-but-not-defined, and we've already warned of that, + # so it would be overkill to say that it's also non-terminating. + pass + else: + infinite.append(s) + return infinite + + def undefined_symbols(self): + result = [] + for p in self.productions: + if not p: + continue + + for s in p.prod: + if s not in self.prodnames and s not in self.terminals and s != 'error': + result.append((s, p)) + return result + + def unused_terminals(self): + unused_tok = [] + for s, v in self.terminals.items(): + if s != 'error' and not v: + unused_tok.append(s) + + return unused_tok + + def unused_rules(self): + unused_prod = [] + for s, v in self.nonterminals.items(): + if not v: + p = self.prodnames[s][0] + unused_prod.append(p) + return unused_prod + + # ----------------------------------------------------------------------------- + # + # Returns a list of tuples (term,precedence) corresponding to precedence + # rules that were never used by the grammar. term is the name of the terminal + # on which precedencewas applied and precedenceis a string such as 'left' or + # 'right' corresponding to the type of precedence. + # ----------------------------------------------------------------------------- + + def unused_precedence(self): + unused = [] + for termname in self.precedence: + if not (termname in self.terminals or termname in self.usedprecedence): + unused.append((termname, self.precedence[termname][0])) + + return unused + + def _first(self, beta): + # We are computing first(x1,x2,x3,...,xn) + result = [] + for x in beta: + x_produces_empty = self._process_first_set(x, result) + if not x_produces_empty: + # We don't have to consider any further symbols in beta. + break + else: + # There was no 'break' from the loop, + # so x_produces_empty was true for all x in beta, + # so beta produces empty as well. 
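            # Worked example (toy grammar, invented): with A -> 'a' | <empty>
            # and B -> 'b', _first(['A', 'B']) collects 'a' from first[A],
            # sees that A can vanish, moves on and adds 'b' from first[B];
            # _first([]) skips the loop entirely, so only '' is returned.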
+ result.append('') + + return result + + def _process_first_set(self, x, result): + x_produces_empty = False + # Add all the non- symbols of first[x] to the result. + for f in self.first[x]: + if f == '': + x_produces_empty = True + else: + if f not in result: + result.append(f) + return x_produces_empty + + def compute_first(self): + if self.first: + return self.first + # terminals: + for t in self.terminals: + self.first[t] = [t] + self.first['$end'] = ['$end'] + # nonterminals: + # Initialize to the empty set: + for n in self.nonterminals: + self.first[n] = [] + # Then propagate symbols until no change: + while True: + some_change = False + some_change = self._propagate_first() + if not some_change: + break + return self.first + + def _propagate_first(self): + some_change = False + for n in self.nonterminals: + some_change |= self._update_first_set(n) + return some_change + + def _update_first_set(self, nonterminal): + some_change = False + for p in self.prodnames[nonterminal]: + for f in self._first(p.prod): + if f not in self.first[nonterminal]: + self.first[nonterminal].append(f) + some_change = True + return some_change + + def compute_follow(self, start=None): + # If already computed, return the result + if self.follow: + return self.follow + + # If first sets not computed yet, do that first. + if not self.first: + self.compute_first() + + # Add '$end' to the follow list of the start symbol + for k in self.nonterminals: + self.follow[k] = [] + + if not start: + start = self.productions[1].name + + self.follow[start] = ['$end'] + + while True: + didadd = self.process_productions() + if not didadd: + break + + return self.follow + + def process_productions(self): + didadd = False + for p in self.productions[1:]: + didadd = self.process_production(p, didadd) + return didadd + + def process_production(self, p, didadd): + for i, b in enumerate(p.prod): + if b in self.nonterminals: + fst = self._first(p.prod[i + 1:]) + didadd = self.process_first_set(fst, b, p, i, didadd) + return didadd + + def process_first_set(self, fst, b, p, i, didadd): + hasempty = False + for f in fst: + if f != '' and f not in self.follow[b]: + self.follow[b].append(f) + didadd = True + if f == '': + hasempty = True + if hasempty or i == (len(p.prod) - 1): + didadd = self.add_follow_to_nonterminal(p, b, didadd) + return didadd + + def add_follow_to_nonterminal(self, p, b, didadd): + for f in self.follow[p.name]: + if f not in self.follow[b]: + self.follow[b].append(f) + didadd = True + return didadd + + def build_lritems(self): + for p in self.productions: + lastlri = p + i = 0 + lr_items = [] + while True: + lri = self._process_lr_item(p, i, lastlri) + if not lri: + break + lr_items.append(lri) + lastlri = lri + i += 1 + p.lr_items = lr_items + + def _process_lr_item(self, p, i, lastlri): + """ + Process a single LR item step and return the next lri object. 
+ """ + if i > len(p): + lri = None + else: + lri = LRItem(p, i) + # Precompute the list of productions immediately following + try: + lri.lr_after = self.prodnames[lri.prod[i + 1]] + except (IndexError, KeyError): + lri.lr_after = [] + try: + lri.lr_before = lri.prod[i - 1] + except IndexError: + lri.lr_before = None + + lastlri.lr_next = lri + return lri + + +def digraph(nodes, edges, fp): + # 初始化每个节点的状态为0 + n = {} + for node in nodes: + n[node] = 0 + + stack = [] + f = {} + + # 遍历图中的每个节点 + for node in nodes: + if n[node] == 0: + traverse(node, n, stack, f, nodes, edges, fp) + + return f + + +def traverse(x, n, stack, f, x_values, r, fp): + stack.append(x) + d = len(stack) + n[x] = d + f[x] = fp(x) # f(x) <- f'(x) + + related = r(x) # Get y's related to x + for y in related: + if n[y] == 0: + traverse(y, n, stack, f, x_values, r, fp) + n[x] = min(n[x], n[y]) + for a in f.get(y, []): + if a not in f[x]: + f[x].append(a) + if n[x] == d: + n[stack[-1]] = MAXINT + f[stack[-1]] = f[x] + element = stack.pop() + while element != x: + n[stack[-1]] = MAXINT + f[stack[-1]] = f[x] + element = stack.pop() + + +class LALRError(YaccError): + pass + + +# ----------------------------------------------------------------------------- +# == LRTable == +# +# This class implements the LR table generation algorithm. There are no +# public methods. +# ----------------------------------------------------------------------------- + +class LRTable: + def __init__(self, grammar, log=None): + self.grammar = grammar + + # Set up the logger + if not log: + log = NullLogger() + self.log = log + + # Internal attributes + self.lr_action = {} # Action table + self.lr_goto = {} # Goto table + self.lr_productions = grammar.productions # Copy of grammar Production array + self.lr_goto_cache = {} # Cache of computed gotos + self.lr0_cidhash = {} # Cache of closures + + self._add_count = 0 # Internal counter used to detect cycles + + # Diagnostic information filled in by the table generator + self.sr_conflict = 0 + self.rr_conflict = 0 + self.conflicts = [] # List of conflicts + + self.sr_conflicts = [] + self.rr_conflicts = [] + + # build the tables + self.grammar.build_lritems() + self.grammar.compute_first() + self.grammar.compute_follow() + self.lr_parse_table() + + # bind all production function names to callable objects in pdict + def bind_callables(self, pdict): + for p in self.lr_productions: + p.bind(pdict) + + def lr0_closure(self, input_items): + self._add_count += 1 + closure_items = input_items[:] + did_add = True + while did_add: + did_add = self._process_lr0_closure(closure_items) + return closure_items + + def _process_lr0_closure(self, closure_items): + """ + Process a single step of the lr0 closure algorithm. + It tries to add new LR items to the closure. 
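        Textbook restatement (toy grammar, invented names) of the same fixed
        point, with items written as plain (lhs, rhs, dot) tuples:

            items = [("S'", ('S',), 0)]
            prods = {'S': [('E',)], 'E': [('T',)], 'T': [('id',)]}
            added = True
            while added:
                added = False
                for lhs, rhs, dot in items:
                    if dot < len(rhs) and rhs[dot] in prods:
                        for body in prods[rhs[dot]]:
                            if (rhs[dot], body, 0) not in items:
                                items.append((rhs[dot], body, 0))
                                added = True

        afterwards items also holds S -> . E, E -> . T and T -> . id.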
+ """ + did_add = False + for item in closure_items: + for x in item.lr_after: + if getattr(x, 'lr0_added', 0) == self._add_count: + continue + # Add b --> .G to closure_items + closure_items.append(x.lr_next) + x.lr0_added = self._add_count + did_add = True + + return did_add + + def lr0_goto(self, input_items, x): + # first we look for a previously cached entry + g = self.lr_goto_cache.get((id(input_items), x)) + if g: + return g + + # Now we generate the goto set in a way that guarantees uniqueness + # of the result + + s = self.lr_goto_cache.get(x) + if not s: + s = {} + self.lr_goto_cache[x] = s + + gs = [] + for p in input_items: + n = p.lr_next + if n and n.lr_before == x: + s1 = s.get(id(n)) + if not s1: + s1 = {} + s[id(n)] = s1 + gs.append(n) + s = s1 + g = s.get('$end') + if not g: + if gs: + g = self.lr0_closure(gs) + s['$end'] = g + else: + s['$end'] = gs + self.lr_goto_cache[(id(input_items), x)] = g + return g + + def lr0_items(self): + closure_set = [self.lr0_closure([self.grammar.productions[0].lr_next])] + i = 0 + for item_set in closure_set: + self.lr0_cidhash[id(item_set)] = i + i += 1 + i = 0 + while i < len(closure_set): + item_set = closure_set[i] + i += 1 + symbols = {} + for item in item_set: + for symbol in item.usyms: + symbols[symbol] = None + for symbol in symbols: + g = self.lr0_goto(item_set, symbol) + if not g or id(g) in self.lr0_cidhash: + continue + self.lr0_cidhash[id(g)] = len(closure_set) + closure_set.append(g) + return closure_set + + def compute_nullable_nonterminals(self): + nullable = set() + num_nullable = 0 + while True: + num_nullable = self._process_nullable_step(nullable, num_nullable) + if len(nullable) == num_nullable: + break + return nullable + + def _process_nullable_step(self, nullable, num_nullable): + for p in self.grammar.productions[1:]: + if p.len == 0: + nullable.add(p.name) + continue + for t in p.prod: + if t not in nullable: + break + else: + nullable.add(p.name) + return len(nullable) + + def find_nonterminal_transitions(self, input_item): + trans = [] + for stateno, state in enumerate(input_item): + for p in state: + self._process_transition(p, stateno, trans) + return trans + + def _process_transition(self, p, stateno, trans): + """ + Process a single transition and update the trans list. + This method checks if the transition should be added. 
+ """ + if p.lr_index < p.len - 1: + t = (stateno, p.prod[p.lr_index + 1]) + if t[1] in self.grammar.nonterminals: + if t not in trans: + trans.append(t) + + def dr_relation(self, input_item, trans, nullable): + state, n = trans + terms = [] + g = self.lr0_goto(input_item[state], n) + for p in g: + self._process_relation(p, terms) + if state == 0 and n == self.grammar.productions[0].prod[0]: + terms.append('$end') + return terms + + def _process_relation(self, p, terms): + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index + 1] + if a in self.grammar.terminals: + if a not in terms: + terms.append(a) + + def reads_relation(self, item, trans, empty): + # Look for empty transitions + rel = [] + state, n = trans + + g = self.lr0_goto(item[state], n) + j = self.lr0_cidhash.get(id(g), -1) + for p in g: + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index + 1] + if a in empty: + rel.append((j, a)) + + return rel + + def compute_lookback_includes(self, item, trans, nullable): + lookdict = {} + includedict = {} + dtrans = {t: 1 for t in trans} + for state, n in trans: + lookb = [] + includes = [] + for p in item[state]: + if p.name != n: + continue + self._process_lookback_and_include(item, state, p, dtrans, includes, lookb, nullable) + for i in includes: + if i not in includedict: + includedict[i] = [] + includedict[i].append((state, n)) + lookdict[(state, n)] = lookb + return lookdict, includedict + + def _process_lookback_and_include(self, item, state, p, dtrans, includes, lookb, nullable): + """ + Process lookback and include relations for a single production. + This handles the inner `while` loop logic and `lookb` and `includes` updates. + """ + lr_index = p.lr_index + j = state + while lr_index < p.len - 1: + lr_index += 1 + t = p.prod[lr_index] + if (j, t) in dtrans: + self._process_include_relation(p, lr_index, j, t, includes, nullable) + g = self.lr0_goto(item[j], t) + j = self.lr0_cidhash.get(id(g), -1) + self._process_lookback_relation(item, j, p, lookb) + + def _process_include_relation(self, p, lr_index, j, t, includes, nullable): + """ + Process the includes relation based on the production and nullable symbols. + """ + li = lr_index + 1 + while li < p.len: + if p.prod[li] in self.grammar.terminals: + break + if p.prod[li] not in nullable: + break + li += 1 + else: + includes.append((j, t)) + + @staticmethod + def _process_lookback_relation(item, j, p, lookb): + """ + Process the lookback relation by comparing the current and previous productions. 
+ """ + for r in item[j]: + if r.name != p.name: + continue + if r.len != p.len: + continue + i = 0 + while i < r.lr_index: + if r.prod[i] != p.prod[i + 1]: + break + i += 1 + else: + lookb.append((j, r)) + + def compute_read_sets(self, c, ntrans, nullable): + fp = lambda x: self.dr_relation(c, x, nullable) + r = lambda x: self.reads_relation(c, x, nullable) + f = digraph(ntrans, r, fp) + return f + + @staticmethod + def compute_follow_sets(ntrans, readsets, inclsets): + fp = lambda x: readsets[x] + r = lambda x: inclsets.get(x, []) + f = digraph(ntrans, r, fp) + return f + + def add_lookaheads(self, lookbacks, followset): + for trans, lb in lookbacks.items(): + # Loop over productions in lookback + for state, p in lb: + self._ensure_lookaheads(p, state) # Ensure lookaheads for the production + + f = followset.get(trans, []) + self._add_lookaheads_to_production(p, state, f) # Add lookaheads from followset + + @staticmethod + def _ensure_lookaheads(p, state): + if state not in p.lookaheads: + p.lookaheads[state] = [] + + @staticmethod + def _add_lookaheads_to_production(p, state, followset_elements): + for a in followset_elements: + if a not in p.lookaheads[state]: + p.lookaheads[state].append(a) + + # ----------------------------------------------------------------------------- + # + # This function does all of the work of adding lookahead information for use + # with LALR parsing + # ----------------------------------------------------------------------------- + + def add_lalr_lookaheads(self, c): + # Determine all of the nullable nonterminals + nullable = self.compute_nullable_nonterminals() + + # Find all non-terminal transitions + trans = self.find_nonterminal_transitions(c) + + # Compute read sets + readsets = self.compute_read_sets(c, trans, nullable) + + # Compute lookback/includes relations + lookd, included = self.compute_lookback_includes(c, trans, nullable) + + # Compute LALR follow sets + followsets = self.compute_follow_sets(trans, readsets, included) + + # Add all of the lookaheads + self.add_lookaheads(lookd, followsets) + + @staticmethod + def handle_shift_reduce_conflict(st, a, p, r, precedence, productions, log, j=None): + """Handle shift/reduce conflict.""" + if r > 0: + sprec, slevel = precedence.get(a, ('right', 0)) + rprec, rlevel = productions[p.number].prec + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): + return -p.number, p, 'reduce', None + elif (slevel == rlevel) and (rprec == 'nonassoc'): + return None, None, None, None + else: + return j, p, 'shift', None + elif r < 0: + oldp = productions[-r] + pp = productions[p.number] + if oldp.line > pp.line: + return -p.number, p, 'reduce', oldp + else: + return -oldp.number, oldp, 'reduce', pp + return None, None, None, None + + @staticmethod + def log_shift_reduce_action(log, a, m): + """Log shift/reduce or reduce/reduce actions.""" + log.info(' %-15s %s', a, m) + + def process_state_transitions(self, st, item, st_action, precedence, productions, action, goto, log): + """Process state transitions and handle conflicts.""" + st_goto = {} + actlist = [] + st_actionp = {} + + for p in item: + if p.len == p.lr_index + 1: + self.handle_reduce_actions(st, p, st_action, st_actionp, precedence, productions, actlist, log) + else: + self.handle_shift_actions(st, p, st_action, st_actionp, precedence, productions, actlist, log, item) + + return st_action, st_actionp, st_goto, actlist + + def handle_reduce_actions(self, st, p, st_action, st_actionp, precedence, productions, actlist, log): + """Handle reduce 
actions.""" + if p.name == "S'": + st_action['$end'] = 0 + st_actionp['$end'] = p + else: + laheads = p.lookaheads[st] + for a in laheads: + actlist.append((a, p, f'reduce using rule {p.number} ({p})')) + r = st_action.get(a) + if r is not None: + self.handle_shift_reduce_conflict(st, a, p, r, precedence, productions, log) + else: + st_action[a] = -p.number + st_actionp[a] = p + productions[p.number].reduced += 1 + + def handle_shift_actions(self, st, p, st_action, st_actionp, precedence, productions, actlist, log, item): + """Handle shift actions.""" + i = p.lr_index + a = p.prod[i + 1] + if a in self.grammar.terminals: + g = self.lr0_goto(item, a) + j = self.lr0_cidhash.get(id(g), -1) + if j >= 0: + actlist.append((a, p, f'shift and go to state {j}')) + r = st_action.get(a) + if r is not None: + self.handle_shift_shift_conflict(st, a, r, j, precedence, productions, st_action, st_actionp, log, + p) + else: + st_action[a] = j + st_actionp[a] = p + + def handle_shift_shift_conflict(self, st, a, r, j, precedence, productions, st_action, st_actionp, log, p): + """Handle shift/shift conflicts.""" + if r > 0 and r != j: + raise LALRError(f'Shift/shift conflict in state {st}') + elif r < 0: + sprec, slevel = precedence.get(a, ('right', 0)) + rprec, rlevel = productions[st_actionp[a].number].prec + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): + productions[st_actionp[a].number].reduced -= 1 + st_action[a] = j + st_actionp[a] = p + elif slevel == rlevel and rprec == 'nonassoc': + st_action[a] = None + else: + self.log_shift_reduce_action(self, log, a, "shift") + + def lr_parse_table(self): + productions = self.grammar.productions + precedence = self.grammar.precedence + goto = self.lr_goto + action = self.lr_action + log = self.log + actionp = {} + item = self.lr0_items() + self.add_lalr_lookaheads(item) + st = 0 + for i in item: + log.info('') + log.info(f'state {st}') + log.info('') + self._log_productions(i, log) # Log productions for the current state + log.info('') + + # Process the state transitions and conflicts + st_action = {} + st_actionp = {} + st_goto = {} + st_action, st_actionp, st_goto, actlist = self.process_state_transitions(st, i, st_action, precedence, + productions, action, goto, log) + self._log_actions(st_action, st_actionp, actlist, log) + self._handle_not_used_actions(st_action, st_actionp, actlist, log) + self._handle_state_transitions_for_nonterminals(i, st_goto, log) + action[st] = st_action + actionp[st] = st_actionp + goto[st] = st_goto + st += 1 + + @staticmethod + def _log_productions(item, log): + """ + Log the productions in a given state I. + """ + for p in item: + log.info(f' ({p.number}) {p}') + + @staticmethod + def _log_actions(st_action, st_actionp, actlist, log): + """ + Log actions for a given state transition. + """ + for a, p, m in actlist: + if a in st_action: + if p is st_actionp[a]: + log.info(' %-15s %s', a, m) + + def _handle_not_used_actions(self, st_action, st_actionp, actlist, log): + """ + Handle actions that are not used and log them. + """ + _actprint = {} + not_used = False + for a, p, m in actlist: + if a in st_action: + not_used = self._check_not_used_action(a, p, st_actionp, m, _actprint, log) or not_used + if not_used: + log.debug('') + + @staticmethod + def _check_not_used_action(a, p, st_actionp, m, _actprint, log): + """ + Check if the action is not used and log it. + """ + if p is not st_actionp[a]: + if (a, m) not in _actprint: + log.debug(f' ! 
%-15s [ {m} ]') + _actprint[(a, m)] = 1 + return True + return False + + def _handle_state_transitions_for_nonterminals(self, item, st_goto, log): + """ + Handle state transitions for nonterminals and log the corresponding transitions. + """ + nkeys = {} + for ii in item: + for s in ii.usyms: + if s in self.grammar.nonterminals: + nkeys[s] = None + for n in nkeys: + g = self.lr0_goto(item, n) + j = self.lr0_cidhash.get(id(g), -1) + if j >= 0: + st_goto[n] = j + log.info(f' %-30s shift and go to state {j}') + + +def get_caller_module_dict(levels): + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict + + +# ----------------------------------------------------------------------------- +# +# This takes a raw grammar rule string and parses it into production data +# ----------------------------------------------------------------------------- +def parse_grammar(doc, file, line): + grammar = [] + pstrings = doc.splitlines() + dline = line + lastp = None + + for ps in pstrings: + dline += 1 + p = ps.split() + if not p: + continue + try: + prodname, syms, lastp = parse_rule(p, lastp, dline, file, ps) + grammar.append((file, dline, prodname, syms)) + except SyntaxError: + raise + except Exception: + raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) + + return grammar + + +def parse_rule(p, lastp, dline, file, ps): + if p[0] == '|': + if not lastp: + raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) + prodname = lastp + syms = p[1:] + else: + prodname = p[0] + lastp = prodname + syms = p[2:] + assign = p[1] + if assign != ':' and assign != '::=': + raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) + + return prodname, syms, lastp + + +class ParserReflect(object): + def __init__(self, pdict, log=None): + self.pdict = pdict + self.start = None + self.error_func = None + self.tokens = None + self.modules = set() + self.grammar = [] + self.error = False + + if log is None: + self.log = Logger(sys.stderr) + else: + self.log = log + + # Get all of the basic information + def get_all(self): + self.get_start() + self.get_error_func() + self.get_tokens() + self.get_precedence() + self.get_pfunctions() + + # Validate all of the information + def validate_all(self): + self.validate_start() + self.validate_error_func() + self.validate_tokens() + self.validate_precedence() + self.validate_pfunctions() + self.validate_modules() + return self.error + + # Compute a signature over the grammar + def signature(self): + parts = [] + try: + if self.start: + parts.append(self.start) + if self.prec: + parts.append(''.join([''.join(p) for p in self.prec])) + if self.tokens: + parts.append(' '.join(self.tokens)) + for f in self.pfuncs: + if f[3]: + parts.append(f[3]) + except (TypeError, ValueError): + pass + return ''.join(parts) + + def validate_modules(self): + # Match def p_funcname( + fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') + + for module in self.modules: + try: + lines, linen = inspect.getsourcelines(module) + except IOError: + continue + self.check_function_redefinitions(lines, fre, module) + + def check_function_redefinitions(self, lines, fre, module): + counthash = {} + for linen, line in enumerate(lines, 1): + m = fre.match(line) + if m: + name = m.group(1) + prev = counthash.get(name) + if prev: + self.report_redefinition(module, linen, name, prev) + else: + counthash[name] = linen + + def report_redefinition(self, module, linen, name, prev): + filename = 
inspect.getsourcefile(module) + self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d', + filename, linen, name, prev) + + # Get the start symbol + def get_start(self): + self.start = self.pdict.get('start') + + # Validate the start symbol + def validate_start(self): + if self.start is not None: + if not isinstance(self.start, str): + self.log.error("'start' must be a string") + + # Look for error handler + def get_error_func(self): + self.error_func = self.pdict.get('p_error') + + # Validate the error function + def validate_error_func(self): + if self.error_func: + if isinstance(self.error_func, types.FunctionType): + ismethod = 0 + elif isinstance(self.error_func, types.MethodType): + ismethod = 1 + else: + self.log.error("'p_error' defined, but is not a function or method") + self.error = True + return + + eline = self.error_func.__code__.co_firstlineno + efile = self.error_func.__code__.co_filename + module = inspect.getmodule(self.error_func) + self.modules.add(module) + + argcount = self.error_func.__code__.co_argcount - ismethod + if argcount != 1: + self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) + self.error = True + + # Get the tokens map + def get_tokens(self): + tokens = self.pdict.get('tokens') + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True + return + + if not tokens: + self.log.error('tokens is empty') + self.error = True + return + + self.tokens = sorted(tokens) + + # Validate the tokens + def validate_tokens(self): + # Validate the tokens. + if 'error' in self.tokens: + self.log.error("Illegal token name 'error'. Is a reserved word") + self.error = True + return + + terminals = set() + for n in self.tokens: + if n in terminals: + self.log.warning('Token %r multiply defined', n) + terminals.add(n) + + # Get the precedencemap (if any) + def get_precedence(self): + self.prec = self.pdict.get('precedence') + + # Validate and parse the precedencemap + def validate_precedence(self): + preclist = [] + if self.prec: + if not isinstance(self.prec, (list, tuple)): + self.log.error('precedencemust be a list or tuple') + self.error = True + return + + for level, p in enumerate(self.prec): + if not isinstance(p, (list, tuple)): + self.log.error('bad precedencetable') + self.error = True + return + + if len(p) < 2: + self.log.error('Malformed precedenceentry %s. 
Must be (assoc, term, ..., term)', p) + self.error = True + return + + assoc = p[0] + if not isinstance(assoc, str): + self.log.error('precedenceassociativity must be a string') + self.error = True + return + + # 提取内部逻辑到一个子函数 + self._validate_terms_and_append(p[1:], assoc, level + 1, preclist) + + self.preclist = preclist + + def _validate_terms_and_append(self, terms, assoc, level, preclist): + for term in terms: + if not isinstance(term, str): + self.log.error('precedenceitems must be strings') + self.error = True + return + preclist.append((term, assoc, level + 1)) + + # Get all p_functions from the grammar + def get_pfunctions(self): + p_functions = [] + for name, item in self.pdict.items(): + if not name.startswith('p_') or name == 'p_error': + continue + if isinstance(item, (types.FunctionType, types.MethodType)): + line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) + module = inspect.getmodule(item) + p_functions.append((line, module, name, item.__doc__)) + + # Sort all of the actions by line number; make sure to stringify + # modules to make them sortable, since `line` may not uniquely sort all + # p functions + p_functions.sort(key=lambda p_function: ( + p_function[0], + str(p_function[1]), + p_function[2], + p_function[3])) + self.pfuncs = p_functions + + def validate_pfunctions(self): + grammar = [] + # Check for non-empty symbols + if len(self.pfuncs) == 0: + self.log.error('no rules of the form p_rulename are defined') + self.error = True + return + + for line, module, name, doc in self.pfuncs: + file = inspect.getsourcefile(module) + func = self.pdict[name] + if isinstance(func, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + if func.__code__.co_argcount > reqargs: + self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) + self.error = True + elif func.__code__.co_argcount < reqargs: + self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) + self.error = True + elif not func.__doc__: + self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', + file, line, func.__name__) + else: + self.process_grammar_rule(doc, file, line, name, grammar) + self.modules.add(module) + + for n, v in self.pdict.items(): + if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): + continue + if n.startswith('t_'): + continue + if n.startswith('p_') and n != 'p_error': + self.log.warning('%r not defined as a function', n) + + self._check_possible_grammar_rule(v, n) + + self.grammar = grammar + + # Validate all of the p_functions + def process_grammar_rule(self, doc, file, line, name, grammar): + # 处理文档字符串并解析语法 + parsed_g = self.parse_grammar_with_error_handling(doc, file, line) + if parsed_g is not None: + for g in parsed_g: + grammar.append((name, g)) + + def parse_grammar_with_error_handling(self, doc, file, line): + try: + return parse_grammar(doc, file, line) + except SyntaxError as e: + self.log.error(str(e)) + self.error = True + return None + + def _check_possible_grammar_rule(self, v, n): + """ + Helper function to check if a function might be a possible grammar rule. + This is extracted from the loop to reduce complexity. 
+    # Get all p_functions from the grammar
+    def get_pfunctions(self):
+        p_functions = []
+        for name, item in self.pdict.items():
+            if not name.startswith('p_') or name == 'p_error':
+                continue
+            if isinstance(item, (types.FunctionType, types.MethodType)):
+                line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno)
+                module = inspect.getmodule(item)
+                p_functions.append((line, module, name, item.__doc__))
+
+        # Sort all of the actions by line number; make sure to stringify
+        # modules to make them sortable, since `line` may not uniquely sort all
+        # p functions
+        p_functions.sort(key=lambda p_function: (
+            p_function[0],
+            str(p_function[1]),
+            p_function[2],
+            p_function[3]))
+        self.pfuncs = p_functions
+
+    # Validate all of the p_functions
+    def validate_pfunctions(self):
+        grammar = []
+        # Check that at least one p_ rule function was found
+        if len(self.pfuncs) == 0:
+            self.log.error('no rules of the form p_rulename are defined')
+            self.error = True
+            return
+
+        for line, module, name, doc in self.pfuncs:
+            file = inspect.getsourcefile(module)
+            func = self.pdict[name]
+            if isinstance(func, types.MethodType):
+                reqargs = 2
+            else:
+                reqargs = 1
+            if func.__code__.co_argcount > reqargs:
+                self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
+                self.error = True
+            elif func.__code__.co_argcount < reqargs:
+                self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
+                self.error = True
+            elif not func.__doc__:
+                self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
+                                 file, line, func.__name__)
+            else:
+                self.process_grammar_rule(doc, file, line, name, grammar)
+                self.modules.add(module)
+
+        for n, v in self.pdict.items():
+            if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
+                continue
+            if n.startswith('t_'):
+                continue
+            if n.startswith('p_') and n != 'p_error':
+                self.log.warning('%r not defined as a function', n)
+
+            self._check_possible_grammar_rule(v, n)
+
+        self.grammar = grammar
+
+    def process_grammar_rule(self, doc, file, line, name, grammar):
+        # Parse the docstring and collect the productions it defines
+        parsed_g = self.parse_grammar_with_error_handling(doc, file, line)
+        if parsed_g is not None:
+            for g in parsed_g:
+                grammar.append((name, g))
+
+    def parse_grammar_with_error_handling(self, doc, file, line):
+        try:
+            return parse_grammar(doc, file, line)
+        except SyntaxError as e:
+            self.log.error(str(e))
+            self.error = True
+            return None
+
+    def _check_possible_grammar_rule(self, v, n):
+        """
+        Helper function to check if a function might be a possible grammar rule.
+        This is extracted from the loop to reduce complexity.
+        """
+        if not self._is_possible_grammar_function(v):
+            return
+
+        if self._has_doc(v):
+            self._check_doc_for_grammar_rule(v, n)
+
+    def _is_possible_grammar_function(self, v):
+        """Check if v is a possible grammar function based on argument count."""
+        return (
+            (isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
+            (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)
+        )
+
+    def _has_doc(self, v):
+        """Check if v has a docstring."""
+        return v.__doc__ is not None
+
+    def _check_doc_for_grammar_rule(self, v, n):
+        """Check if the docstring of v follows the expected grammar rule format."""
+        try:
+            doc = v.__doc__.split(' ')
+            if doc[1] == ':':
+                self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
+                                 v.__code__.co_filename, v.__code__.co_firstlineno, n)
+        except IndexError:
+            pass
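+
+
+# For illustration only (an editor's sketch; the rule and symbol names are
+# hypothetical): the reflection above expects rule functions named p_*, taking one
+# positional argument, whose docstring carries the grammar in
+# "name : sym1 sym2 | alternative" form, e.g.
+#
+#     def p_expression(p):
+#         '''expression : expression PLUS term
+#                       | term'''
+#         p[0] = p[1] if len(p) == 2 else p[1] + p[3]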
+def yacc(*, debug=YACC_DEBUG, module=None, start=None,
+         check_recursion=True, optimize=False, debugfile=DEBUG_FILE,
+         debuglog=None, errorlog=None):
+    global parse
+
+    # Initialize errorlog if None
+    if errorlog is None:
+        errorlog = Logger(sys.stderr)
+
+    # Get the module dictionary used for the parser
+    pdict = get_module_dict(module)
+
+    # Set start symbol if specified
+    if start is not None:
+        pdict['start'] = start
+
+    # Collect parser information
+    pinfo = ParserReflect(pdict, log=errorlog)
+    pinfo.get_all()
+
+    # Handle errors
+    if pinfo.error or pinfo.validate_all():
+        raise YaccError('Unable to build parser')
+
+    # Log warnings for missing error function
+    if not pinfo.error_func:
+        errorlog.warning('no p_error() function is defined')
+
+    # Create a grammar object and add productions
+    grammar = create_grammar(pinfo, errorlog)
+
+    # Set start symbol for grammar
+    set_start_symbol(start, pinfo, grammar, errorlog)
+
+    # Verify the grammar structure
+    errors = verify_grammar(grammar, errorlog)
+
+    if errors:
+        raise YaccError('Unable to build parser')
+
+    # Check for recursion and conflicts
+    check_recursion_and_conflicts(grammar, errorlog, check_recursion)
+
+    # Run the LRTable on the grammar and return the parser
+    lr = LRTable(grammar, debuglog)
+    report_conflicts(lr, debuglog, errorlog, debug)
+    return build_parser(lr, pinfo)
+
+
+def get_module_dict(module):
+    if module:
+        return get_module_dict_from_module(module)
+    # Grab the globals of yacc()'s caller. Counting from get_caller_module_dict
+    # itself, that caller is three frames up (get_caller_module_dict ->
+    # get_module_dict -> yacc -> caller), so the level must be 3 here; the
+    # previous value of 2 stopped at yacc()'s own frame and returned the wrong
+    # module dictionary.
+    return get_caller_module_dict(3)
+
+
+def get_module_dict_from_module(module):
+    _items = [(k, getattr(module, k)) for k in dir(module)]
+    pdict = dict(_items)
+
+    # Ensure that __file__ and __package__ are set if not present
+    if '__file__' not in pdict:
+        pdict['__file__'] = sys.modules[pdict['__module__']].__file__
+    if '__package__' not in pdict and '__module__' in pdict:
+        if hasattr(sys.modules[pdict['__module__']], '__package__'):
+            pdict['__package__'] = sys.modules[pdict['__module__']].__package__
+    return pdict
+
+
+def create_grammar(pinfo, errorlog):
+    grammar = Grammar(pinfo.tokens)
+
+    # Set precedence level for terminals
+    for term, assoc, level in pinfo.preclist:
+        try:
+            grammar.set_precedence(term, assoc, level)
+        except GrammarError as e:
+            errorlog.warning('%s', e)
+
+    # Add productions to the grammar
+    for funcname, gram in pinfo.grammar:
+        file, line, prodname, syms = gram
+        try:
+            grammar.add_production(prodname, syms, funcname, file, line)
+        except GrammarError as e:
+            errorlog.error('%s', e)
+
+    return grammar
+
+
+def set_start_symbol(start, pinfo, grammar, errorlog):
+    try:
+        if start is None:
+            grammar.set_start(pinfo.start)
+        else:
+            grammar.set_start(start)
+    except GrammarError as e:
+        errorlog.error(str(e))
+
+
+def verify_grammar(grammar, errorlog):
+    errors = False
+
+    # Verify undefined symbols
+    undefined_symbols = grammar.undefined_symbols()
+    for sym, prod in undefined_symbols:
+        errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
+        errors = True
+
+    # Check unused terminals
+    unused_terminals = grammar.unused_terminals()
+    if unused_terminals:
+        report_unused_terminals(unused_terminals, errorlog)
+
+    # Check unused non-terminals
+    unused_rules = grammar.unused_rules()
+    report_unused_rules(unused_rules, errorlog)
+
+    if len(unused_terminals) > 1:
+        errorlog.warning('There are %d unused tokens', len(unused_terminals))
+    if len(unused_rules) > 1:
+        errorlog.warning('There are %d unused rules', len(unused_rules))
+
+    # Report whether any fatal grammar errors were found
+    return errors
+
+
+def report_unused_terminals(unused_terminals, errorlog):
+    errorlog.warning('Unused terminals:')
+    for term in unused_terminals:
+        errorlog.warning('Token %r defined, but not used', term)
+
+
+def report_unused_rules(unused_rules, errorlog):
+    for prod in unused_rules:
+        errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
+
+
+def check_recursion_and_conflicts(grammar, errorlog, check_recursion):
+    if check_recursion:
+        unreachable = grammar.find_unreachable()
+        for u in unreachable:
+            errorlog.warning('Symbol %r is unreachable', u)
+
+        infinite = grammar.infinite_cycles()
+        for inf in infinite:
+            errorlog.error('Infinite recursion detected for symbol %r', inf)
+
+    unused_prec = grammar.unused_precedence()
+    for term, assoc in unused_prec:
+        errorlog.error('precedence rule %r defined for unknown symbol %r', assoc, term)
+
+
+def report_conflicts(lr, debuglog, errorlog, debug):
+    if debug:
+        num_sr = len(lr.sr_conflicts)
+        if num_sr > 0:
+            errorlog.warning('%d shift/reduce conflicts', num_sr)
+
+        num_rr = len(lr.rr_conflicts)
+        if num_rr > 0:
+            errorlog.warning('%d reduce/reduce conflicts', num_rr)
+
+        # Report conflicts to debug log
+        if lr.sr_conflicts or lr.rr_conflicts:
+            debuglog.warning('')
+            debuglog.warning('Conflicts:')
+            for state, tok, resolution in lr.sr_conflicts:
+                debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)
+            for state, rule, rejected in lr.rr_conflicts:
+                debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
+                debuglog.warning('rejected rule (%s) in state %d', rejected, state)
+                errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
+
+
+def build_parser(lr, pinfo):
+    lr.bind_callables(pinfo.pdict)
+    parser = LRParser(lr, pinfo.error_func)
+    global parse
+    parse = parser.parse
+    return parser
-- 
Gitee

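For context, a minimal sketch of how the yacc() entry point above is meant to be
driven. This is an illustration rather than part of either patch: the import path
local.parser, every token and rule name below, and the assumption that the bundled
lex module keeps PLY's usual lex.lex()/lex.lexer behavior are all hypothetical.

    from local.parser import lex, yacc

    tokens = ('NUMBER', 'PLUS', 'TIMES')

    t_PLUS = r'\+'
    t_TIMES = r'\*'
    t_ignore = ' \t'

    def t_NUMBER(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_error(t):
        t.lexer.skip(1)

    precedence = (
        ('left', 'PLUS'),    # level 1, binds loosest
        ('left', 'TIMES'),   # level 2, binds tightest
    )

    def p_expression(p):
        '''expression : expression PLUS expression
                      | expression TIMES expression
                      | NUMBER'''
        if len(p) == 2:
            p[0] = p[1]
        elif p[2] == '+':
            p[0] = p[1] + p[3]
        else:
            p[0] = p[1] * p[3]

    def p_error(tok):
        print('syntax error at', tok)

    lex.lex()                     # build the lexer; sets the module-level lex.lexer
    parser = yacc.yacc()          # reflection collects tokens, precedence and p_* rules
    print(parser.parse('2+3*4'))  # expected output: 14
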
From 9b733878f2b9b7eb26f999d0921eee6e21568c45 Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Thu, 12 Dec 2024 16:33:08 +0800
Subject: [PATCH 44/87] Refactor parser: drop duplicated yacc implementation from my_yacc.py

---
 script/local/parser/my_yacc.py | 2303 +++-----------------------------
 script/local/parser/yacc.py    |   97 +-
 2 files changed, 226 insertions(+), 2174 deletions(-)

diff --git a/script/local/parser/my_yacc.py b/script/local/parser/my_yacc.py
index 11390a1a..126888ce 100644
--- a/script/local/parser/my_yacc.py
+++ b/script/local/parser/my_yacc.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 #############################################################################
-# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+# Copyright (c) 2024 Huawei Technologies Co.,Ltd.
 #
 # openGauss is licensed under Mulan PSL v2.
 # You can use this software according to the terms
@@ -10,2149 +10,170 @@
 #
 # http://license.coscl.org.cn/MulanPSL2
 #
-# THIS SOFTWARE IS PROVIDED ON AN "AS IS" bASIS,
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
 # WITHOUT WARRANTIES OF ANY KIND,
-# EITHER EXPRESS OR IMPLIED, INCLUDING bUT NOT LIMITED TO NON-INFRINGEMENT,
-# MERCHANTAbILITY OR FIT FOR A PARTICULAR PURPOSE.
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 # See the Mulan PSL v2 for more details.
 # ----------------------------------------------------------------------------
-# Description : LocalCheckOS.py is a utility to check OS info on local node.
+# Description : my_yacc.py is a utility to check security configuration info on the local node.
 #############################################################################
-import re
-import types
+import os
 import sys
-import inspect
-
-# -----------------------------------------------------------------------------
-# === User configurable parameters ===
-#
-# Change these to modify the default behavior of yacc (if you wish)
-# -----------------------------------------------------------------------------
-
-YACC_DEBUG = False  # Debugging mode. If set, yacc generates a
-# a 'parser.out' file in the current directory
-
-DEBUG_FILE = 'parser.out'  # Default name of the debugging file
-ERROR_COUNT = 3  # Number of symbols that must be shifted to leave recovery mode
-RESULT_LIMIT = 40  # Size limit of results when running in debug mode.
-
-MAXINT = sys.maxsize
-
-
-class Logger(object):
-    def __init__(self, f):
-        self.f = f
-
-    def debug(self, msg, *args, **kwargs):
-        self.f.write((msg % args) + '\n')
-
-    info = debug
-
-    def warning(self, msg, *args, **kwargs):
-        self.f.write('WARNING: ' + (msg % args) + '\n')
-
-    def error(self, msg, *args, **kwargs):
-        self.f.write('ERROR: ' + (msg % args) + '\n')
-
-    critical = debug
-
-
-# Null logger is used when no output is generated. Does nothing.
-class NullLogger(object):
-    def __getattribute__(self, name):
-        return self
-
-    def __call__(self, *args, **kwargs):
-        return self
-
-
-# Exception raised for yacc-related errors
-class YaccError(Exception):
-    pass
-
-
-# Format the result message that the parser produces when running in debug mode.
-def format_result(r):
-    repr_str = repr(r)
-    if '\n' in repr_str:
-        repr_str = repr(repr_str)
-    if len(repr_str) > RESULT_LIMIT:
-        repr_str = repr_str[:RESULT_LIMIT] + ' ...'
-    result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
-    return result
-
-
-# Format stack entries when the parser is running in debug mode
-def format_stack_entry(r):
-    repr_str = repr(r)
-    if '\n' in repr_str:
-        repr_str = repr(repr_str)
-    if len(repr_str) < 16:
-        return repr_str
-    else:
-        return '<%s @ 0x%x>' % (type(r).__name__, id(r))
-
-
-# -----------------------------------------------------------------------------
-# === LR Parsing Engine ===
-#
-# The following classes are used for the LR parser itself. These are not
-# used during table construction and are independent of the actual LR
-# table generation algorithm
-# -----------------------------------------------------------------------------
-
-# This class is used to hold non-terminal grammar symbols during parsing.
-# It normally has the following attributes set: - -class YaccSymbol: - def __str__(self): - return self.type - - def __repr__(self): - return str(self) - - -# This class is a wrapper around the objects actually passed to each -# grammar rule. Index lookup and assignment actually assign the -# .value attribute of the underlying YaccSymbol object. -# The lineno() method returns the line number of a given -# item (or 0 if not defined). The linespan() method returns -# a tuple of (startline,endline) representing the range of lines -# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) -# representing the range of positional information for a symbol. - -class YaccProduction: - def __init__(self, s, stack=None): - self.slice = s - self.stack = stack - self.lexer = None - self.parser = None - - def __getitem__(self, n): - if isinstance(n, slice): - return [s.value for s in self.slice[n]] - elif n >= 0: - return self.slice[n].value - else: - return self.stack[n].value - - def __setitem__(self, n, v): - self.slice[n].value = v - - def __getslice__(self, i, j): - return [s.value for s in self.slice[i:j]] - - def __len__(self): - return len(self.slice) - - def lineno(self, n): - return getattr(self.slice[n], 'lineno', 0) - - def set_lineno(self, n, lineno): - self.slice[n].lineno = lineno - - def linespan(self, n): - startline = getattr(self.slice[n], 'lineno', 0) - endline = getattr(self.slice[n], 'endlineno', startline) - return startline, endline - - def lexpos(self, n): - return getattr(self.slice[n], 'lexpos', 0) - - def set_lexpos(self, n, lexpos): - self.slice[n].lexpos = lexpos - - def lexspan(self, n): - startpos = getattr(self.slice[n], 'lexpos', 0) - endpos = getattr(self.slice[n], 'endlexpos', startpos) - return startpos, endpos - - @staticmethod - def error(): - raise SyntaxError - - -# ----------------------------------------------------------------------------- -# == LRParser == -# -# The LR Parsing engine. -# ----------------------------------------------------------------------------- - -class LRParser: - def __init__(self, lrtab, errorf): - self.productions = lrtab.lr_productions - self.action = lrtab.lr_action - self.goto = lrtab.lr_goto - self.errorfunc = errorf - self.set_defaulted_states() - self.errorok = True - - def errok(self): - self.errorok = True - - def restart(self): - del self.statestack[:] - del self.symstack[:] - sym = YaccSymbol() - sym.type = '$end' - self.symstack.append(sym) - self.statestack.append(0) - - # Defaulted state support. - # This method identifies parser states where there is only one possible reduction action. - # For such states, the parser can make a choose to make a rule reduction without consuming - # the next look-ahead token. This delayed invocation of the tokenizer can be useful in - # certain kinds of advanced parsing situations where the lexer and parser interact with - # each other or change states (i.e., manipulation of scope, lexer states, etc.). 
- # - def set_defaulted_states(self): - self.defaulted_states = {} - for state, actions in self.action.items(): - rules = list(actions.values()) - if len(rules) == 1 and rules[0] < 0: - self.defaulted_states[state] = rules[0] - - def disable_defaulted_states(self): - self.defaulted_states = {} - - def parse(self, put=None, lexer=None, debug=False, tracking=False): - debug, lexer = self._initialize_parser(debug, lexer) - lookahead = None - lookaheadstack = [] - actions = self.action - goto = self.goto - prod = self.productions - defaulted_states = self.defaulted_states - pslice = YaccProduction(None) - errorcount = 0 - pslice.lexer = lexer - pslice.parser = self - if put is not None: - lexer.input(put) - get_token = self.token = lexer.token - statestack = self.statestack = [] - symstack = self.symstack = [] - pslice.stack = symstack - errtoken = None - statestack.append(0) - sym = YaccSymbol() - sym.type = '$end' - symstack.append(sym) - state = 0 - while True: - lookahead, lookaheadstack, state, t = self.parse_step(state, lookahead, lookaheadstack, statestack, - symstack, actions, defaulted_states, debug, get_token) - if t is not None: - if t > 0: - # Call the new shift_and_goto function - state, symstack, lookahead, errorcount = self.shift_and_goto(t, statestack, symstack, lookahead, - debug, errorcount) - continue - if t < 0: - p = prod[-t] - pname = p.name - plen = p.len - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - if debug: - self.log_reduce_action(debug, p, plen, symstack, statestack, goto) - if plen: - targ = symstack[-plen - 1:] - targ[0] = sym - self.update_tracking_info(tracking, targ, sym) - pslice.slice = targ - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - self.state = state - p.callable(pslice) - del statestack[-plen:] - self.log_debug_info(debug, pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - lookaheadstack.append(lookahead) # Save the current lookahead token - symstack.extend(targ[1:-1]) # Put the production slice back on the stack - statestack.pop() # Pop back one state (before the reduce) - state = statestack[-1] - sym.type = 'error' - sym.value = 'error' - lookahead = sym - errorcount = ERROR_COUNT - self.errorok = False - continue - else: - self.update_tracking_info(tracking, sym, lexer, pslice) - try: - self.state = state - p.callable(pslice) - self.log_debug_info(debug, pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - lookaheadstack.append(lookahead) # Save the current lookahead token - statestack.pop() # Pop back one state (before the reduce) - state = statestack[-1] - sym.type = 'error' - sym.value = 'error' - lookahead = sym - errorcount = ERROR_COUNT - self.errorok = False - continue - if t == 0: - n = symstack[-1] - result = getattr(n, 'value', None) - self.log_parse_debug_info() - return result - if t is None: - if debug: - debug.error('Error : %s', - ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - if errorcount == 0 or self.errorok: - errorcount = ERROR_COUNT - self.errorok = False - errtoken = lookahead - if errtoken.type == '$end': - errtoken = None # End of file! 
- if self.errorfunc: - if errtoken and not hasattr(errtoken, 'lexer'): - errtoken.lexer = lexer - self.state = state - tok = self.errorfunc(errtoken) - if self.errorok: - lookahead = tok - errtoken = None - continue - else: - self.handle_syntax_error(errtoken, lookahead) - - else: - errorcount = ERROR_COUNT - if len(statestack) <= 1 and lookahead.type != '$end': - lookahead = None - errtoken = None - state = 0 - del lookaheadstack[:] - continue - if lookahead.type == '$end': - return - if lookahead.type != 'error': - lookahead = self.handle_error(lookahead, symstack, lookaheadstack, tracking) - else: - state = self.pop_and_update_state(symstack, statestack, tracking, lookahead) - continue - raise RuntimeError('yacc: internal parser error!!!\n') - - @staticmethod - def _initialize_parser(debug, lexer): - if isinstance(debug, int) and debug: - debug = Logger(sys.stderr) - if not lexer: - from . import lex - lexer = lex.lexer - return debug, lexer - - @staticmethod - def parse_step(state, lookahead, lookaheadstack, statestack, symstack, actions, defaulted_states, debug, - get_token): - if debug: - debug.debug('State : %s', state) - - if state not in defaulted_states: - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - ltype = lookahead.type - t = actions[state].get(ltype) - else: - t = defaulted_states[state] - if debug: - debug.debug('Defaulted state %s: Reduce using %d', state, -t) - - if debug: - debug.debug('Stack : %s', - ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - - return lookahead, lookaheadstack, state, t - - @staticmethod - def shift_and_goto(t, statestack, symstack, lookahead, debug, errorcount): - """Handle the shift and goto action during parsing.""" - statestack.append(t) # Shift the state - state = t - if debug: - debug.debug('Action : Shift and goto state %s', t) - symstack.append(lookahead) # Push the symbol onto the symbol stack - lookahead = None # Reset the lookahead token - if errorcount: - errorcount -= 1 # Decrement error count if there was a previous error - return state, symstack, lookahead, errorcount - - @staticmethod - def update_tracking_info(tracking, targ, sym): - if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1, 'endlineno', t1.lineno) - sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) - - @staticmethod - def handle_error(lookahead, symstack, lookaheadstack, tracking): - sym = symstack[-1] - if sym.type == 'error': - if tracking: - sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) - sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) - lookahead = None - return lookahead - t = YaccSymbol() - t.type = 'error' - if hasattr(lookahead, 'lineno'): - t.lineno = t.endlineno = lookahead.lineno - if hasattr(lookahead, 'lexpos'): - t.lexpos = t.endlexpos = lookahead.lexpos - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - return lookahead - - @staticmethod - def handle_syntax_error(errtoken, lookahead): - if errtoken: - if hasattr(errtoken, 'lineno'): - lineno = lookahead.lineno - else: - lineno = 0 - if lineno: - sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) - else: - sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) - else: - sys.stderr.write('yacc: Parse error in input. 
EOF\n') - return - - @staticmethod - def log_reduce_action(debug, p, plen, symstack, statestack, goto): - if plen: - debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, - '[' + ','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]]) + ']', - goto[statestack[-1 - plen]][p.name]) - else: - debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], - goto[statestack[-1]][p.name]) - - @staticmethod - def log_debug_info(debug, pslice): - if debug: - debug.info('Result : %s', format_result(pslice[0])) - - @staticmethod - def update_tracking_sym(tracking, sym, lexer, pslice): - if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - targ = [sym] - pslice.slice = targ - - @staticmethod - def log_parse_debug_info(debug, result): - if debug: - debug.info('Done : Returning %s', format_result(result)) - debug.info('PARSE DEbUG END') - - @staticmethod - def pop_and_update_state(symstack, statestack, tracking, lookahead): - # 弹出符号栈的顶部元素 - sym = symstack.pop() - - # 如果需要跟踪信息,更新lookahead的行号和位置 - if tracking: - lookahead.lineno = sym.lineno - lookahead.lexpos = sym.lexpos - - # 弹出状态栈的顶部元素,并更新当前状态 - statestack.pop() - state = statestack[-1] - - # 返回更新后的状态 - return state - - -_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') - - -class Production(object): - reduced = 0 - - def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): - self.name = name - self.prod = tuple(prod) - self.number = number - self.func = func - self.callable = None - self.file = file - self.line = line - self.prec = precedence - - # Internal settings used during table construction - - self.len = len(self.prod) # Length of the production - - # Create a list of unique production symbols used in the production - self.usyms = [] - for s in self.prod: - if s not in self.usyms: - self.usyms.append(s) - - # List of all LR items for the production - self.lr_items = [] - self.lr_next = None - - # Create a string representation - if self.prod: - self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) - else: - self.str = '%s -> ' % self.name - - def __str__(self): - return self.str - - def __repr__(self): - return 'Production(' + str(self) + ')' - - def __len__(self): - return len(self.prod) - - def __nonzero__(self): - return 1 - - def __getitem__(self, index): - return self.prod[index] - - # Return the nth lr_item from the production (or None if at the end) - def lr_item(self, n): - if n > len(self.prod): - return None - p = LRItem(self, n) - # Precompute the list of productions immediately following. - try: - p.lr_after = self.Prodnames[p.prod[n + 1]] - except (IndexError, KeyError): - p.lr_after = [] - try: - p.lr_before = p.prod[n - 1] - except IndexError: - p.lr_before = None - return p - - # bind the production function name to a callable - def bind(self, pdict): - if self.func: - self.callable = pdict[self.func] - - -class LRItem(object): - def __init__(self, p, n): - self.name = p.name - self.prod = list(p.prod) - self.number = p.number - self.lr_index = n - self.lookaheads = {} - self.prod.insert(n, '.') - self.prod = tuple(self.prod) - self.len = len(self.prod) - self.usyms = p.usyms - - def __str__(self): - if self.prod: - s = '%s -> %s' % (self.name, ' '.join(self.prod)) - else: - s = '%s -> ' % self.name - return s - - def __repr__(self): - return 'LRItem(' + str(self) + ')' - - -# ----------------------------------------------------------------------------- -# -# Return the rightmost terminal from a list of symbols. 
Used in add_production() -# ----------------------------------------------------------------------------- -def rightmost_terminal(symbols, terminals): - i = len(symbols) - 1 - while i >= 0: - if symbols[i] in terminals: - return symbols[i] - i -= 1 - return None - - -# ----------------------------------------------------------------------------- -# === GRAMMAR CLASS === -# -# The following class represents the contents of the specified grammar along -# with various computed properties such as first sets, follow sets, LR items, etc. -# This data is used for critical parts of the table generation process later. -# ----------------------------------------------------------------------------- - -class GrammarError(YaccError): - pass - - -class Grammar(object): - def __init__(self, terminals): - self.productions = [None] # A list of all of the productions. The first - # entry is always reserved for the purpose of - # building an augmented grammar - - self.prodnames = {} # A dictionary mapping the names of nonterminals to a list of all - # productions of that nonterminal. - - self.prodmap = {} # A dictionary that is only used to detect duplicate - # productions. - - self.terminals = {} # A dictionary mapping the names of terminal symbols to a - # list of the rules where they are used. - - for term in terminals: - self.terminals[term] = [] - - self.terminals['error'] = [] - - self.nonterminals = {} # A dictionary mapping names of nonterminals to a list - # of rule numbers where they are used. - - self.first = {} # A dictionary of precomputed first(x) symbols - - self.follow = {} # A dictionary of precomputed follow(x) symbols - - self.precedence = {} # precedencerules for each terminal. Contains tuples of the - # form ('right',level) or ('nonassoc', level) or ('left',level) - - self.usedprecedence = set() # precedencerules that were actually used by the grammer. - # This is only used to provide error checking and to generate - # a warning about unused precedencerules. - - self.start = None # starting symbol for the grammar - - def __len__(self): - return len(self.productions) - - def __getitem__(self, index): - return self.productions[index] - - # ----------------------------------------------------------------------------- - # - # Sets the precedencefor a given terminal. assoc is the associativity such as - # 'left','right', or 'nonassoc'. level is a numeric level. - # - # ----------------------------------------------------------------------------- - - def set_precedence(self, term, assoc, level): - assert self.productions == [None], 'Must call set_precedence() before add_production()' - if term in self.precedence: - raise GrammarError('precedencealready specified for terminal %r' % term) - if assoc not in ['left', 'right', 'nonassoc']: - raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") - self.precedence[term] = (assoc, level) - - # ----------------------------------------------------------------------------- - # - # Given an action function, this function assembles a production rule and - # computes its precedencelevel. - # - # The production rule is supplied as a list of symbols. For example, - # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and - # symbols ['expr','PLUS','term']. - # - # precedenceis determined by the precedenceof the right-most non-terminal - # or the precedenceof a terminal specified by %prec. 
- # - # A variety of error checks are performed to make sure production symbols - # are valid and that %prec is used correctly. - # ----------------------------------------------------------------------------- - - def validate_prodname(self, prodname, file, line): - """Validate the production name.""" - if prodname in self.terminals: - raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}. Already defined as a token') - if prodname == 'error': - raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}. error is a reserved word') - if not _is_identifier.match(prodname): - raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}') - - def handle_literal_tokens(self, syms, file, line, prodname): - """Handle literal tokens in the rule symbols.""" - for n, s in enumerate(syms): - if s[0] in "'\"": - c = self.proccess_literal_token(s, file, line, prodname) - if c is not None: - syms[n] = c - continue - if not _is_identifier.match(s) and s != '%prec': - raise GrammarError(f'{file}:{line}: Illegal name {s!r} in rule {prodname!r}') - - def proccess_literal_token(self, s, file, line, prodname): - """处理文字(literal)token.""" - try: - c = eval(s) - if len(c) > 1: - raise GrammarError( - f'{file}:{line}: Literal token {s} in rule {prodname!r} may only be a single character') - if c not in self.terminals: - self.terminals[c] = [] - return c - except SyntaxError: - pass - return None - - def handle_precedence(self, syms, file, line): - """Handle precedencesettings in the rule.""" - if '%prec' in syms: - if syms[-1] == '%prec': - raise GrammarError(f'{file}:{line}: Syntax error. Nothing follows %%prec') - if syms[-2] != '%prec': - raise GrammarError(f'{file}:{line}: Syntax error. %%prec can only appear at the end of a grammar rule') - precname = syms[-1] - prodprec = self.precedence.get(precname) - if not prodprec: - raise GrammarError(f'{file}:{line}: Nothing known about the precedenceof {precname!r}') - self.usedprecedence.add(precname) - del syms[-2:] # Drop %prec from the rule - return prodprec - else: - # If no %prec, precedenceis determined by the rightmost terminal symbol - precname = rightmost_terminal(syms, self.terminals) - return self.precedence.get(precname, ('right', 0)) - - def check_duplicate_rule(self, prodname, syms, file, line): - """Check for duplicate rule definitions.""" - rule_map = f'{prodname} -> {syms}' - if rule_map in self.prodmap: - m = self.prodmap[rule_map] - raise GrammarError(f'{file}:{line}: Duplicate rule {rule_map}. 
Previous definition at {m.file}:{m.line}') - - def add_production(self, prodname, syms, func=None, file='', line=0): - """Main method to add a production.""" - # Validate the production name - self.validate_prodname(prodname, file, line) - - # Handle literal tokens in the symbols - self.handle_literal_tokens(syms, file, line, prodname) - - # Handle precedence - prodprec = self.handle_precedence(syms, file, line) - - # Check for duplicate rules - self.check_duplicate_rule(prodname, syms, file, line) - - # Create a new production instance - pnumber = len(self.productions) - if prodname not in self.nonterminals: - self.nonterminals[prodname] = [] - - # Add the production number to terminals and nonterminals - for t in syms: - if t in self.terminals: - self.terminals[t].append(pnumber) - else: - if t not in self.nonterminals: - self.nonterminals[t] = [] - self.nonterminals[t].append(pnumber) - - # Create and add the production - p = Production(pnumber, prodname, syms, prodprec, func, file, line) - self.productions.append(p) - self.prodmap[f'{prodname} -> {syms}'] = p - - # Add to the global productions list - try: - self.prodnames[prodname].append(p) - except KeyError: - self.prodnames[prodname] = [p] - - # ----------------------------------------------------------------------------- - # - # Sets the starting symbol and creates the augmented grammar. Production - # rule 0 is S' -> start where start is the start symbol. - # ----------------------------------------------------------------------------- - - def set_start(self, start=None): - if not start: - start = self.productions[1].name - if start not in self.nonterminals: - raise GrammarError('start symbol %s undefined' % start) - self.productions[0] = Production(0, "S'", [start]) - self.nonterminals[start].append(0) - self.start = start - - # ----------------------------------------------------------------------------- - # - # Find all of the nonterminal symbols that can't be reached from the starting - # symbol. Returns a list of nonterminals that can't be reached. - # ----------------------------------------------------------------------------- - - def find_unreachable(self): - - # Mark all symbols that are reachable from a symbol s - def mark_reachable_from(s): - if s in reachable: - return - reachable.add(s) - for p in self.prodnames.get(s, []): - for r in p.prod: - mark_reachable_from(r) - - reachable = set() - mark_reachable_from(self.productions[0].prod[0]) - return [s for s in self.nonterminals if s not in reachable] - - # ----------------------------------------------------------------------------- - # - # This function looks at the various parsing rules and tries to detect - # infinite recursion cycles (grammar rules where there is no possible way - # to derive a string of only terminals). 
- # ----------------------------------------------------------------------------- - - def infinite_cycles(self): - terminates = {} - - # terminals: - for t in self.terminals: - terminates[t] = True - - terminates['$end'] = True - - # nonterminals: - - # Initialize to false: - for n in self.nonterminals: - terminates[n] = False - - # Propagate termination until no change - self.propagate_termination(terminates) - - # Collect symbols that do not terminate - infinite = self.collect_infinite(terminates) - - return infinite - - def propagate_termination(self, terminates): - while True: - some_change = False - for (n, pl) in self.prodnames.items(): - some_change |= self.check_productions_for_termination(n, pl, terminates) - if not some_change: - break - - def check_productions_for_termination(self, n, productions, terminates): - some_change = False - for p in productions: - p_terminates = self.check_production_termination(p, terminates) - if p_terminates: - if not terminates[n]: - terminates[n] = True - some_change = True - # Don't need to consider any more productions for this nonterminal. - break - return some_change - - def check_production_termination(self, production, terminates): - for s in production.prod: - if not terminates.get(s, False): - # If any symbol does not terminate, the production does not terminate. - return False - # All symbols terminate, so production terminates. - return True - - def collect_infinite(self, terminates): - infinite = [] - for (s, term) in terminates.items(): - if not term: - if s not in self.prodnames and s not in self.terminals and s != 'error': - # s is used-but-not-defined, and we've already warned of that, - # so it would be overkill to say that it's also non-terminating. - pass - else: - infinite.append(s) - return infinite - - def undefined_symbols(self): - result = [] - for p in self.productions: - if not p: - continue - - for s in p.prod: - if s not in self.prodnames and s not in self.terminals and s != 'error': - result.append((s, p)) - return result - - def unused_terminals(self): - unused_tok = [] - for s, v in self.terminals.items(): - if s != 'error' and not v: - unused_tok.append(s) - - return unused_tok - - def unused_rules(self): - unused_prod = [] - for s, v in self.nonterminals.items(): - if not v: - p = self.prodnames[s][0] - unused_prod.append(p) - return unused_prod - - # ----------------------------------------------------------------------------- - # - # Returns a list of tuples (term,precedence) corresponding to precedence - # rules that were never used by the grammar. term is the name of the terminal - # on which precedencewas applied and precedenceis a string such as 'left' or - # 'right' corresponding to the type of precedence. - # ----------------------------------------------------------------------------- - - def unused_precedence(self): - unused = [] - for termname in self.precedence: - if not (termname in self.terminals or termname in self.usedprecedence): - unused.append((termname, self.precedence[termname][0])) - - return unused - - def _first(self, beta): - # We are computing first(x1,x2,x3,...,xn) - result = [] - for x in beta: - x_produces_empty = self._process_first_set(x, result) - if not x_produces_empty: - # We don't have to consider any further symbols in beta. - break - else: - # There was no 'break' from the loop, - # so x_produces_empty was true for all x in beta, - # so beta produces empty as well. 
- result.append('') - - return result - - def _process_first_set(self, x, result): - x_produces_empty = False - # Add all the non- symbols of first[x] to the result. - for f in self.first[x]: - if f == '': - x_produces_empty = True - else: - if f not in result: - result.append(f) - return x_produces_empty - - def compute_first(self): - if self.first: - return self.first - # terminals: - for t in self.terminals: - self.first[t] = [t] - self.first['$end'] = ['$end'] - # nonterminals: - # Initialize to the empty set: - for n in self.nonterminals: - self.first[n] = [] - # Then propagate symbols until no change: - while True: - some_change = False - some_change = self._propagate_first() - if not some_change: - break - return self.first - - def _propagate_first(self): - some_change = False - for n in self.nonterminals: - some_change |= self._update_first_set(n) - return some_change - - def _update_first_set(self, nonterminal): - some_change = False - for p in self.prodnames[nonterminal]: - for f in self._first(p.prod): - if f not in self.first[nonterminal]: - self.first[nonterminal].append(f) - some_change = True - return some_change - - def compute_follow(self, start=None): - # If already computed, return the result - if self.follow: - return self.follow - - # If first sets not computed yet, do that first. - if not self.first: - self.compute_first() - - # Add '$end' to the follow list of the start symbol - for k in self.nonterminals: - self.follow[k] = [] - - if not start: - start = self.productions[1].name - - self.follow[start] = ['$end'] - - while True: - didadd = self.process_productions() - if not didadd: - break - - return self.follow - - def process_productions(self): - didadd = False - for p in self.productions[1:]: - didadd = self.process_production(p, didadd) - return didadd - - def process_production(self, p, didadd): - for i, b in enumerate(p.prod): - if b in self.nonterminals: - fst = self._first(p.prod[i + 1:]) - didadd = self.process_first_set(fst, b, p, i, didadd) - return didadd - - def process_first_set(self, fst, b, p, i, didadd): - hasempty = False - for f in fst: - if f != '' and f not in self.follow[b]: - self.follow[b].append(f) - didadd = True - if f == '': - hasempty = True - if hasempty or i == (len(p.prod) - 1): - didadd = self.add_follow_to_nonterminal(p, b, didadd) - return didadd - - def add_follow_to_nonterminal(self, p, b, didadd): - for f in self.follow[p.name]: - if f not in self.follow[b]: - self.follow[b].append(f) - didadd = True - return didadd - - def build_lritems(self): - for p in self.productions: - lastlri = p - i = 0 - lr_items = [] - while True: - lri = self._process_lr_item(p, i, lastlri) - if not lri: - break - lr_items.append(lri) - lastlri = lri - i += 1 - p.lr_items = lr_items - - def _process_lr_item(self, p, i, lastlri): - """ - Process a single LR item step and return the next lri object. 
- """ - if i > len(p): - lri = None - else: - lri = LRItem(p, i) - # Precompute the list of productions immediately following - try: - lri.lr_after = self.prodnames[lri.prod[i + 1]] - except (IndexError, KeyError): - lri.lr_after = [] - try: - lri.lr_before = lri.prod[i - 1] - except IndexError: - lri.lr_before = None - - lastlri.lr_next = lri - return lri - - -def digraph(nodes, edges, fp): - # 初始化每个节点的状态为0 - n = {} - for node in nodes: - n[node] = 0 - - stack = [] - f = {} - - # 遍历图中的每个节点 - for node in nodes: - if n[node] == 0: - traverse(node, n, stack, f, nodes, edges, fp) - - return f - - -def traverse(x, n, stack, f, x_values, r, fp): - stack.append(x) - d = len(stack) - n[x] = d - f[x] = fp(x) # f(x) <- f'(x) - - related = r(x) # Get y's related to x - for y in related: - if n[y] == 0: - traverse(y, n, stack, f, x_values, r, fp) - n[x] = min(n[x], n[y]) - for a in f.get(y, []): - if a not in f[x]: - f[x].append(a) - if n[x] == d: - n[stack[-1]] = MAXINT - f[stack[-1]] = f[x] - element = stack.pop() - while element != x: - n[stack[-1]] = MAXINT - f[stack[-1]] = f[x] - element = stack.pop() - - -class LALRError(YaccError): - pass - - -# ----------------------------------------------------------------------------- -# == LRTable == -# -# This class implements the LR table generation algorithm. There are no -# public methods. -# ----------------------------------------------------------------------------- - -class LRTable: - def __init__(self, grammar, log=None): - self.grammar = grammar - - # Set up the logger - if not log: - log = NullLogger() - self.log = log - - # Internal attributes - self.lr_action = {} # Action table - self.lr_goto = {} # Goto table - self.lr_productions = grammar.productions # Copy of grammar Production array - self.lr_goto_cache = {} # Cache of computed gotos - self.lr0_cidhash = {} # Cache of closures - - self._add_count = 0 # Internal counter used to detect cycles - - # Diagnostic information filled in by the table generator - self.sr_conflict = 0 - self.rr_conflict = 0 - self.conflicts = [] # List of conflicts - - self.sr_conflicts = [] - self.rr_conflicts = [] - - # build the tables - self.grammar.build_lritems() - self.grammar.compute_first() - self.grammar.compute_follow() - self.lr_parse_table() - - # bind all production function names to callable objects in pdict - def bind_callables(self, pdict): - for p in self.lr_productions: - p.bind(pdict) - - def lr0_closure(self, input_items): - self._add_count += 1 - closure_items = input_items[:] - did_add = True - while did_add: - did_add = self._process_lr0_closure(closure_items) - return closure_items - - def _process_lr0_closure(self, closure_items): - """ - Process a single step of the lr0 closure algorithm. - It tries to add new LR items to the closure. 
- """ - did_add = False - for item in closure_items: - for x in item.lr_after: - if getattr(x, 'lr0_added', 0) == self._add_count: - continue - # Add b --> .G to closure_items - closure_items.append(x.lr_next) - x.lr0_added = self._add_count - did_add = True - - return did_add - - def lr0_goto(self, input_items, x): - # first we look for a previously cached entry - g = self.lr_goto_cache.get((id(input_items), x)) - if g: - return g - - # Now we generate the goto set in a way that guarantees uniqueness - # of the result - - s = self.lr_goto_cache.get(x) - if not s: - s = {} - self.lr_goto_cache[x] = s - - gs = [] - for p in input_items: - n = p.lr_next - if n and n.lr_before == x: - s1 = s.get(id(n)) - if not s1: - s1 = {} - s[id(n)] = s1 - gs.append(n) - s = s1 - g = s.get('$end') - if not g: - if gs: - g = self.lr0_closure(gs) - s['$end'] = g - else: - s['$end'] = gs - self.lr_goto_cache[(id(input_items), x)] = g - return g - - def lr0_items(self): - closure_set = [self.lr0_closure([self.grammar.productions[0].lr_next])] - i = 0 - for item_set in closure_set: - self.lr0_cidhash[id(item_set)] = i - i += 1 - i = 0 - while i < len(closure_set): - item_set = closure_set[i] - i += 1 - symbols = {} - for item in item_set: - for symbol in item.usyms: - symbols[symbol] = None - for symbol in symbols: - g = self.lr0_goto(item_set, symbol) - if not g or id(g) in self.lr0_cidhash: - continue - self.lr0_cidhash[id(g)] = len(closure_set) - closure_set.append(g) - return closure_set - - def compute_nullable_nonterminals(self): - nullable = set() - num_nullable = 0 - while True: - num_nullable = self._process_nullable_step(nullable, num_nullable) - if len(nullable) == num_nullable: - break - return nullable - - def _process_nullable_step(self, nullable, num_nullable): - for p in self.grammar.productions[1:]: - if p.len == 0: - nullable.add(p.name) - continue - for t in p.prod: - if t not in nullable: - break - else: - nullable.add(p.name) - return len(nullable) - - def find_nonterminal_transitions(self, input_item): - trans = [] - for stateno, state in enumerate(input_item): - for p in state: - self._process_transition(p, stateno, trans) - return trans - - def _process_transition(self, p, stateno, trans): - """ - Process a single transition and update the trans list. - This method checks if the transition should be added. 
- """ - if p.lr_index < p.len - 1: - t = (stateno, p.prod[p.lr_index + 1]) - if t[1] in self.grammar.nonterminals: - if t not in trans: - trans.append(t) - - def dr_relation(self, input_item, trans, nullable): - state, n = trans - terms = [] - g = self.lr0_goto(input_item[state], n) - for p in g: - self._process_relation(p, terms) - if state == 0 and n == self.grammar.productions[0].prod[0]: - terms.append('$end') - return terms - - def _process_relation(self, p, terms): - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index + 1] - if a in self.grammar.terminals: - if a not in terms: - terms.append(a) - - def reads_relation(self, item, trans, empty): - # Look for empty transitions - rel = [] - state, n = trans - - g = self.lr0_goto(item[state], n) - j = self.lr0_cidhash.get(id(g), -1) - for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index + 1] - if a in empty: - rel.append((j, a)) - - return rel - - def compute_lookback_includes(self, item, trans, nullable): - lookdict = {} - includedict = {} - dtrans = {t: 1 for t in trans} - for state, n in trans: - lookb = [] - includes = [] - for p in item[state]: - if p.name != n: - continue - self._process_lookback_and_include(item, state, p, dtrans, includes, lookb, nullable) - for i in includes: - if i not in includedict: - includedict[i] = [] - includedict[i].append((state, n)) - lookdict[(state, n)] = lookb - return lookdict, includedict - - def _process_lookback_and_include(self, item, state, p, dtrans, includes, lookb, nullable): - """ - Process lookback and include relations for a single production. - This handles the inner `while` loop logic and `lookb` and `includes` updates. - """ - lr_index = p.lr_index - j = state - while lr_index < p.len - 1: - lr_index += 1 - t = p.prod[lr_index] - if (j, t) in dtrans: - self._process_include_relation(p, lr_index, j, t, includes, nullable) - g = self.lr0_goto(item[j], t) - j = self.lr0_cidhash.get(id(g), -1) - self._process_lookback_relation(item, j, p, lookb) - - def _process_include_relation(self, p, lr_index, j, t, includes, nullable): - """ - Process the includes relation based on the production and nullable symbols. - """ - li = lr_index + 1 - while li < p.len: - if p.prod[li] in self.grammar.terminals: - break - if p.prod[li] not in nullable: - break - li += 1 - else: - includes.append((j, t)) - - @staticmethod - def _process_lookback_relation(item, j, p, lookb): - """ - Process the lookback relation by comparing the current and previous productions. 
- """ - for r in item[j]: - if r.name != p.name: - continue - if r.len != p.len: - continue - i = 0 - while i < r.lr_index: - if r.prod[i] != p.prod[i + 1]: - break - i += 1 - else: - lookb.append((j, r)) - - def compute_read_sets(self, c, ntrans, nullable): - fp = lambda x: self.dr_relation(c, x, nullable) - r = lambda x: self.reads_relation(c, x, nullable) - f = digraph(ntrans, r, fp) - return f - - @staticmethod - def compute_follow_sets(ntrans, readsets, inclsets): - fp = lambda x: readsets[x] - r = lambda x: inclsets.get(x, []) - f = digraph(ntrans, r, fp) - return f - - def add_lookaheads(self, lookbacks, followset): - for trans, lb in lookbacks.items(): - # Loop over productions in lookback - for state, p in lb: - self._ensure_lookaheads(p, state) # Ensure lookaheads for the production - - f = followset.get(trans, []) - self._add_lookaheads_to_production(p, state, f) # Add lookaheads from followset - - @staticmethod - def _ensure_lookaheads(p, state): - if state not in p.lookaheads: - p.lookaheads[state] = [] - - @staticmethod - def _add_lookaheads_to_production(p, state, followset_elements): - for a in followset_elements: - if a not in p.lookaheads[state]: - p.lookaheads[state].append(a) - - # ----------------------------------------------------------------------------- - # - # This function does all of the work of adding lookahead information for use - # with LALR parsing - # ----------------------------------------------------------------------------- - - def add_lalr_lookaheads(self, c): - # Determine all of the nullable nonterminals - nullable = self.compute_nullable_nonterminals() - - # Find all non-terminal transitions - trans = self.find_nonterminal_transitions(c) - - # Compute read sets - readsets = self.compute_read_sets(c, trans, nullable) - - # Compute lookback/includes relations - lookd, included = self.compute_lookback_includes(c, trans, nullable) - - # Compute LALR follow sets - followsets = self.compute_follow_sets(trans, readsets, included) - - # Add all of the lookaheads - self.add_lookaheads(lookd, followsets) - - @staticmethod - def handle_shift_reduce_conflict(st, a, p, r, precedence, productions, log, j=None): - """Handle shift/reduce conflict.""" - if r > 0: - sprec, slevel = precedence.get(a, ('right', 0)) - rprec, rlevel = productions[p.number].prec - if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): - return -p.number, p, 'reduce', None - elif (slevel == rlevel) and (rprec == 'nonassoc'): - return None, None, None, None - else: - return j, p, 'shift', None - elif r < 0: - oldp = productions[-r] - pp = productions[p.number] - if oldp.line > pp.line: - return -p.number, p, 'reduce', oldp - else: - return -oldp.number, oldp, 'reduce', pp - return None, None, None, None - - @staticmethod - def log_shift_reduce_action(log, a, m): - """Log shift/reduce or reduce/reduce actions.""" - log.info(' %-15s %s', a, m) - - def process_state_transitions(self, st, item, st_action, precedence, productions, action, goto, log): - """Process state transitions and handle conflicts.""" - st_goto = {} - actlist = [] - st_actionp = {} - - for p in item: - if p.len == p.lr_index + 1: - self.handle_reduce_actions(st, p, st_action, st_actionp, precedence, productions, actlist, log) - else: - self.handle_shift_actions(st, p, st_action, st_actionp, precedence, productions, actlist, log, item) - - return st_action, st_actionp, st_goto, actlist - - def handle_reduce_actions(self, st, p, st_action, st_actionp, precedence, productions, actlist, log): - """Handle reduce 
actions.""" - if p.name == "S'": - st_action['$end'] = 0 - st_actionp['$end'] = p - else: - laheads = p.lookaheads[st] - for a in laheads: - actlist.append((a, p, f'reduce using rule {p.number} ({p})')) - r = st_action.get(a) - if r is not None: - self.handle_shift_reduce_conflict(st, a, p, r, precedence, productions, log) - else: - st_action[a] = -p.number - st_actionp[a] = p - productions[p.number].reduced += 1 - - def handle_shift_actions(self, st, p, st_action, st_actionp, precedence, productions, actlist, log, item): - """Handle shift actions.""" - i = p.lr_index - a = p.prod[i + 1] - if a in self.grammar.terminals: - g = self.lr0_goto(item, a) - j = self.lr0_cidhash.get(id(g), -1) - if j >= 0: - actlist.append((a, p, f'shift and go to state {j}')) - r = st_action.get(a) - if r is not None: - self.handle_shift_shift_conflict(st, a, r, j, precedence, productions, st_action, st_actionp, log, - p) - else: - st_action[a] = j - st_actionp[a] = p - - def handle_shift_shift_conflict(self, st, a, r, j, precedence, productions, st_action, st_actionp, log, p): - """Handle shift/shift conflicts.""" - if r > 0 and r != j: - raise LALRError(f'Shift/shift conflict in state {st}') - elif r < 0: - sprec, slevel = precedence.get(a, ('right', 0)) - rprec, rlevel = productions[st_actionp[a].number].prec - if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): - productions[st_actionp[a].number].reduced -= 1 - st_action[a] = j - st_actionp[a] = p - elif slevel == rlevel and rprec == 'nonassoc': - st_action[a] = None - else: - self.log_shift_reduce_action(self, log, a, "shift") - - def lr_parse_table(self): - productions = self.grammar.productions - precedence = self.grammar.precedence - goto = self.lr_goto - action = self.lr_action - log = self.log - actionp = {} - item = self.lr0_items() - self.add_lalr_lookaheads(item) - st = 0 - for i in item: - log.info('') - log.info(f'state {st}') - log.info('') - self._log_productions(i, log) # Log productions for the current state - log.info('') - - # Process the state transitions and conflicts - st_action = {} - st_actionp = {} - st_goto = {} - st_action, st_actionp, st_goto, actlist = self.process_state_transitions(st, i, st_action, precedence, - productions, action, goto, log) - self._log_actions(st_action, st_actionp, actlist, log) - self._handle_not_used_actions(st_action, st_actionp, actlist, log) - self._handle_state_transitions_for_nonterminals(i, st_goto, log) - action[st] = st_action - actionp[st] = st_actionp - goto[st] = st_goto - st += 1 - - @staticmethod - def _log_productions(item, log): - """ - Log the productions in a given state I. - """ - for p in item: - log.info(f' ({p.number}) {p}') - - @staticmethod - def _log_actions(st_action, st_actionp, actlist, log): - """ - Log actions for a given state transition. - """ - for a, p, m in actlist: - if a in st_action: - if p is st_actionp[a]: - log.info(' %-15s %s', a, m) - - def _handle_not_used_actions(self, st_action, st_actionp, actlist, log): - """ - Handle actions that are not used and log them. - """ - _actprint = {} - not_used = False - for a, p, m in actlist: - if a in st_action: - not_used = self._check_not_used_action(a, p, st_actionp, m, _actprint, log) or not_used - if not_used: - log.debug('') - - @staticmethod - def _check_not_used_action(a, p, st_actionp, m, _actprint, log): - """ - Check if the action is not used and log it. - """ - if p is not st_actionp[a]: - if (a, m) not in _actprint: - log.debug(f' ! 
%-15s [ {m} ]') - _actprint[(a, m)] = 1 - return True - return False - - def _handle_state_transitions_for_nonterminals(self, item, st_goto, log): - """ - Handle state transitions for nonterminals and log the corresponding transitions. - """ - nkeys = {} - for ii in item: - for s in ii.usyms: - if s in self.grammar.nonterminals: - nkeys[s] = None - for n in nkeys: - g = self.lr0_goto(item, n) - j = self.lr0_cidhash.get(id(g), -1) - if j >= 0: - st_goto[n] = j - log.info(f' %-30s shift and go to state {j}') - - -def get_caller_module_dict(levels): - f = sys._getframe(levels) - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - return ldict - - -# ----------------------------------------------------------------------------- -# -# This takes a raw grammar rule string and parses it into production data -# ----------------------------------------------------------------------------- -def parse_grammar(doc, file, line): - grammar = [] - pstrings = doc.splitlines() - dline = line - lastp = None - - for ps in pstrings: - dline += 1 - p = ps.split() - if not p: - continue - try: - prodname, syms, lastp = parse_rule(p, lastp, dline, file, ps) - grammar.append((file, dline, prodname, syms)) - except SyntaxError: - raise - except Exception: - raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) - - return grammar - - -def parse_rule(p, lastp, dline, file, ps): - if p[0] == '|': - if not lastp: - raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) - prodname = lastp - syms = p[1:] - else: - prodname = p[0] - lastp = prodname - syms = p[2:] - assign = p[1] - if assign != ':' and assign != '::=': - raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) - - return prodname, syms, lastp - - -class ParserReflect(object): - def __init__(self, pdict, log=None): - self.pdict = pdict - self.start = None - self.error_func = None - self.tokens = None - self.modules = set() - self.grammar = [] - self.error = False - - if log is None: - self.log = Logger(sys.stderr) - else: - self.log = log - - # Get all of the basic information - def get_all(self): - self.get_start() - self.get_error_func() - self.get_tokens() - self.get_precedence() - self.get_pfunctions() - - # Validate all of the information - def validate_all(self): - self.validate_start() - self.validate_error_func() - self.validate_tokens() - self.validate_precedence() - self.validate_pfunctions() - self.validate_modules() - return self.error - - # Compute a signature over the grammar - def signature(self): - parts = [] - try: - if self.start: - parts.append(self.start) - if self.prec: - parts.append(''.join([''.join(p) for p in self.prec])) - if self.tokens: - parts.append(' '.join(self.tokens)) - for f in self.pfuncs: - if f[3]: - parts.append(f[3]) - except (TypeError, ValueError): - pass - return ''.join(parts) - - def validate_modules(self): - # Match def p_funcname( - fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') - - for module in self.modules: - try: - lines, linen = inspect.getsourcelines(module) - except IOError: - continue - self.check_function_redefinitions(lines, fre, module) - - def check_function_redefinitions(self, lines, fre, module): - counthash = {} - for linen, line in enumerate(lines, 1): - m = fre.match(line) - if m: - name = m.group(1) - prev = counthash.get(name) - if prev: - self.report_redefinition(module, linen, name, prev) - else: - counthash[name] = linen - - def report_redefinition(self, module, linen, name, prev): - filename = 
inspect.getsourcefile(module) - self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d', - filename, linen, name, prev) - - # Get the start symbol - def get_start(self): - self.start = self.pdict.get('start') - - # Validate the start symbol - def validate_start(self): - if self.start is not None: - if not isinstance(self.start, str): - self.log.error("'start' must be a string") - - # Look for error handler - def get_error_func(self): - self.error_func = self.pdict.get('p_error') - - # Validate the error function - def validate_error_func(self): - if self.error_func: - if isinstance(self.error_func, types.FunctionType): - ismethod = 0 - elif isinstance(self.error_func, types.MethodType): - ismethod = 1 - else: - self.log.error("'p_error' defined, but is not a function or method") - self.error = True - return - - eline = self.error_func.__code__.co_firstlineno - efile = self.error_func.__code__.co_filename - module = inspect.getmodule(self.error_func) - self.modules.add(module) - - argcount = self.error_func.__code__.co_argcount - ismethod - if argcount != 1: - self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) - self.error = True - - # Get the tokens map - def get_tokens(self): - tokens = self.pdict.get('tokens') - if not isinstance(tokens, (list, tuple)): - self.log.error('tokens must be a list or tuple') - self.error = True - return - - if not tokens: - self.log.error('tokens is empty') - self.error = True - return - - self.tokens = sorted(tokens) - - # Validate the tokens - def validate_tokens(self): - # Validate the tokens. - if 'error' in self.tokens: - self.log.error("Illegal token name 'error'. Is a reserved word") - self.error = True - return - - terminals = set() - for n in self.tokens: - if n in terminals: - self.log.warning('Token %r multiply defined', n) - terminals.add(n) - - # Get the precedencemap (if any) - def get_precedence(self): - self.prec = self.pdict.get('precedence') - - # Validate and parse the precedencemap - def validate_precedence(self): - preclist = [] - if self.prec: - if not isinstance(self.prec, (list, tuple)): - self.log.error('precedencemust be a list or tuple') - self.error = True - return - - for level, p in enumerate(self.prec): - if not isinstance(p, (list, tuple)): - self.log.error('bad precedencetable') - self.error = True - return - - if len(p) < 2: - self.log.error('Malformed precedenceentry %s. 
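For reference while reading the precedence validation here: the declaration is a sequence of (assoc, term, ..., term) tuples, flattened into (term, assoc, level) triples with levels starting at 1. A sketch with made-up token names:

    precedence = (
        ('left', 'PLUS', 'MINUS'),
        ('left', 'TIMES', 'DIVIDE'),
        ('right', 'UMINUS'),
    )

    preclist = []
    for level, (assoc, *terms) in enumerate(precedence):
        preclist.extend((term, assoc, level + 1) for term in terms)

    print(preclist)
    # [('PLUS', 'left', 1), ('MINUS', 'left', 1), ('TIMES', 'left', 2),
    #  ('DIVIDE', 'left', 2), ('UMINUS', 'right', 3)]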
Must be (assoc, term, ..., term)', p) - self.error = True - return - - assoc = p[0] - if not isinstance(assoc, str): - self.log.error('precedenceassociativity must be a string') - self.error = True - return - - # 提取内部逻辑到一个子函数 - self._validate_terms_and_append(p[1:], assoc, level + 1, preclist) - - self.preclist = preclist - - def _validate_terms_and_append(self, terms, assoc, level, preclist): - for term in terms: - if not isinstance(term, str): - self.log.error('precedenceitems must be strings') - self.error = True - return - preclist.append((term, assoc, level + 1)) - - # Get all p_functions from the grammar - def get_pfunctions(self): - p_functions = [] - for name, item in self.pdict.items(): - if not name.startswith('p_') or name == 'p_error': - continue - if isinstance(item, (types.FunctionType, types.MethodType)): - line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) - module = inspect.getmodule(item) - p_functions.append((line, module, name, item.__doc__)) - - # Sort all of the actions by line number; make sure to stringify - # modules to make them sortable, since `line` may not uniquely sort all - # p functions - p_functions.sort(key=lambda p_function: ( - p_function[0], - str(p_function[1]), - p_function[2], - p_function[3])) - self.pfuncs = p_functions - - def validate_pfunctions(self): - grammar = [] - # Check for non-empty symbols - if len(self.pfuncs) == 0: - self.log.error('no rules of the form p_rulename are defined') - self.error = True - return - - for line, module, name, doc in self.pfuncs: - file = inspect.getsourcefile(module) - func = self.pdict[name] - if isinstance(func, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - if func.__code__.co_argcount > reqargs: - self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) - self.error = True - elif func.__code__.co_argcount < reqargs: - self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) - self.error = True - elif not func.__doc__: - self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', - file, line, func.__name__) - else: - self.process_grammar_rule(doc, file, line, name, grammar) - self.modules.add(module) - - for n, v in self.pdict.items(): - if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): - continue - if n.startswith('t_'): - continue - if n.startswith('p_') and n != 'p_error': - self.log.warning('%r not defined as a function', n) - - self._check_possible_grammar_rule(v, n) - - self.grammar = grammar - - # Validate all of the p_functions - def process_grammar_rule(self, doc, file, line, name, grammar): - # 处理文档字符串并解析语法 - parsed_g = self.parse_grammar_with_error_handling(doc, file, line) - if parsed_g is not None: - for g in parsed_g: - grammar.append((name, g)) - - def parse_grammar_with_error_handling(self, doc, file, line): - try: - return parse_grammar(doc, file, line) - except SyntaxError as e: - self.log.error(str(e)) - self.error = True - return None - - def _check_possible_grammar_rule(self, v, n): - """ - Helper function to check if a function might be a possible grammar rule. - This is extracted from the loop to reduce complexity. 
- """ - if not self._is_possible_grammar_function(v): - return - - if self._has_doc(v): - self._check_doc_for_grammar_rule(v, n) - - def _is_possible_grammar_function(self, v): - """Check if v is a possible grammar function based on argument count.""" - return ( - (isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or - (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2) - ) - - def _has_doc(self, v): - """Check if v has a docstring.""" - return v.__doc__ is not None - - def _check_doc_for_grammar_rule(self, v, n): - """Check if the docstring of v follows the expected grammar rule format.""" - try: - doc = v.__doc__.split(' ') - if doc[1] == ':': - self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', - v.__code__.co_filename, v.__code__.co_firstlineno, n) - except IndexError: - pass - - -def yacc(*, debug=YACC_DEBUG, module=None, start=None, - check_recursion=True, optimize=False, debugfile=DEBUG_FILE, - debuglog=None, errorlog=None): - global parse - - # Initialize errorlog if None - if errorlog is None: - errorlog = Logger(sys.stderr) - - # Get the module dictionary used for the parser - pdict = get_module_dict(module) - - # Set start symbol if specified - if start is not None: - pdict['start'] = start - - # Collect parser information - pinfo = ParserReflect(pdict, log=errorlog) - pinfo.get_all() - - # Handle errors - if pinfo.error or pinfo.validate_all(): - raise YaccError('Unable to build parser') - - # Log warnings for missing error function - if not pinfo.error_func: - errorlog.warning('no p_error() function is defined') - - # Create a grammar object and add productions - grammar = create_grammar(pinfo, errorlog) - - # Set start symbol for grammar - set_start_symbol(start, pinfo, grammar, errorlog) - - # Verify the grammar structure - errors = verify_grammar(grammar, errorlog) - - if errors: - raise YaccError('Unable to build parser') - - # Check for recursion and conflicts - check_recursion_and_conflicts(grammar, errorlog, check_recursion) - - # Run the LRTable on the grammar and return the parser - lr = LRTable(grammar, debuglog) - report_conflicts(lr, debuglog, errorlog, debug) - return build_parser(lr, pinfo) - - -def get_module_dict(module): - if module: - return get_module_dict_from_module(module) - return get_caller_module_dict(2) - - -def get_module_dict_from_module(module): - _items = [(k, getattr(module, k)) for k in dir(module)] - pdict = dict(_items) - - # Ensure that __file__ and __package__ are set if not present - if '__file__' not in pdict: - pdict['__file__'] = sys.modules[pdict['__module__']].__file__ - if '__package__' not in pdict and '__module__' in pdict: - if hasattr(sys.modules[pdict['__module__']], '__package__'): - pdict['__package__'] = sys.modules[pdict['__module__']].__package__ - return pdict - - -def create_grammar(pinfo, errorlog): - grammar = Grammar(pinfo.tokens) - - # Set precedencelevel for terminals - for term, assoc, level in pinfo.preclist: - try: - grammar.set_precedence(term, assoc, level) - except GrammarError as e: - errorlog.warning('%s', e) - - # Add productions to the grammar - for funcname, gram in pinfo.grammar: - file, line, prodname, syms = gram - try: - grammar.add_production(prodname, syms, funcname, file, line) - except GrammarError as e: - errorlog.error('%s', e) - - return grammar - - -def set_start_symbol(start, pinfo, grammar, errorlog): - try: - if start is None: - grammar.set_start(pinfo.start) - else: - grammar.set_start(start) - except GrammarError as e: - 
errorlog.error(str(e)) - - -def verify_grammar(grammar, errorlog): - errors = False - - # Verify undefined symbols - undefined_symbols = grammar.undefined_symbols() - for sym, prod in undefined_symbols: - errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) - errors = True - - # Check unused terminals - unused_terminals = grammar.unused_terminals() - if unused_terminals: - report_unused_terminals(unused_terminals, errorlog) - - # Check unused non-terminals - unused_rules = grammar.unused_rules() - report_unused_rules(unused_rules, errorlog) - - if len(unused_terminals) > 1: - errorlog.warning('There are %d unused tokens', len(unused_terminals)) - if len(unused_rules) > 1: - errorlog.warning('There are %d unused rules', len(unused_rules)) - - # Log recursion or other errors - return errors - - -def report_unused_terminals(unused_terminals, errorlog): - errorlog.warning('Unused terminals:') - for term in unused_terminals: - errorlog.warning('Token %r defined, but not used', term) - - -def report_unused_rules(unused_rules, errorlog): - for prod in unused_rules: - errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) - - -def check_recursion_and_conflicts(grammar, errorlog, check_recursion): - if check_recursion: - unreachable = grammar.find_unreachable() - for u in unreachable: - errorlog.warning('Symbol %r is unreachable', u) - - infinite = grammar.infinite_cycles() - for inf in infinite: - errorlog.error('Infinite recursion detected for symbol %r', inf) - - unused_prec = grammar.unused_precedence() - for term, assoc in unused_prec: - errorlog.error('precedencerule %r defined for unknown symbol %r', assoc, term) - - -def report_conflicts(lr, debuglog, errorlog, debug): - if debug: - num_sr = len(lr.sr_conflicts) - if num_sr > 0: - errorlog.warning('%d shift/reduce conflicts', num_sr) - - num_rr = len(lr.rr_conflicts) - if num_rr > 0: - errorlog.warning('%d reduce/reduce conflicts', num_rr) - - # Report conflicts to debug log - if lr.sr_conflicts or lr.rr_conflicts: - debuglog.warning('') - debuglog.warning('Conflicts:') - for state, tok, resolution in lr.sr_conflicts: - debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) - for state, rule, rejected in lr.rr_conflicts: - debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) - debuglog.warning('rejected rule (%s) in state %d', rejected, state) - errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) - - -def build_parser(lr, pinfo): - lr.bind_callables(pinfo.pdict) - parser = LRParser(lr, pinfo.error_func) - global parse - parse = parser.parse - return parser +localDirPath = os.path.dirname(os.path.realpath(__file__)) + +sys.path.append(sys.path[0] + "/../") +from local.parser.my_lexer import tokens +from local.parser.my_lexer import token_dict +from local.parser.functions import get_function +from local.parser.variables import get_variable +from local.parser.yacc import yacc + +def exec_fn(fn): + fn[0](*fn[1]) + +class MyYacc(): + + tokens = tokens + + def p_conditions_relation_function(p): + '''sentence : conditions THEN function + ''' + if p[1]: + exec_fn(p[3]) + + def p_conditions_or(p): + 'conditions : conditions OR and_conditions' + p[0] = p[1] or p[3] + + def p_conditions_and_conditions(p): + 'conditions : and_conditions' + p[0] = p[1] + + def p_and_conditions_and(p): + ''' + and_conditions : and_conditions AND not_conditions + 
''' + p[0] = p[1] and p[3] + + def p_and_conditions_cdt(p): + 'and_conditions : not_conditions' + p[0] = p[1] + + def p_not_cdt(p): + 'not_conditions : NOT cdt' + p[0] = not p[2] + + def p_not_conditions_cdt(p): + 'not_conditions : cdt' + p[0] = p[1] + + def p_cdt_ops(p): + ''' + cdt : expr EQUAL expr + | expr NEQUAL expr + | expr GE expr + | expr GT expr + | expr LE expr + | expr LT expr + ''' + if p[2] == token_dict['EQUAL']: + p[0] = (p[1] == p[3]) + if p[2] == token_dict['NEQUAL']: + p[0] = (p[1] != p[3]) + if p[2] == token_dict['GE']: + p[0] = (p[1] >= p[3]) + if p[2] == token_dict['GT']: + p[0] = (p[1] > p[3]) + if p[2] == token_dict['LE']: + p[0] = (p[1] <= p[3]) + if p[2] == token_dict['LT']: + p[0] = (p[1] < p[3]) + + def p_cdt_parens(p): + 'cdt : LPAREN conditions RPAREN' + p[0] = p[2] + + def p_expr_plus_minus(p): + ''' + expr : expr PLUS term + | expr MINUS term + ''' + if p[2] == token_dict['PLUS']: + p[0] = p[1] + p[3] + if p[2] == token_dict['MINUS']: + p[0] = p[1] - p[3] + + def p_expr_term(p): + 'expr : term' + p[0] = p[1] + + def p_term_times_divide_mod(p): + ''' + term : term TIMES factor + | term DIVIDE factor + | term MOD factor + ''' + if p[2] == token_dict['TIMES']: + p[0] = p[1] * p[3] + if p[2] == token_dict['DIVIDE']: + p[0] = p[1] / p[3] + if p[2] == token_dict['MOD']: + p[0] = p[1] % p[3] + + def p_term_factor(p): + 'term : factor' + p[0] = p[1] + + def p_factor_assign_simple(p): + ''' + factor : number + | string + ''' + p[0] = p[1] + + def p_factor_id(p): + 'factor : id' + p[0] = get_variable(p[1]) + + def p_factor_null(p): + 'factor : NULL' + p[0] = None + + def p_factor_bool(p): + ''' + factor : TRUE + | FALSE + ''' + if p[1] == token_dict['TRUE']: + p[0] = True + elif p[1] == token_dict['FALSE']: + p[0] = False + + def p_factor_paren(p): + 'factor : LPAREN expr RPAREN' + p[0] = p[2] + + def p_function(p): + 'function : id LPAREN variables RPAREN' + p[0] = (get_function(p[1]), p[3]) + + def p_variables_comma(p): + ''' + variables : variables COMMA expr + ''' + p[1].append(p[3]) + p[0] = p[1] + + def p_variables_factor(p): + 'variables : expr' + p[0] = [p[1]] + + #Error rule for syntax errors + @staticmethod + def p_error(p): + raise Exception('Syntax error in input!') + + def build(self): + self.yacc = yacc(module=MyYacc) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 74ff1b9f..11390a1a 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -225,8 +225,6 @@ class LRParser: defaulted_states = self.defaulted_states pslice = YaccProduction(None) errorcount = 0 - if debug: - debug.info('PARSE DEbUG start') pslice.lexer = lexer pslice.parser = self if put is not None: @@ -258,13 +256,7 @@ class LRParser: sym.type = pname # Production name sym.value = None if debug: - if plen: - debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, - '[' + ','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]]) + ']', - goto[statestack[-1 - plen]][pname]) - else: - debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], - goto[statestack[-1]][pname]) + self.log_reduce_action(debug, p, plen, symstack, statestack, goto) if plen: targ = symstack[-plen - 1:] targ[0] = sym @@ -276,8 +268,7 @@ class LRParser: self.state = state p.callable(pslice) del statestack[-plen:] - if debug: - debug.info('Result : %s', format_result(pslice[0])) + self.log_debug_info(debug, pslice) symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) @@ -293,16 +284,11 @@ class 
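A usage sketch for the lexer/parser pair this series adds. The import paths, the warn() helper, and the rule text are assumptions for illustration (the shipped rules live in script/local/rules/*.csv), and parse() is assumed to keep the PLY-compatible lexer= keyword:

    from local.parser.my_lexer import MyLexer   # assumed module path
    from local.parser.my_yacc import MyYacc     # assumed module path

    lexer = MyLexer()
    lexer.build()
    parser = MyYacc()
    parser.build()

    # Hypothetical rule text; warn would have to be registered in functions.py.
    rule = "max_connections > 100 AND track_activities = 'on' THEN warn('saturation')"
    parser.yacc.parse(rule, lexer=lexer.lexer)  # warn(...) runs only if the conditions hold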
LRParser: self.errorok = False continue else: - if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - targ = [sym] - pslice.slice = targ + self.update_tracking_info(tracking, sym, lexer, pslice) try: self.state = state p.callable(pslice) - if debug: - debug.info('Result : %s', format_result(pslice[0])) + self.log_debug_info(debug, pslice) symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) @@ -319,9 +305,7 @@ class LRParser: if t == 0: n = symstack[-1] result = getattr(n, 'value', None) - if debug: - debug.info('Done : Returning %s', format_result(result)) - debug.info('PARSE DEbUG END') + self.log_parse_debug_info() return result if t is None: if debug: @@ -358,16 +342,12 @@ class LRParser: if lookahead.type != 'error': lookahead = self.handle_error(lookahead, symstack, lookaheadstack, tracking) else: - sym = symstack.pop() - if tracking: - lookahead.lineno = sym.lineno - lookahead.lexpos = sym.lexpos - statestack.pop() - state = statestack[-1] + state = self.pop_and_update_state(symstack, statestack, tracking, lookahead) continue raise RuntimeError('yacc: internal parser error!!!\n') - def _initialize_parser(self, debug, lexer): + @staticmethod + def _initialize_parser(debug, lexer): if isinstance(debug, int) and debug: debug = Logger(sys.stderr) if not lexer: @@ -375,7 +355,8 @@ class LRParser: lexer = lex.lexer return debug, lexer - def parse_step(self, state, lookahead, lookaheadstack, statestack, symstack, actions, defaulted_states, debug, + @staticmethod + def parse_step(state, lookahead, lookaheadstack, statestack, symstack, actions, defaulted_states, debug, get_token): if debug: debug.debug('State : %s', state) @@ -402,7 +383,8 @@ class LRParser: return lookahead, lookaheadstack, state, t - def shift_and_goto(self, t, statestack, symstack, lookahead, debug, errorcount): + @staticmethod + def shift_and_goto(t, statestack, symstack, lookahead, debug, errorcount): """Handle the shift and goto action during parsing.""" statestack.append(t) # Shift the state state = t @@ -414,7 +396,8 @@ class LRParser: errorcount -= 1 # Decrement error count if there was a previous error return state, symstack, lookahead, errorcount - def update_tracking_info(self, tracking, targ, sym): + @staticmethod + def update_tracking_info(tracking, targ, sym): if tracking: t1 = targ[1] sym.lineno = t1.lineno @@ -423,7 +406,8 @@ class LRParser: sym.endlineno = getattr(t1, 'endlineno', t1.lineno) sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) - def handle_error(self, lookahead, symstack, lookaheadstack, tracking): + @staticmethod + def handle_error(lookahead, symstack, lookaheadstack, tracking): sym = symstack[-1] if sym.type == 'error': if tracking: @@ -442,7 +426,8 @@ class LRParser: lookahead = t return lookahead - def handle_syntax_error(self, errtoken, lookahead): + @staticmethod + def handle_syntax_error(errtoken, lookahead): if errtoken: if hasattr(errtoken, 'lineno'): lineno = lookahead.lineno @@ -456,6 +441,52 @@ class LRParser: sys.stderr.write('yacc: Parse error in input. 
EOF\n')
         return
 
+    @staticmethod
+    def log_reduce_action(debug, p, plen, symstack, statestack, goto):
+        if plen:
+            debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
+                       '[' + ','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]]) + ']',
+                       goto[statestack[-1 - plen]][p.name])
+        else:
+            debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
+                       goto[statestack[-1]][p.name])
+
+    @staticmethod
+    def log_debug_info(debug, pslice):
+        if debug:
+            debug.info('Result : %s', format_result(pslice[0]))
+
+    @staticmethod
+    def update_tracking_sym(tracking, sym, lexer, pslice):
+        if tracking:
+            sym.lineno = lexer.lineno
+            sym.lexpos = lexer.lexpos
+        targ = [sym]
+        pslice.slice = targ
+
+    @staticmethod
+    def log_parse_debug_info(debug, result):
+        if debug:
+            debug.info('Done : Returning %s', format_result(result))
+            debug.info('PARSE DEBUG END')
+
+    @staticmethod
+    def pop_and_update_state(symstack, statestack, tracking, lookahead):
+        # Pop the top symbol off the symbol stack
+        sym = symstack.pop()
+
+        # If tracking is enabled, copy its line/position info onto the lookahead
+        if tracking:
+            lookahead.lineno = sym.lineno
+            lookahead.lexpos = sym.lexpos
+
+        # Pop the state stack and fall back to the previous state
+        statestack.pop()
+        state = statestack[-1]
+
+        # Return the updated state
+        return state
+
 _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
-- 
Gitee

From dcdbfdc18ca4efa689254345f048a156e03bd98e Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Thu, 12 Dec 2024 17:33:07 +0800
Subject: [PATCH 45/87] dsgsdg

---
 script/local/parser/yacc.py | 57 ++++++++++++++++++++++---------------
 1 file changed, 34 insertions(+), 23 deletions(-)

diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py
index 11390a1a..7863811a 100644
--- a/script/local/parser/yacc.py
+++ b/script/local/parser/yacc.py
@@ -308,29 +308,12 @@ class LRParser:
             self.log_parse_debug_info()
             return result
         if t is None:
-            if debug:
-                debug.error('Error : %s',
-                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
-            if errorcount == 0 or self.errorok:
-                errorcount = ERROR_COUNT
-                self.errorok = False
-                errtoken = lookahead
-                if errtoken.type == '$end':
-                    errtoken = None  # End of file!
-                if self.errorfunc:
-                    if errtoken and not hasattr(errtoken, 'lexer'):
-                        errtoken.lexer = lexer
-                    self.state = state
-                    tok = self.errorfunc(errtoken)
-                    if self.errorok:
-                        lookahead = tok
-                        errtoken = None
-                        continue
-                else:
-                    self.handle_syntax_error(errtoken, lookahead)
-
-            else:
-                errorcount = ERROR_COUNT
+            if t is None:
+                lookahead, errtoken, errorcount = self.handle_parse_error(debug, errorcount, lookahead, errtoken,
+                                                                          state, lexer)
+                if errtoken is None:
+                    break  # If there's no valid token, exit
+                continue  # Otherwise, continue to get a new token
         if len(statestack) <= 1 and lookahead.type != '$end':
             lookahead = None
             errtoken = None
@@ -487,6 +470,34 @@ class LRParser:
         # Return the updated state
         return state
 
+    def handle_parse_error(self, debug, errorcount, lookahead, errtoken, state, lexer):
+        """
+        Handle an error reported during parsing.
+        """
+        if debug:
+            debug.error('Error : %s',
+                        ('%s . %s' % (' '.join([xx.type for xx in self.symstack][1:]), str(lookahead))).lstrip())
+
+        if errorcount == 0 or self.errorok:
+            errorcount = ERROR_COUNT
+            self.errorok = False
+            errtoken = lookahead
+            if errtoken.type == '$end':
+                errtoken = None  # End of file!
+ + if self.errorfunc: + if errtoken and not hasattr(errtoken, 'lexer'): + errtoken.lexer = lexer + self.state = state + tok = self.errorfunc(errtoken) + if self.errorok: + lookahead = tok + errtoken = None + return lookahead, errtoken, errorcount # Continue with new token + else: + self.handle_syntax_error(errtoken, lookahead) + return lookahead, errtoken, errorcount + _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') -- Gitee From 9233b2edb142131ceaa9f16b6fe6078af1666989 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Fri, 13 Dec 2024 14:23:44 +0800 Subject: [PATCH 46/87] asfaf --- script/local/parser/yacc.py | 128 +++++++++++++++++++----------------- 1 file changed, 69 insertions(+), 59 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 7863811a..e3623f01 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -249,71 +249,22 @@ class LRParser: debug, errorcount) continue if t < 0: - p = prod[-t] - pname = p.name - plen = p.len - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - if debug: - self.log_reduce_action(debug, p, plen, symstack, statestack, goto) - if plen: - targ = symstack[-plen - 1:] - targ[0] = sym - self.update_tracking_info(tracking, targ, sym) - pslice.slice = targ - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - self.state = state - p.callable(pslice) - del statestack[-plen:] - self.log_debug_info(debug, pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - lookaheadstack.append(lookahead) # Save the current lookahead token - symstack.extend(targ[1:-1]) # Put the production slice back on the stack - statestack.pop() # Pop back one state (before the reduce) - state = statestack[-1] - sym.type = 'error' - sym.value = 'error' - lookahead = sym - errorcount = ERROR_COUNT - self.errorok = False - continue - else: - self.update_tracking_info(tracking, sym, lexer, pslice) - try: - self.state = state - p.callable(pslice) - self.log_debug_info(debug, pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - lookaheadstack.append(lookahead) # Save the current lookahead token - statestack.pop() # Pop back one state (before the reduce) - state = statestack[-1] - sym.type = 'error' - sym.value = 'error' - lookahead = sym - errorcount = ERROR_COUNT - self.errorok = False - continue + lookahead, state, symstack, statestack, errorcount = self.process_production_rule( + lookaheadstack,lexer,t, prod, symstack, statestack, lookahead, state, goto, pslice, tracking, errorcount, debug + ) + continue if t == 0: n = symstack[-1] result = getattr(n, 'value', None) self.log_parse_debug_info() return result if t is None: - if t is None: - lookahead, errtoken, errorcount = self.handle_parse_error(debug, errorcount, lookahead, errtoken, - state, lexer) - if errtoken is None: - break # If there's no valid token, exit - continue # Otherwise, continue to get a new token + lookahead, errtoken, errorcount = self.handle_parse_error(debug, errorcount, lookahead, errtoken, + state, lexer) + if errtoken is None: + break + continue + if len(statestack) <= 1 and lookahead.type != '$end': lookahead = None errtoken = None @@ -379,6 +330,61 @@ class LRParser: errorcount -= 1 # Decrement error count if there was a previous error return state, symstack, lookahead, errorcount + def process_production_rule(self,lookaheadstack,lexer, t, prod, symstack, 
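For context on the error path being refactored here: under PLY conventions a user-supplied p_error hook decides whether parsing resumes (return a token after errok()) or input is discarded (return None). A minimal sketch of such a hook:

    def p_error(tok):
        if tok is None:
            print('Syntax error: unexpected end of input')
        else:
            print(f'Syntax error at token {tok.type} ({tok.value!r})')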
statestack, lookahead, state, goto, pslice, tracking, + errorcount, debug): + p = prod[-t] # Get the production rule + pname = p.name + plen = p.len + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + self.log_goto(debug, p, plen, symstack, statestack, goto) + + if plen: + targ = symstack[-plen - 1:] + targ[0] = sym + self.update_tracking_info(tracking, targ, sym) + pslice.slice = targ + try: + # Call the grammar rule with our special slice object + del symstack[-plen:] + self.state = state + p.callable(pslice) + del statestack[-plen:] + self.log_debug_info(debug, pslice) + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + lookaheadstack.append(lookahead) + symstack.extend(targ[1:-1]) + statestack.pop() + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = ERROR_COUNT + self.errorok = False + else: + self.update_tracking_info(tracking, sym, lexer, pslice) + try: + self.state = state + p.callable(pslice) + self.log_debug_info(debug, pslice) + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + lookaheadstack.append(lookahead) + statestack.pop() + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = ERROR_COUNT + self.errorok = False + + return lookahead, state, symstack, statestack, errorcount @staticmethod def update_tracking_info(tracking, targ, sym): if tracking: @@ -424,6 +430,10 @@ class LRParser: sys.stderr.write('yacc: Parse error in input. EOF\n') return + def log_goto(self, debug, p, plen, symstack, statestack, goto): + if debug: + self.log_reduce_action(debug, p, plen, symstack, statestack, goto) + @staticmethod def log_reduce_action(debug, p, plen, symstack, statestack, goto): if plen: -- Gitee From cb21f0c30d07209254b38247b70f810eced4c004 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Fri, 13 Dec 2024 14:28:06 +0800 Subject: [PATCH 47/87] safdasgdshn --- script/local/parser/yacc.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index e3623f01..401b47e8 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -264,7 +264,6 @@ class LRParser: if errtoken is None: break continue - if len(statestack) <= 1 and lookahead.type != '$end': lookahead = None errtoken = None @@ -330,7 +329,7 @@ class LRParser: errorcount -= 1 # Decrement error count if there was a previous error return state, symstack, lookahead, errorcount - def process_production_rule(self,lookaheadstack,lexer, t, prod, symstack, statestack, lookahead, state, goto, pslice, tracking, + def process_production_rule(self, lookaheadstack, lexer, t, prod, symstack, statestack, lookahead, state, goto, pslice, tracking, errorcount, debug): p = prod[-t] # Get the production rule pname = p.name @@ -339,7 +338,6 @@ class LRParser: sym.type = pname # Production name sym.value = None self.log_goto(debug, p, plen, symstack, statestack, goto) - if plen: targ = symstack[-plen - 1:] targ[0] = sym -- Gitee From c3058e958d9ea8ece12e6cbac104eb2c23ef1b5e Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Fri, 13 Dec 2024 14:30:59 +0800 Subject: [PATCH 48/87] ewherjertkj --- script/local/parser/yacc.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 401b47e8..a01346b7 100644 --- 
a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -313,7 +313,6 @@ class LRParser: if debug: debug.debug('Stack : %s', ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - return lookahead, lookaheadstack, state, t @staticmethod @@ -381,8 +380,8 @@ class LRParser: lookahead = sym errorcount = ERROR_COUNT self.errorok = False - return lookahead, state, symstack, statestack, errorcount + @staticmethod def update_tracking_info(tracking, targ, sym): if tracking: -- Gitee From 6122f703270dbd754f2184692f4ac958d5b3b902 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Fri, 13 Dec 2024 14:34:53 +0800 Subject: [PATCH 49/87] dfhdfhdfh --- script/local/parser/yacc.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index a01346b7..d4a17a74 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -293,7 +293,6 @@ class LRParser: get_token): if debug: debug.debug('State : %s', state) - if state not in defaulted_states: if not lookahead: if not lookaheadstack: @@ -322,7 +321,7 @@ class LRParser: state = t if debug: debug.debug('Action : Shift and goto state %s', t) - symstack.append(lookahead) # Push the symbol onto the symbol stack + symstack.append(lookahead) lookahead = None # Reset the lookahead token if errorcount: errorcount -= 1 # Decrement error count if there was a previous error @@ -330,11 +329,11 @@ class LRParser: def process_production_rule(self, lookaheadstack, lexer, t, prod, symstack, statestack, lookahead, state, goto, pslice, tracking, errorcount, debug): - p = prod[-t] # Get the production rule + p = prod[-t] pname = p.name plen = p.len sym = YaccSymbol() - sym.type = pname # Production name + sym.type = pname sym.value = None self.log_goto(debug, p, plen, symstack, statestack, goto) if plen: -- Gitee From 65ec1118035fbc93dd413d4fd86a5085cc790c94 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Fri, 13 Dec 2024 15:11:35 +0800 Subject: [PATCH 50/87] asfasfasfdfhdfhdfh --- script/local/parser/my_lexer.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/script/local/parser/my_lexer.py b/script/local/parser/my_lexer.py index efb36e63..eadbb958 100644 --- a/script/local/parser/my_lexer.py +++ b/script/local/parser/my_lexer.py @@ -130,22 +130,18 @@ class MyLexer(): r'\n+' t.lexer.lineno += len(t.value) - # A string containing ignored characters (spaces and tabs) t_ignore = ' \t' - # Error handling rule @staticmethod - def t_error(t): - raise Exception('Illegal character "%s"' % t.value[0]) - t.lexer.skip(1) + def t_error(item): + raise Exception('"%s"' % item.value[0]) + item.lexer.skip(1) - # Build the lexer def build(self,**kwargs): self.lexer = lex(module=self, **kwargs) - # Test it output - def test(self, data): - self.lexer.input(data) + def test(self, item): + self.lexer.input(item) while True: tok = self.lexer.token() if not tok: -- Gitee From e8f03022cbf2f250565a041a7d2a39a1a8033162 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Mon, 16 Dec 2024 09:08:57 +0800 Subject: [PATCH 51/87] =?UTF-8?q?=E5=BC=80=E6=BA=90=E6=89=AB=E6=8F=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/parser/lex.py | 300 +++++++++++++++---------------------- 1 file changed, 120 insertions(+), 180 deletions(-) diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py index cc67d845..86781ee1 100644 --- 
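A standalone equivalent of the MyLexer.test() driver touched above, assuming a built PLY-style lexer that exposes input() and token():

    def dump_tokens(lexer, text):
        lexer.input(text)
        for tok in iter(lexer.token, None):   # token() returns None at end of input
            print(tok.type, tok.value, tok.lineno)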
a/script/local/parser/lex.py +++ b/script/local/parser/lex.py @@ -24,26 +24,19 @@ import types import copy import inspect -# This tuple contains acceptable string types -StringTypes = (str, bytes) -# This regular expression is used to match valid token names -_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') +DataForms = (str, bytes) -# Exception thrown when invalid token encountered and no default error -# handler is defined. -class LexError(Exception): +alpha_numeric_check = re.compile(r'^[a-zA-Z0-9_]+$') + +class SyntaxViolation(Exception): def __init__(self, message, s): self.args = (message,) self.text = s -# Token class. This class is used to represent the tokens produced. -class LexToken(object): +class SyntaxToken(object): def __repr__(self): - return f'LexToken({self.type},{self.value!r},{self.lineno},{self.lexpos})' - -# This object is a stand-in for a logging object created by the -# logging module. + return f'SyntaxToken({self.type},{self.value!r},{self.lineno},{self.analyzerpos})' class Logger(object): def __init__(self, f): @@ -61,57 +54,30 @@ class Logger(object): info = critical debug = critical -# ----------------------------------------------------------------------------- -# === Lexing Engine === -# -# The following Lexer class implements the lexer runtime. There are only -# a few public methods and attributes: -# -# input() - Store a new string in the lexer -# token() - Get the next token -# clone() - Clone the lexer -# -# lineno - Current line number -# lexpos - Current position in the input string -# ----------------------------------------------------------------------------- - -class Lexer: +class LexicalAnalyzer: def __init__(self): - self.lexre = None # Master regular expression. This is a list of - # tuples (re, findex) where re is a compiled - # regular expression and findex is a list - # mapping regex group numbers to rules - self.lexretext = None # Current regular expression strings - self.lexstatere = {} # Dictionary mapping lexer states to master regexs - self.lexstateretext = {} # Dictionary mapping lexer states to regex strings - self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names - self.lexstate = 'INITIAL' # Current lexer state - self.lexstatestack = [] # Stack of lexer states - self.lexstateinfo = None # State information - self.lexstateignore = {} # Dictionary of ignored characters for each state - self.lexstateerrorf = {} # Dictionary of error functions for each state - self.lexstateeoff = {} # Dictionary of eof functions for each state - self.lexreflags = 0 # Optional re compile flags - self.lexdata = None # Actual input data (as a string) - self.lexpos = 0 # Current position in input text - self.lexlen = 0 # Length of the input text - self.lexerrorf = None # Error rule (if any) - self.lexeoff = None # EOF rule (if any) - self.lextokens = None # List of valid tokens - self.lexignore = '' # Ignored characters - self.lexliterals = '' # Literal characters that can be passed through - self.lexmodule = None # Module - self.lineno = 1 # Current line number - - def clone(self, obj=None): - c = copy.copy(self) - - if obj: - c.lexstatere = self._rebind_lexstatere(obj) - c.lexstateerrorf = self._rebind_lexstateerrorf(obj) - c.lexmodule = obj - - return c + self.lexre = None + self.analyzertext = None + self.lexstatere = {} + self.analyzerstateretext = {} + self.analyzerstaterenames = {} + self.lexstate = 'INITIAL' + self.analyzerstatestack = [] + self.analyzerstateinfo = None + self.analyzerstateignore = {} + self.analyzerstateerrorf = {} 
+ self.analyzerstateeoff = {} + self.analyzerreflags = 0 + self.analyzerdata = None + self.analyzerpos = 0 + self.analyzerlen = 0 + self.analyzererrorf = None + self.analyzereoff = None + self.analyzertokens = None + self.analyzertignore = '' + self.lexliterals = '' + self.analyzermodule = None + self.lineno = 1 def _rebind_lexstatere(self, obj): newtab = {} @@ -133,117 +99,91 @@ class Lexer: newfindex.append((getattr(obj, f[0].__name__), f[1])) return newfindex - def _rebind_lexstateerrorf(self, obj): + def _rebind_analyzerstateerrorf(self, obj): newtab = {} - for key, ef in self.lexstateerrorf.items(): + for key, ef in self.analyzerstateerrorf.items(): newtab[key] = getattr(obj, ef.__name__) return newtab - # ------------------------------------------------------------ - # input() - Push a new string into the lexer - # ------------------------------------------------------------ def input(self, s): - self.lexdata = s - self.lexpos = 0 - self.lexlen = len(s) - - # ------------------------------------------------------------ - # begin() - Changes the lexing state - # ------------------------------------------------------------ - def begin(self, state): + self.analyzerdata = s + self.analyzerpos = 0 + self.analyzerlen = len(s) + + def analysisStart(self, state): if state not in self.lexstatere: raise ValueError(f'Undefined state {state!r}') self.lexre = self.lexstatere[state] - self.lexretext = self.lexstateretext[state] - self.lexignore = self.lexstateignore.get(state, '') - self.lexerrorf = self.lexstateerrorf.get(state, None) - self.lexeoff = self.lexstateeoff.get(state, None) + self.analyzertext = self.analyzerstateretext[state] + self.analyzertignore = self.analyzerstateignore.get(state, '') + self.analyzererrorf = self.analyzerstateerrorf.get(state, None) + self.analyzereoff = self.analyzerstateeoff.get(state, None) self.lexstate = state - # ------------------------------------------------------------ - # push_state() - Changes the lexing state and saves old on stack - # ------------------------------------------------------------ - def push_state(self, state): - self.lexstatestack.append(self.lexstate) - self.begin(state) - - # ------------------------------------------------------------ - # pop_state() - Restores the previous state - # ------------------------------------------------------------ - def pop_state(self): - self.begin(self.lexstatestack.pop()) - - # ------------------------------------------------------------ - # current_state() - Returns the current lexing state - # ------------------------------------------------------------ - def current_state(self): + def addState(self, state): + self.analyzerstatestack.append(self.lexstate) + self.analysisStart(state) + + def revertState(self): + self.analysisStart(self.analyzerstatestack.pop()) + + def stateNow(self): return self.lexstate - # ------------------------------------------------------------ - # skip() - Skip ahead n characters - # ------------------------------------------------------------ def skip(self, n): - self.lexpos += n + self.analyzerpos += n - # ------------------------------------------------------------ - # token() - Return the next token from the Lexer - # - # Note: This function has been carefully implemented to be as fast - # as possible. 
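The token() method above scans in three phases: skip ignored characters, try the master regular expressions, then fall back to single-character literals before raising. A reduced, self-contained sketch of that contract with illustrative rules:

    import re

    def scan_one(data, pos, ignore=' \t', rules=(re.compile(r'\d+'),), literals='+-*/'):
        while pos < len(data) and data[pos] in ignore:
            pos += 1                      # phase 0: skip ignored characters
        if pos >= len(data):
            return None, pos              # end of input
        for rule in rules:                # phase 1: master regular expressions
            m = rule.match(data, pos)
            if m:
                return ('NUMBER', m.group()), m.end()
        if data[pos] in literals:         # phase 2: literal characters
            return (data[pos], data[pos]), pos + 1
        raise SyntaxError(f'illegal character {data[pos]!r} at index {pos}')

    print(scan_one('  42+x', 0))  # (('NUMBER', '42'), 4)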
Don't make changes unless you really know what - # you are doing - # ------------------------------------------------------------ def token(self): - # Make local copies of frequently referenced attributes - lexpos = self.lexpos - lexlen = self.lexlen - lexignore = self.lexignore - lexdata = self.lexdata + analyzerpos = self.analyzerpos + analyzerlen = self.analyzerlen + analyzertignore = self.analyzertignore + analyzerdata = self.analyzerdata - while lexpos < lexlen: + while analyzerpos < analyzerlen: # This code provides some short-circuit code for whitespace, tabs, and other ignored characters - if lexdata[lexpos] in lexignore: - lexpos += 1 + if analyzerdata[analyzerpos] in analyzertignore: + analyzerpos += 1 continue # Look for a regular expression match - tok = self._process_regex_matches(lexpos) + tok = self._process_regex_matches(analyzerpos) if tok: return tok # No match, see if in literals - tok = self._process_literals(lexpos) + tok = self._process_literals(analyzerpos) if tok: return tok # No match. Call t_error() if defined. - tok = self._handle_error(lexpos) + tok = self._handle_error(analyzerpos) if tok: return tok - self.lexpos = lexpos - raise LexError(f"Illegal character {lexdata[lexpos]!r} at index {lexpos}", - lexdata[lexpos:]) + self.analyzerpos = analyzerpos + raise SyntaxViolation(f"Illegal character {analyzerdata[analyzerpos]!r} at index {analyzerpos}", + analyzerdata[analyzerpos:]) - if self.lexeoff: + if self.analyzereoff: tok = self._process_eof() return tok - self.lexpos = lexpos + 1 - if self.lexdata is None: + self.analyzerpos = analyzerpos + 1 + if self.analyzerdata is None: raise RuntimeError('No input string given with input()') return None - def _process_regex_matches(self, lexpos): + def _process_regex_matches(self, analyzerpos): for lexre, lexindexfunc in self.lexre: - m = lexre.match(self.lexdata, lexpos) + m = lexre.match(self.analyzerdata, analyzerpos) if not m: continue # Create a token for return - tok = LexToken() + tok = SyntaxToken() tok.value = m.group() tok.lineno = self.lineno - tok.lexpos = lexpos + tok.analyzerpos = analyzerpos i = m.lastindex func, tok.type = lexindexfunc[i] @@ -251,70 +191,70 @@ class Lexer: if not func: # If no token type was set, it's an ignored token if tok.type: - self.lexpos = m.end() + self.analyzerpos = m.end() return tok else: - lexpos = m.end() + analyzerpos = m.end() break - lexpos = m.end() + analyzerpos = m.end() # If token is processed by a function, call it tok.lexer = self # Set additional attributes useful in token rules self.lexmatch = m - self.lexpos = lexpos + self.analyzerpos = analyzerpos newtok = func(tok) del tok.lexer del self.lexmatch # Every function must return a token, if nothing, we just move to next token if not newtok: - lexpos = self.lexpos # This is here in case user has updated lexpos. - lexignore = self.lexignore # This is here in case there was a state change + analyzerpos = self.analyzerpos # This is here in case user has updated analyzerpos. 
+ analyzertignore = self.analyzertignore # This is here in case there was a state change break return newtok return None - def _process_literals(self, lexpos): - if self.lexdata[lexpos] in self.lexliterals: - tok = LexToken() - tok.value = self.lexdata[lexpos] + def _process_literals(self, analyzerpos): + if self.analyzerdata[analyzerpos] in self.lexliterals: + tok = SyntaxToken() + tok.value = self.analyzerdata[analyzerpos] tok.lineno = self.lineno tok.type = tok.value - tok.lexpos = lexpos - self.lexpos = lexpos + 1 + tok.analyzerpos = analyzerpos + self.analyzerpos = analyzerpos + 1 return tok return None - def _handle_error(self, lexpos): - if self.lexerrorf: - tok = LexToken() - tok.value = self.lexdata[lexpos:] + def _handle_error(self, analyzerpos): + if self.analyzererrorf: + tok = SyntaxToken() + tok.value = self.analyzerdata[analyzerpos:] tok.lineno = self.lineno tok.type = 'error' tok.lexer = self - tok.lexpos = lexpos - self.lexpos = lexpos - newtok = self.lexerrorf(tok) - if lexpos == self.lexpos: + tok.analyzerpos = analyzerpos + self.analyzerpos = analyzerpos + newtok = self.analyzererrorf(tok) + if analyzerpos == self.analyzerpos: # Error method didn't change text position at all. This is an error. - raise LexError(f"Scanning error. Illegal character {self.lexdata[lexpos]!r}", - self.lexdata[lexpos:]) - lexpos = self.lexpos + raise SyntaxViolation(f"Scanning error. Illegal character {self.analyzerdata[analyzerpos]!r}", + self.analyzerdata[analyzerpos:]) + analyzerpos = self.analyzerpos if not newtok: return None return newtok return None def _process_eof(self): - tok = LexToken() + tok = SyntaxToken() tok.type = 'eof' tok.value = '' tok.lineno = self.lineno - tok.lexpos = self.lexpos + tok.analyzerpos = self.analyzerpos tok.lexer = self - self.lexpos = self.lexpos - newtok = self.lexeoff(tok) + self.analyzerpos = self.analyzerpos + newtok = self.analyzereoff(tok) return newtok # Iterator interface @@ -471,7 +411,7 @@ class LexerReflect(object): def validate_tokens(self): terminals = {} for n in self.tokens: - if not _is_identifier.match(n): + if not alpha_numeric_check.match(n): self.log.error(f"Bad token name {n!r}") self.error = True if n in terminals: @@ -488,7 +428,7 @@ class LexerReflect(object): def validate_literals(self): try: for c in self.literals: - if not isinstance(c, StringTypes) or len(c) > 1: + if not isinstance(c, DataForms) or len(c) > 1: self.log.error(f'Invalid literal {c!r}. 
Must be a single character') self.error = True @@ -513,7 +453,7 @@ class LexerReflect(object): self.error = True continue name, statetype = s - if not isinstance(name, StringTypes): + if not isinstance(name, DataForms): self.log.error('State name %r must be a string', name) self.error = True continue @@ -556,7 +496,7 @@ class LexerReflect(object): if hasattr(t, '__call__'): self.process_function_rule(f, t, states, tokname) - elif isinstance(t, StringTypes): + elif isinstance(t, DataForms): self.process_string_rule(f, t, states, tokname) else: self.log.error('%s not defined as a function or string', f) @@ -751,7 +691,7 @@ def lex(*, module=None, obj=None, debug=False, ldict = None stateinfo = {'INITIAL': 'inclusive'} - lexobj = Lexer() + lexobj = LexicalAnalyzer() global token, lex_input errorlog = build_error_log(errorlog) if debug: @@ -785,9 +725,9 @@ def lex(*, module=None, obj=None, debug=False, debuglog.info('lex: states = %r', linfo.stateinfo) # Build a dictionary of valid token names - lexobj.lextokens = set() + lexobj.analyzertokens = set() for n in linfo.tokens: - lexobj.lextokens.add(n) + lexobj.analyzertokens.add(n) # Get literals specification if isinstance(linfo.literals, (list, tuple)): @@ -795,7 +735,7 @@ def lex(*, module=None, obj=None, debug=False, else: lexobj.lexliterals = linfo.literals - lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) + lexobj.lextokens_all = lexobj.analyzertokens | set(lexobj.lexliterals) # Get the stateinfo dictionary stateinfo = linfo.stateinfo @@ -810,8 +750,8 @@ def lex(*, module=None, obj=None, debug=False, for state in regexs: lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) lexobj.lexstatere[state] = lexre - lexobj.lexstateretext[state] = re_text - lexobj.lexstaterenames[state] = re_names + lexobj.analyzerstateretext[state] = re_text + lexobj.analyzerstaterenames[state] = re_names if debug: for i, text in enumerate(re_text): debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) @@ -820,27 +760,27 @@ def lex(*, module=None, obj=None, debug=False, for state, stype in stateinfo.items(): if state != 'INITIAL' and stype == 'inclusive': lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) - lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) - lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) + lexobj.analyzerstateretext[state].extend(lexobj.analyzerstateretext['INITIAL']) + lexobj.analyzerstaterenames[state].extend(lexobj.analyzerstaterenames['INITIAL']) - lexobj.lexstateinfo = stateinfo + lexobj.analyzerstateinfo = stateinfo lexobj.lexre = lexobj.lexstatere['INITIAL'] - lexobj.lexretext = lexobj.lexstateretext['INITIAL'] - lexobj.lexreflags = reflags + lexobj.analyzertext = lexobj.analyzerstateretext['INITIAL'] + lexobj.analyzerreflags = reflags # Set up ignore variables - lexobj.lexstateignore = linfo.ignore - lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') + lexobj.analyzerstateignore = linfo.ignore + lexobj.analyzertignore = lexobj.analyzerstateignore.get('INITIAL', '') # Set up error functions - lexobj.lexstateerrorf = linfo.errorf - lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) - if not lexobj.lexerrorf: + lexobj.analyzerstateerrorf = linfo.errorf + lexobj.analyzererrorf = linfo.errorf.get('INITIAL', None) + if not lexobj.analyzererrorf: errorlog.warning('No t_error rule is defined') # Set up eof functions - lexobj.lexstateeoff = linfo.eoff - lexobj.lexeoff = linfo.eoff.get('INITIAL', None) + 
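How 'inclusive' states inherit the INITIAL rules in the builder above, reduced to a toy sketch:

    state_rules = {'INITIAL': ['t_NUMBER', 't_ID'], 'comment': ['t_comment_BODY']}
    state_types = {'INITIAL': 'inclusive', 'comment': 'inclusive'}

    for state, stype in state_types.items():
        if state != 'INITIAL' and stype == 'inclusive':
            state_rules[state].extend(state_rules['INITIAL'])

    print(state_rules['comment'])  # ['t_comment_BODY', 't_NUMBER', 't_ID']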
lexobj.analyzerstateeoff = linfo.eoff + lexobj.analyzereoff = linfo.eoff.get('INITIAL', None) # Check state information for ignore and error rules check_state_info(stateinfo, linfo, errorlog, lexobj) @@ -881,7 +821,7 @@ def check_state_info(stateinfo, linfo, errorlog, lexobj): if stype == 'exclusive': if s not in linfo.errorf: errorlog.warning("No error rule is defined for exclusive state %r", s) - if s not in linfo.ignore and lexobj.lexignore: + if s not in linfo.ignore and lexobj.analyzertignore: errorlog.warning("No ignore rule is defined for exclusive state %r", s) elif stype == 'inclusive': if s not in linfo.errorf: @@ -915,7 +855,7 @@ def runmain(lexer_instance=None, data=None): tok = local_token() if not tok: break - sys.stdout.write(f'({tok.type},{tok.value!r},{tok.lineno},{tok.lexpos})\n') + sys.stdout.write(f'({tok.type},{tok.value!r},{tok.lineno},{tok.analyzerpos})\n') # ----------------------------------------------------------------------------- -- Gitee From 748b73423d3d8e98f63a37c7d08c4cef72659b4d Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Mon, 16 Dec 2024 11:31:36 +0800 Subject: [PATCH 52/87] =?UTF-8?q?=E5=BC=80=E8=BF=9001?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/parser/lex.py | 360 ++++++++++++++++--------------------- 1 file changed, 156 insertions(+), 204 deletions(-) diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py index 86781ee1..6ffbdca3 100644 --- a/script/local/parser/lex.py +++ b/script/local/parser/lex.py @@ -56,7 +56,7 @@ class Logger(object): class LexicalAnalyzer: def __init__(self): - self.lexre = None + self.lexpattern = None self.analyzertext = None self.lexstatere = {} self.analyzerstateretext = {} @@ -81,11 +81,11 @@ class LexicalAnalyzer: def _rebind_lexstatere(self, obj): newtab = {} - for key, ritem in self.lexstatere.items(): + for key, element in self.lexstatere.items(): newre = [] - for cre, findex in ritem: + for creek, findex in element : newfindex = self._rebind_findex(obj, findex) - newre.append((cre, newfindex)) + newre.append((creek, newfindex)) newtab[key] = newre return newtab @@ -110,27 +110,27 @@ class LexicalAnalyzer: self.analyzerpos = 0 self.analyzerlen = len(s) - def analysisStart(self, state): + def analysis_start(self, state): if state not in self.lexstatere: raise ValueError(f'Undefined state {state!r}') - self.lexre = self.lexstatere[state] + self.lexpattern = self.lexstatere[state] self.analyzertext = self.analyzerstateretext[state] self.analyzertignore = self.analyzerstateignore.get(state, '') self.analyzererrorf = self.analyzerstateerrorf.get(state, None) self.analyzereoff = self.analyzerstateeoff.get(state, None) self.lexstate = state - def addState(self, state): + def add_state(self, state): self.analyzerstatestack.append(self.lexstate) - self.analysisStart(state) + self.analysis_start(state) - def revertState(self): - self.analysisStart(self.analyzerstatestack.pop()) + def revert_state(self): + self.analysis_start(self.analyzerstatestack.pop()) - def stateNow(self): + def state_now(self): return self.lexstate - def skip(self, n): + def defer(self, n): self.analyzerpos += n def token(self): @@ -140,7 +140,6 @@ class LexicalAnalyzer: analyzerdata = self.analyzerdata while analyzerpos < analyzerlen: - # This code provides some short-circuit code for whitespace, tabs, and other ignored characters if analyzerdata[analyzerpos] in analyzertignore: analyzerpos += 1 continue @@ -174,45 +173,41 @@ class LexicalAnalyzer: return None 
def _process_regex_matches(self, analyzerpos): - for lexre, lexindexfunc in self.lexre: - m = lexre.match(self.analyzerdata, analyzerpos) - if not m: + for lexpattern , lexindexfunc in self.lexpattern : + marker = lexpattern .match(self.analyzerdata, analyzerpos) + if not marker: continue + element = SyntaxToken() + element.value = marker.group() + element.lineno = self.lineno + element.analyzerpos = analyzerpos - # Create a token for return - tok = SyntaxToken() - tok.value = m.group() - tok.lineno = self.lineno - tok.analyzerpos = analyzerpos - - i = m.lastindex - func, tok.type = lexindexfunc[i] + i = marker.lastindex + func, element.type = lexindexfunc[i] if not func: # If no token type was set, it's an ignored token - if tok.type: - self.analyzerpos = m.end() - return tok + if element.type: + self.analyzerpos = marker.end() + return element else: - analyzerpos = m.end() + analyzerpos = marker.end() break - analyzerpos = m.end() + analyzerpos = marker.end() - # If token is processed by a function, call it - tok.lexer = self # Set additional attributes useful in token rules - self.lexmatch = m + element.lexer = self + self.lexmatch = marker self.analyzerpos = analyzerpos - newtok = func(tok) - del tok.lexer + newflag = func(element) + del element.lexer del self.lexmatch - # Every function must return a token, if nothing, we just move to next token - if not newtok: + if not newflag: analyzerpos = self.analyzerpos # This is here in case user has updated analyzerpos. analyzertignore = self.analyzertignore # This is here in case there was a state change break - return newtok + return newflag return None def _process_literals(self, analyzerpos): @@ -235,15 +230,14 @@ class LexicalAnalyzer: tok.lexer = self tok.analyzerpos = analyzerpos self.analyzerpos = analyzerpos - newtok = self.analyzererrorf(tok) + newresult = self.analyzererrorf(tok) if analyzerpos == self.analyzerpos: - # Error method didn't change text position at all. This is an error. - raise SyntaxViolation(f"Scanning error. Illegal character {self.analyzerdata[analyzerpos]!r}", + raise SyntaxViolation(f"Illegal character {self.analyzerdata[analyzerpos]!r}", self.analyzerdata[analyzerpos:]) analyzerpos = self.analyzerpos - if not newtok: + if not newresult: return None - return newtok + return newresult return None def _process_eof(self): @@ -257,120 +251,80 @@ class LexicalAnalyzer: newtok = self.analyzereoff(tok) return newtok - # Iterator interface def __iter__(self): return self def __next__(self): - t = self.token() - if t is None: + category = self.token() + if category is None: raise StopIteration - return t + return category -# ----------------------------------------------------------------------------- -# ==== Lex Builder === -# -# The functions and classes below are used to collect lexing information -# and build a Lexer object from it. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# -# Returns the regular expression assigned to a function either as a doc string -# or as a .regex attribute attached by the @TOKEN decorator. -# ----------------------------------------------------------------------------- -def _get_regex(func): +def _build_checker(func): return getattr(func, 'regex', func.__doc__) -# ----------------------------------------------------------------------------- -# -# This function returns a dictionary containing all of the symbols defined within -# a caller further down the call stack. 
This is used to get the environment -# associated with the yacc() call if none was provided. -# ----------------------------------------------------------------------------- -def get_caller_module_dict(levels): - f = sys._getframe(levels) - return {**f.f_globals, **f.f_locals} - -# ----------------------------------------------------------------------------- -# -# This function takes a list of all of the regex components and attempts to -# form the master regular expression. Given limitations in the Python re -# module, it may be necessary to break the master regex into separate expressions. -# ----------------------------------------------------------------------------- +def fetch_collect_info(item): + fet = sys._getframe(item) + return {**fet.f_globals, **fet.f_locals} -def _form_master_re(relist, reflags, ldict, toknames): - if not relist: +def _create_validation_re(ele, banner, boost, largely): + if not ele: return [], [], [] - regex = '|'.join(relist) + regex = '|'.join(ele) try: - lexre = re.compile(regex, reflags) - lexindexfunc, lexindexnames = _handle_groupindex(lexre, ldict, toknames) - return [(lexre, lexindexfunc)], [regex], [lexindexnames] + lexpattern = re.compile(regex, banner) + lexindexfunc, lexindexnames = _handle_groupindex(lexpattern , boost, largely) + return [(lexpattern , lexindexfunc)], [regex], [lexindexnames] except Exception: - m = (len(relist) // 2) + 1 - llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) - rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) - return (llist+rlist), (lre+rre), (lnames+rnames) + m = (len(ele) // 2) + 1 + clue, poll, fre = _create_validation_re(ele[:m], banner, boost, largely) + site, reveal, eng = _create_validation_re(ele[m:], banner, boost, largely) + return (clue+site), (poll+reveal), (fre+eng) -def _handle_groupindex(lexre, ldict, toknames): - lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) - lexindexnames = lexindexfunc[:] +def _handle_groupindex(cultivate, extent, negative): + immediate = [None] * (max(cultivate .groupindex.values()) + 1) + reveal = immediate[:] - for f, i in lexre.groupindex.items(): - handle = ldict.get(f, None) + for peak, shed in cultivate .groupindex.items(): + handle = extent.get(peak, None) if type(handle) in (types.FunctionType, types.MethodType): - lexindexfunc[i] = (handle, toknames[f]) - lexindexnames[i] = f + immediate[shed] = (handle, negative[peak]) + reveal[shed] = peak elif handle is not None: - lexindexnames[i] = f - if f.find('ignore_') > 0: - lexindexfunc[i] = (None, None) + reveal[shed] = peak + if peak.find('ignore_') > 0: + immediate[shed] = (None, None) else: - lexindexfunc[i] = (None, toknames[f]) - - return lexindexfunc, lexindexnames + immediate[shed] = (None, negative[peak]) + return immediate, reveal -# ----------------------------------------------------------------------------- -# def _statetoken(s,names) -# -# Given a declaration name s of the form "t_" and a dictionary whose keys are -# state names, this function returns a tuple (states,tokenname) where states -# is a tuple of state names and tokenname is the name of the token. 
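The divide-and-conquer behind _create_validation_re(): join every rule pattern into one alternation and, if re.compile() rejects the result (for example, too many named groups), split the list and compile each half recursively. A sketch that assumes each individual pattern is valid:

    import re

    def compile_master(patterns, flags=0):
        try:
            return [re.compile('|'.join(patterns), flags)]
        except re.error:
            mid = len(patterns) // 2 + 1
            return compile_master(patterns[:mid], flags) + compile_master(patterns[mid:], flags)

    regs = compile_master([r'(?P<NUMBER>\d+)', r'(?P<ID>[a-zA-Z_]\w*)'])
    print(regs[0].match('foo42').lastgroup)   # 'ID'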
For example, -# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') -# ----------------------------------------------------------------------------- -def _statetoken(s, names): - parts = s.split('_') - for i, part in enumerate(parts[1:], 1): - if part not in names and part != 'ANY': +def strengthen(rigorous, tackle): + bother = rigorous.split('_') + for brand, conflict in enumerate(bother[1:], 1): + if conflict not in tackle and conflict != 'ANY': break - if i > 1: - states = tuple(parts[1:i]) + if brand > 1: + contrast = tuple(bother[1:brand]) else: - states = ('INITIAL',) + contrast = ('INITIAL',) - if 'ANY' in states: - states = tuple(names) + if 'ANY' in contrast: + contrast = tuple(tackle) - tokenname = '_'.join(parts[i:]) - return (states, tokenname) + consume = '_'.join(bother[brand:]) + return (contrast, consume) -# ----------------------------------------------------------------------------- -# -# This class represents information needed to build a lexer as extracted from a -# user's input file. -# ----------------------------------------------------------------------------- -class LexerReflect(object): - def __init__(self, ldict, log=None, reflags=0): - self.ldict = ldict - self.error_func = None +class LexerConflict (object): + def __init__(self, rival, log=None, transform=0): + self.rival = rival + self.earn_scale = None self.tokens = [] - self.reflags = reflags - self.stateinfo = {'INITIAL': 'inclusive'} - self.modules = set() - self.error = False + self.transform = transform + self.strategy = {'INITIAL': 'inclusive'} + self.margin = set() + self.earn = False self.log = Logger(sys.stderr) if log is None else log # Get all of the basic information @@ -385,24 +339,24 @@ class LexerReflect(object): self.validate_tokens() self.validate_literals() self.validate_rules() - return self.error + return self.earn # Get the tokens map def get_tokens(self): - tokens = self.ldict.get('tokens', None) + tokens = self.rival.get('tokens', None) if not tokens: self.log.error('No token list is defined') - self.error = True + self.earn = True return if not isinstance(tokens, (list, tuple)): self.log.error('tokens must be a list or tuple') - self.error = True + self.earn = True return if not tokens: self.log.error('tokens is empty') - self.error = True + self.earn = True return self.tokens = tokens @@ -413,14 +367,14 @@ class LexerReflect(object): for n in self.tokens: if not alpha_numeric_check.match(n): self.log.error(f"Bad token name {n!r}") - self.error = True + self.earn = True if n in terminals: self.log.warning(f"Token {n!r} multiply defined") terminals[n] = 1 # Get the literals specifier def get_literals(self): - self.literals = self.ldict.get('literals', '') + self.literals = self.rival.get('literals', '') if not self.literals: self.literals = '' @@ -430,68 +384,68 @@ class LexerReflect(object): for c in self.literals: if not isinstance(c, DataForms) or len(c) > 1: self.log.error(f'Invalid literal {c!r}. Must be a single character') - self.error = True + self.earn = True except TypeError: self.log.error('Invalid literals specification. 
literals must be a sequence of characters') - self.error = True + self.earn = True def get_states(self): - self.states = self.ldict.get('states', None) + self.states = self.rival.get('states', None) # Build statemap if not self.states: return if not isinstance(self.states, (tuple, list)): self.log.error('states must be defined as a tuple or list') - self.error = True + self.earn = True return for s in self.states: if not isinstance(s, tuple) or len(s) != 2: self.log.error("Invalid state specifier %r. Must be a tuple (statename,'exclusive|inclusive')", s) - self.error = True + self.earn = True continue name, statetype = s if not isinstance(name, DataForms): self.log.error('State name %r must be a string', name) - self.error = True + self.earn = True continue if not (statetype == 'inclusive' or statetype == 'exclusive'): self.log.error("State type for state %r must be 'inclusive' or 'exclusive'", name) - self.error = True + self.earn = True continue - if name in self.stateinfo: + if name in self.strategy: self.log.error("State %r already defined", name) - self.error = True + self.earn = True continue - self.stateinfo[name] = statetype + self.strategy[name] = statetype # Get all of the symbols with a t_ prefix and sort them into various def get_rules(self): - tsymbols = [f for f in self.ldict if f[:2] == 't_'] + tsymbols = [f for f in self.rival if f[:2] == 't_'] # Now build up a list of functions and a list of strings self.toknames = {} # Mapping of symbols to token names self.funcsym = {} # Symbols defined as functions self.strsym = {} # Symbols defined as strings self.ignore = {} # Ignore strings by state - self.errorf = {} # Error functions by state + self.earnf = {} # Error functions by state self.eoff = {} # EOF functions by state - for s in self.stateinfo: + for s in self.strategy: self.funcsym[s] = [] self.strsym[s] = [] if len(tsymbols) == 0: self.log.error('No rules of the form t_rulename are defined') - self.error = True + self.earn = True return for f in tsymbols: - t = self.ldict[f] - states, tokname = _statetoken(f, self.stateinfo) + t = self.rival[f] + states, tokname = strengthen(f, self.strategy) self.toknames[f] = tokname if hasattr(t, '__call__'): @@ -500,7 +454,7 @@ class LexerReflect(object): self.process_string_rule(f, t, states, tokname) else: self.log.error('%s not defined as a function or string', f) - self.error = True + self.earn = True # Sort the functions by line number for f in self.funcsym.values(): @@ -513,7 +467,7 @@ class LexerReflect(object): def process_function_rule(self, f, t, states, tokname): if tokname == 'error': for s in states: - self.errorf[s] = t + self.earnf[s] = t elif tokname == 'eof': for s in states: self.eoff[s] = t @@ -521,7 +475,7 @@ class LexerReflect(object): line = t.__code__.co_firstlineno file = t.__code__.co_filename self.log.error("%s:%d: Rule %r must be defined as a string", file, line, t.__name__) - self.error = True + self.earn = True else: for s in states: self.funcsym[s].append((f, t)) @@ -534,20 +488,20 @@ class LexerReflect(object): self.log.warning("%s contains a literal backslash '\\'", f) elif tokname == 'error': self.log.error("Rule %r must be defined as a function", f) - self.error = True + self.earn = True else: for s in states: self.strsym[s].append((f, t)) # Validate all of the t_rules collected def validate_rules(self): - for state in self.stateinfo: + for state in self.strategy: self._validate_func_rules(state) self._validate_str_rules(state) self._validate_no_rules(state) self._validate_error_function(state) - for 
module in self.modules: + for module in self.margin: self.validate_module(module) def _validate_func_rules(self, state): @@ -555,14 +509,14 @@ class LexerReflect(object): line = f.__code__.co_firstlineno file = f.__code__.co_filename module = inspect.getmodule(f) - self.modules.add(module) + self.margin.add(module) if not self._validate_function(f, file, line): continue - if not _get_regex(f): + if not _build_checker (f): self.log.error("%s:%d: No regular expression defined for rule %r", file, line, f.__name__) - self.error = True + self.earn = True continue self._validate_regex(f, fname, file, line) @@ -575,12 +529,12 @@ class LexerReflect(object): if nargs > reqargs: self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) - self.error = True + self.earn = True return False if nargs < reqargs: self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) - self.error = True + self.earn = True return False return True @@ -588,27 +542,27 @@ class LexerReflect(object): def _validate_regex(self, f, fname, file, line): """Validates the regular expression of a function.""" try: - c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) + c = re.compile('(?P<%s>%s)' % (fname, _build_checker (f)), self.transform) if c.match(''): self.log.error("%s:%d: Regular expression for rule %r matches empty string", file, line, f.__name__) - self.error = True + self.earn = True except re.error as e: self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) - if '#' in _get_regex(f): + if '#' in _build_checker (f): self.log.error("%s:%d. Make sure '#' in rule %r is escaped with '\\#'", file, line, f.__name__) - self.error = True + self.earn = True def _validate_str_rules(self, state): for name, r in self.strsym[state]: tokname = self.toknames[name] if tokname == 'error': self.log.error("Rule %r must be defined as a function", name) - self.error = True + self.earn = True continue if tokname not in self.tokens and tokname.find('ignore_') < 0: self.log.error("Rule %r defined for an unspecified token %s", name, tokname) - self.error = True + self.earn = True continue self._validate_regex_str(name, r) @@ -616,30 +570,30 @@ class LexerReflect(object): def _validate_regex_str(self, name, r): """Validates the regular expression defined by a string.""" try: - c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) + c = re.compile('(?P<%s>%s)' % (name, r), self.transform) if c.match(''): self.log.error("Regular expression for rule %r matches empty string", name) - self.error = True + self.earn = True except re.error as e: self.log.error("Invalid regular expression for rule %r. 
%s", name, e) if '#' in r: self.log.error("Make sure '#' in rule %r is escaped with '\\#'", name) - self.error = True + self.earn = True def _validate_no_rules(self, state): """Logs an error if no rules are defined for a state.""" if not self.funcsym[state] and not self.strsym[state]: self.log.error("No rules defined for state %r", state) - self.error = True + self.earn = True def _validate_error_function(self, state): """Validates the error function for the state.""" - efunc = self.errorf.get(state, None) + efunc = self.earnf.get(state, None) if efunc: line = efunc.__code__.co_firstlineno file = efunc.__code__.co_filename module = inspect.getmodule(efunc) - self.modules.add(module) + self.margin.add(module) if not self._validate_function(efunc, file, line): return @@ -674,7 +628,7 @@ class LexerReflect(object): else: filename = inspect.getsourcefile(module) self.log.error('%s:%d: Rule %s redefined. Previously defined on line %d', filename, linen, name, prev) - self.error = True + self.earn = True linen += 1 # ----------------------------------------------------------------------------- @@ -685,12 +639,12 @@ def build_error_log(errorlog): if errorlog is None: return Logger(sys.stderr) def lex(*, module=None, obj=None, debug=False, - reflags=int(re.VERBOSE), debuglog=None, errorlog=None): + transform=int(re.VERBOSE), debuglog=None, errorlog=None): global lexer - ldict = None - stateinfo = {'INITIAL': 'inclusive'} + rival = None + strategy = {'INITIAL': 'inclusive'} lexobj = LexicalAnalyzer() global token, lex_input errorlog = build_error_log(errorlog) @@ -705,15 +659,15 @@ def lex(*, module=None, obj=None, debug=False, # Get the module dictionary used for the parser if module: _items = [(k, getattr(module, k)) for k in dir(module)] - ldict = dict(_items) + rival = dict(_items) # If no __file__ attribute is available, try to obtain it from the __module__ instead - if '__file__' not in ldict: - ldict['__file__'] = sys.modules[ldict['__module__']].__file__ + if '__file__' not in rival: + rival['__file__'] = sys.margin[rival['__module__']].__file__ else: - ldict = get_caller_module_dict(2) + rival = fetch_collect_info(2) # Collect parser information from the dictionary - linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) + linfo = LexerConflict(rival, log=errorlog, transform=transform) linfo.get_all() if linfo.validate_all(): raise SyntaxError("Can't build lexer") @@ -722,7 +676,7 @@ def lex(*, module=None, obj=None, debug=False, if debug: debuglog.info('lex: tokens = %r', linfo.tokens) debuglog.info('lex: literals = %r', linfo.literals) - debuglog.info('lex: states = %r', linfo.stateinfo) + debuglog.info('lex: states = %r', linfo.strategy) # Build a dictionary of valid token names lexobj.analyzertokens = set() @@ -737,19 +691,17 @@ def lex(*, module=None, obj=None, debug=False, lexobj.lextokens_all = lexobj.analyzertokens | set(lexobj.lexliterals) - # Get the stateinfo dictionary - stateinfo = linfo.stateinfo + strategy = linfo.strategy - # Call the helper function to build regex rules - regexs = build_regexs(linfo, stateinfo, debug, debuglog) + regexs = build_regexs(linfo, strategy, debug, debuglog) # Build the master regular expressions if debug: debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') for state in regexs: - lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) - lexobj.lexstatere[state] = lexre + lexpattern , re_text, re_names = _create_validation_re(regexs[state], transform, rival, linfo.toknames) + lexobj.lexstatere[state] = 
lexpattern lexobj.analyzerstateretext[state] = re_text lexobj.analyzerstaterenames[state] = re_names if debug: @@ -757,16 +709,16 @@ def lex(*, module=None, obj=None, debug=False, debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) # For inclusive states, we need to add the regular expressions from the INITIAL state - for state, stype in stateinfo.items(): + for state, stype in strategy.items(): if state != 'INITIAL' and stype == 'inclusive': lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) lexobj.analyzerstateretext[state].extend(lexobj.analyzerstateretext['INITIAL']) lexobj.analyzerstaterenames[state].extend(lexobj.analyzerstaterenames['INITIAL']) - lexobj.analyzerstateinfo = stateinfo - lexobj.lexre = lexobj.lexstatere['INITIAL'] + lexobj.analyzerstateinfo = strategy + lexobj.lexpattern = lexobj.lexstatere['INITIAL'] lexobj.analyzertext = lexobj.analyzerstateretext['INITIAL'] - lexobj.analyzerreflags = reflags + lexobj.analyzerreflags = transform # Set up ignore variables lexobj.analyzerstateignore = linfo.ignore @@ -783,26 +735,26 @@ def lex(*, module=None, obj=None, debug=False, lexobj.analyzereoff = linfo.eoff.get('INITIAL', None) # Check state information for ignore and error rules - check_state_info(stateinfo, linfo, errorlog, lexobj) + check_state_info(strategy, linfo, errorlog, lexobj) # Create global versions of the token() and input() functions token = lexobj.token lex_input = lexobj.input lexer = lexobj -def build_regexs(linfo, stateinfo, debug, debuglog): +def build_regexs(linfo, strategy, debug, debuglog): """ Helper function to build the regex dictionary from state information. """ regexs = {} - for state in stateinfo: + for state in strategy: regex_list = [] # Add rules defined by functions first for fname, f in linfo.funcsym[state]: - regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) + regex_list.append('(?P<%s>%s)' % (fname, _build_checker (f))) if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _build_checker (f), state) # Now add all of the simple rules for name, r in linfo.strsym[state]: @@ -813,11 +765,11 @@ def build_regexs(linfo, stateinfo, debug, debuglog): regexs[state] = regex_list return regexs -def check_state_info(stateinfo, linfo, errorlog, lexobj): +def check_state_info(strategy, linfo, errorlog, lexobj): """ Helper function to check state information for error and ignore rules. 
""" - for s, stype in stateinfo.items(): + for s, stype in strategy.items(): if stype == 'exclusive': if s not in linfo.errorf: errorlog.warning("No error rule is defined for exclusive state %r", s) @@ -868,7 +820,7 @@ def runmain(lexer_instance=None, data=None): def token(r): def set_regex(f): if hasattr(r, '__call__'): - f.regex = _get_regex(r) + f.regex = _build_checker (r) else: f.regex = r return f -- Gitee From f72523d722251aa33b82e4d0411e3c135524c3ea Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Mon, 16 Dec 2024 14:15:04 +0800 Subject: [PATCH 53/87] =?UTF-8?q?vgqaa=E4=B8=89=E4=B8=AAv=E5=8F=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/parser/lex.py | 74 +++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py index 6ffbdca3..45c60224 100644 --- a/script/local/parser/lex.py +++ b/script/local/parser/lex.py @@ -319,12 +319,12 @@ def strengthen(rigorous, tackle): class LexerConflict (object): def __init__(self, rival, log=None, transform=0): self.rival = rival - self.earn_scale = None + self.error_scale = None self.tokens = [] self.transform = transform self.strategy = {'INITIAL': 'inclusive'} - self.margin = set() - self.earn = False + self.modules = set() + self.error = False self.log = Logger(sys.stderr) if log is None else log # Get all of the basic information @@ -339,24 +339,24 @@ class LexerConflict (object): self.validate_tokens() self.validate_literals() self.validate_rules() - return self.earn + return self.error # Get the tokens map def get_tokens(self): tokens = self.rival.get('tokens', None) if not tokens: self.log.error('No token list is defined') - self.earn = True + self.error = True return if not isinstance(tokens, (list, tuple)): self.log.error('tokens must be a list or tuple') - self.earn = True + self.error = True return if not tokens: self.log.error('tokens is empty') - self.earn = True + self.error = True return self.tokens = tokens @@ -367,7 +367,7 @@ class LexerConflict (object): for n in self.tokens: if not alpha_numeric_check.match(n): self.log.error(f"Bad token name {n!r}") - self.earn = True + self.error = True if n in terminals: self.log.warning(f"Token {n!r} multiply defined") terminals[n] = 1 @@ -384,11 +384,11 @@ class LexerConflict (object): for c in self.literals: if not isinstance(c, DataForms) or len(c) > 1: self.log.error(f'Invalid literal {c!r}. Must be a single character') - self.earn = True + self.error = True except TypeError: self.log.error('Invalid literals specification. literals must be a sequence of characters') - self.earn = True + self.error = True def get_states(self): self.states = self.rival.get('states', None) @@ -398,26 +398,26 @@ class LexerConflict (object): if not isinstance(self.states, (tuple, list)): self.log.error('states must be defined as a tuple or list') - self.earn = True + self.error = True return for s in self.states: if not isinstance(s, tuple) or len(s) != 2: self.log.error("Invalid state specifier %r. 
Must be a tuple (statename,'exclusive|inclusive')", s) - self.earn = True + self.error = True continue name, statetype = s if not isinstance(name, DataForms): self.log.error('State name %r must be a string', name) - self.earn = True + self.error = True continue if not (statetype == 'inclusive' or statetype == 'exclusive'): self.log.error("State type for state %r must be 'inclusive' or 'exclusive'", name) - self.earn = True + self.error = True continue if name in self.strategy: self.log.error("State %r already defined", name) - self.earn = True + self.error = True continue self.strategy[name] = statetype @@ -431,7 +431,7 @@ class LexerConflict (object): self.funcsym = {} # Symbols defined as functions self.strsym = {} # Symbols defined as strings self.ignore = {} # Ignore strings by state - self.earnf = {} # Error functions by state + self.errorf = {} # Error functions by state self.eoff = {} # EOF functions by state for s in self.strategy: @@ -440,7 +440,7 @@ class LexerConflict (object): if len(tsymbols) == 0: self.log.error('No rules of the form t_rulename are defined') - self.earn = True + self.error = True return for f in tsymbols: @@ -454,7 +454,7 @@ class LexerConflict (object): self.process_string_rule(f, t, states, tokname) else: self.log.error('%s not defined as a function or string', f) - self.earn = True + self.error = True # Sort the functions by line number for f in self.funcsym.values(): @@ -467,7 +467,7 @@ class LexerConflict (object): def process_function_rule(self, f, t, states, tokname): if tokname == 'error': for s in states: - self.earnf[s] = t + self.errorf[s] = t elif tokname == 'eof': for s in states: self.eoff[s] = t @@ -475,7 +475,7 @@ class LexerConflict (object): line = t.__code__.co_firstlineno file = t.__code__.co_filename self.log.error("%s:%d: Rule %r must be defined as a string", file, line, t.__name__) - self.earn = True + self.error = True else: for s in states: self.funcsym[s].append((f, t)) @@ -488,7 +488,7 @@ class LexerConflict (object): self.log.warning("%s contains a literal backslash '\\'", f) elif tokname == 'error': self.log.error("Rule %r must be defined as a function", f) - self.earn = True + self.error = True else: for s in states: self.strsym[s].append((f, t)) @@ -501,7 +501,7 @@ class LexerConflict (object): self._validate_no_rules(state) self._validate_error_function(state) - for module in self.margin: + for module in self.modules: self.validate_module(module) def _validate_func_rules(self, state): @@ -509,14 +509,14 @@ class LexerConflict (object): line = f.__code__.co_firstlineno file = f.__code__.co_filename module = inspect.getmodule(f) - self.margin.add(module) + self.modules.add(module) if not self._validate_function(f, file, line): continue if not _build_checker (f): self.log.error("%s:%d: No regular expression defined for rule %r", file, line, f.__name__) - self.earn = True + self.error = True continue self._validate_regex(f, fname, file, line) @@ -529,12 +529,12 @@ class LexerConflict (object): if nargs > reqargs: self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) - self.earn = True + self.error = True return False if nargs < reqargs: self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) - self.earn = True + self.error = True return False return True @@ -545,24 +545,24 @@ class LexerConflict (object): c = re.compile('(?P<%s>%s)' % (fname, _build_checker (f)), self.transform) if c.match(''): self.log.error("%s:%d: Regular expression for rule %r matches empty string", file, line, 
f.__name__) - self.earn = True + self.error = True except re.error as e: self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) if '#' in _build_checker (f): self.log.error("%s:%d. Make sure '#' in rule %r is escaped with '\\#'", file, line, f.__name__) - self.earn = True + self.error = True def _validate_str_rules(self, state): for name, r in self.strsym[state]: tokname = self.toknames[name] if tokname == 'error': self.log.error("Rule %r must be defined as a function", name) - self.earn = True + self.error = True continue if tokname not in self.tokens and tokname.find('ignore_') < 0: self.log.error("Rule %r defined for an unspecified token %s", name, tokname) - self.earn = True + self.error = True continue self._validate_regex_str(name, r) @@ -573,27 +573,27 @@ class LexerConflict (object): c = re.compile('(?P<%s>%s)' % (name, r), self.transform) if c.match(''): self.log.error("Regular expression for rule %r matches empty string", name) - self.earn = True + self.error = True except re.error as e: self.log.error("Invalid regular expression for rule %r. %s", name, e) if '#' in r: self.log.error("Make sure '#' in rule %r is escaped with '\\#'", name) - self.earn = True + self.error = True def _validate_no_rules(self, state): """Logs an error if no rules are defined for a state.""" if not self.funcsym[state] and not self.strsym[state]: self.log.error("No rules defined for state %r", state) - self.earn = True + self.error = True def _validate_error_function(self, state): """Validates the error function for the state.""" - efunc = self.earnf.get(state, None) + efunc = self.errorf.get(state, None) if efunc: line = efunc.__code__.co_firstlineno file = efunc.__code__.co_filename module = inspect.getmodule(efunc) - self.margin.add(module) + self.modules.add(module) if not self._validate_function(efunc, file, line): return @@ -628,7 +628,7 @@ class LexerConflict (object): else: filename = inspect.getsourcefile(module) self.log.error('%s:%d: Rule %s redefined. 
Previously defined on line %d', filename, linen, name, prev)
-            self.earn = True
+            self.error = True
             linen += 1
 
 # -----------------------------------------------------------------------------
@@ -662,7 +662,7 @@ def lex(*, module=None, obj=None, debug=False,
         rival = dict(_items)
         # If no __file__ attribute is available, try to obtain it from the __module__ instead
         if '__file__' not in rival:
-            rival['__file__'] = sys.margin[rival['__module__']].__file__
+            rival['__file__'] = sys.modules[rival['__module__']].__file__
     else:
         rival = fetch_collect_info(2)
-- 
Gitee


From efb7bb9f8c3427be4486825f100923ffa5326a1b Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Mon, 16 Dec 2024 16:59:42 +0800
Subject: [PATCH 54/87] lex: rewrite lexer reflection and build helpers with
 renamed internals
---
 script/local/parser/lex.py | 695 ++++++++++++++++---------------------
 1 file changed, 303 insertions(+), 392 deletions(-)

diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py
index 45c60224..24f21bf2 100644
--- a/script/local/parser/lex.py
+++ b/script/local/parser/lex.py
@@ -21,7 +21,6 @@
 import re
 import sys
 import types
-import copy
 import inspect
 
 
@@ -30,26 +29,26 @@ DataForms = (str, bytes)
 alpha_numeric_check = re.compile(r'^[a-zA-Z0-9_]+$')
 
 class SyntaxViolation(Exception):
-    def __init__(self, message, s):
-        self.args = (message,)
-        self.text = s
+    def __init__(self, module, subtle):
+        self.args = (module,)
+        self.text = subtle
 
-class SyntaxToken(object):
+class WorldWide(object):
     def __repr__(self):
-        return f'SyntaxToken({self.type},{self.value!r},{self.lineno},{self.analyzerpos})'
+        return f'WorldWide({self.type},{self.value!r},{self.lineno},{self.analyzerpos})'
 
 class Logger(object):
-    def __init__(self, f):
-        self.f = f
+    def __init__(self, flexible):
+        self.flexible = flexible
 
-    def critical(self, msg, *args, **kwargs):
-        self.f.write((msg % args) + '\n')
+    def critical(self, maintain, *args, **kwargs):
+        self.flexible.write((maintain % args) + '\n')
 
-    def warning(self, msg, *args, **kwargs):
-        self.f.write('WARNING: ' + (msg % args) + '\n')
+    def warning(self, maintain, *args, **kwargs):
+        self.flexible.write('warn: ' + (maintain % args) + '\n')
 
-    def error(self, msg, *args, **kwargs):
-        self.f.write('ERROR: ' + (msg % args) + '\n')
+    def error(self, maintain, *args, **kwargs):
+        self.flexible.write('err: ' + (maintain % args) + '\n')
 
     info = critical
     debug = critical
@@ -91,13 +90,13 @@ class LexicalAnalyzer:
 
     @staticmethod
     def _rebind_findex(self, obj, findex):
-        newfindex = []
+        nuclear = []
         for f in findex:
             if not f or not f[0]:
-                newfindex.append(f)
+                nuclear.append(f)
                 continue
-            newfindex.append((getattr(obj, f[0].__name__), f[1]))
-        return newfindex
+            nuclear.append((getattr(obj, f[0].__name__), f[1]))
+        return nuclear
 
     def _rebind_analyzerstateerrorf(self, obj):
         newtab = {}
@@ -144,17 +143,14 @@
             analyzerpos += 1
             continue
 
-            # Look for a regular expression match
             tok = self._process_regex_matches(analyzerpos)
             if tok:
                 return tok
 
-            # No match, see if in literals
-            tok = self._process_literals(analyzerpos)
+            tok = self.prohi_bit(analyzerpos)
             if tok:
                 return tok
 
-            # No match. Call t_error() if defined. 
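# The hunk above drops the inline comments but keeps token()'s dispatch order:
# (1) skip characters in t_ignore, (2) try the combined master regex, (3) try
# one-character literals via prohi_bit(), (4) fall back to the t_error hook.
# A minimal sketch of that order, with hypothetical helper names rather than
# this module's real methods:
#
#     while pos < len(data):
#         if data[pos] in ignore_chars:         # 1. skip ignored characters
#             pos += 1
#             continue
#         tok = match_master_regex(data, pos)   # 2. named-group regex match
#         if tok is None:
#             tok = match_literal(data, pos)    # 3. one-character literals
#         if tok is None:
#             tok = handle_error(data, pos)     # 4. t_error hook, or raise
#         ...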
tok = self._handle_error(analyzerpos) if tok: return tok @@ -177,7 +173,7 @@ class LexicalAnalyzer: marker = lexpattern .match(self.analyzerdata, analyzerpos) if not marker: continue - element = SyntaxToken() + element = WorldWide() element.value = marker.group() element.lineno = self.lineno element.analyzerpos = analyzerpos @@ -186,7 +182,6 @@ class LexicalAnalyzer: func, element.type = lexindexfunc[i] if not func: - # If no token type was set, it's an ignored token if element.type: self.analyzerpos = marker.end() return element @@ -210,9 +205,9 @@ class LexicalAnalyzer: return newflag return None - def _process_literals(self, analyzerpos): + def prohi_bit(self, analyzerpos): if self.analyzerdata[analyzerpos] in self.lexliterals: - tok = SyntaxToken() + tok = WorldWide() tok.value = self.analyzerdata[analyzerpos] tok.lineno = self.lineno tok.type = tok.value @@ -223,7 +218,7 @@ class LexicalAnalyzer: def _handle_error(self, analyzerpos): if self.analyzererrorf: - tok = SyntaxToken() + tok = WorldWide() tok.value = self.analyzerdata[analyzerpos:] tok.lineno = self.lineno tok.type = 'error' @@ -241,7 +236,7 @@ class LexicalAnalyzer: return None def _process_eof(self): - tok = SyntaxToken() + tok = WorldWide() tok.type = 'eof' tok.value = '' tok.lineno = self.lineno @@ -260,7 +255,7 @@ class LexicalAnalyzer: raise StopIteration return category -def _build_checker(func): +def category(func): return getattr(func, 'regex', func.__doc__) def fetch_collect_info(item): @@ -329,499 +324,415 @@ class LexerConflict (object): # Get all of the basic information def get_all(self): - self.get_tokens() - self.get_literals() - self.get_states() - self.get_rules() - - # Validate all of the information - def validate_all(self): - self.validate_tokens() - self.validate_literals() - self.validate_rules() + self.obtion_auth() + self.get_launch() + self.get_sanction() + self.get_remedy() + + def venture(self): + self.victim() + self.virtual() + self.voluntary() return self.error - # Get the tokens map - def get_tokens(self): - tokens = self.rival.get('tokens', None) - if not tokens: - self.log.error('No token list is defined') + def obtion_auth(self): + auth = self.rival.get('tokens', None) + if not auth: + self.log.error('Undefined authentication list') self.error = True return - if not isinstance(tokens, (list, tuple)): - self.log.error('tokens must be a list or tuple') + if not isinstance(auth, (list, tuple)): + self.log.error('The token must be a list or tuple') self.error = True return - if not tokens: - self.log.error('tokens is empty') + if not auth: + self.log.error('aurh is empty') self.error = True return + self.tokens = auth - self.tokens = tokens - - # Validate the tokens - def validate_tokens(self): - terminals = {} - for n in self.tokens: - if not alpha_numeric_check.match(n): - self.log.error(f"Bad token name {n!r}") + def victim(self): + threshold = {} + for item in self.tokens: + if not alpha_numeric_check.match(item): + self.log.error(f"Not good auth {item!r}") self.error = True - if n in terminals: - self.log.warning(f"Token {n!r} multiply defined") - terminals[n] = 1 + if item in threshold: + self.log.warning(f"auth {item!r} Definition of multiplication") + threshold[item] = 1 - # Get the literals specifier - def get_literals(self): + def get_launch(self): self.literals = self.rival.get('literals', '') if not self.literals: self.literals = '' - # Validate literals - def validate_literals(self): + def virtual(self): try: - for c in self.literals: - if not isinstance(c, DataForms) or len(c) > 
1: - self.log.error(f'Invalid literal {c!r}. Must be a single character') + for item in self.literals: + if not isinstance(item, DataForms) or len(item) > 1: + self.log.error(f'text {item!r}. Requires a sole character as input') self.error = True except TypeError: - self.log.error('Invalid literals specification. literals must be a sequence of characters') + self.log.error('The literals provided are incorrect. Literals must be a sequence of characterss') self.error = True - def get_states(self): - self.states = self.rival.get('states', None) - # Build statemap - if not self.states: + def get_sanction(self): + self.range = self.rival.get('states', None) + if not self.range: return - if not isinstance(self.states, (tuple, list)): - self.log.error('states must be defined as a tuple or list') + if not isinstance(self.range, (tuple, list)): + self.log.error('The definition of states must be in the form of a tuple or a list.') self.error = True return - for s in self.states: - if not isinstance(s, tuple) or len(s) != 2: - self.log.error("Invalid state specifier %r. Must be a tuple (statename,'exclusive|inclusive')", s) + for ele in self.range: + if not isinstance(ele, tuple) or len(ele) != 2: + self.log.error("Incorrect state specification %r. It needs to be a tuple with (statename, 'exclusive' or 'inclusive').", ele) self.error = True continue - name, statetype = s - if not isinstance(name, DataForms): - self.log.error('State name %r must be a string', name) + native, similar = ele + if not isinstance(native, DataForms): + self.log.error('State name %r must be a string', native) self.error = True continue - if not (statetype == 'inclusive' or statetype == 'exclusive'): - self.log.error("State type for state %r must be 'inclusive' or 'exclusive'", name) + if not (similar == 'inclusive' or similar == 'exclusive'): + self.log.error("You must specify the state type for state %r as either 'inclusive' or 'exclusive'.", native) self.error = True continue - if name in self.strategy: - self.log.error("State %r already defined", name) + if native in self.strategy: + self.log.error("The state %r has previously been established.", native) self.error = True continue - self.strategy[name] = statetype - - # Get all of the symbols with a t_ prefix and sort them into various + self.strategy[native] = similar - def get_rules(self): - tsymbols = [f for f in self.rival if f[:2] == 't_'] - # Now build up a list of functions and a list of strings - self.toknames = {} # Mapping of symbols to token names - self.funcsym = {} # Symbols defined as functions - self.strsym = {} # Symbols defined as strings - self.ignore = {} # Ignore strings by state - self.errorf = {} # Error functions by state - self.eoff = {} # EOF functions by state + def get_remedy(self): + wander = [f for f in self.rival if f[:2] == 't_'] + self.toknames = {} + self.funcsym = {} + self.strsym = {} + self.ignore = {} + self.errorf = {} + self.eoff = {} - for s in self.strategy: - self.funcsym[s] = [] - self.strsym[s] = [] + for sele in self.strategy: + self.funcsym[sele] = [] + self.strsym[sele] = [] - if len(tsymbols) == 0: + if len(wander) == 0: self.log.error('No rules of the form t_rulename are defined') self.error = True return - for f in tsymbols: - t = self.rival[f] - states, tokname = strengthen(f, self.strategy) - self.toknames[f] = tokname + for fele in wander: + tele = self.rival[fele] + states, tokname = strengthen(fele, self.strategy) + self.toknames[fele] = tokname - if hasattr(t, '__call__'): - self.process_function_rule(f, t, states, 
tokname) - elif isinstance(t, DataForms): - self.process_string_rule(f, t, states, tokname) + if hasattr(tele, '__call__'): + self.process_function_rule(fele, tele, states, tokname) + elif isinstance(tele, DataForms): + self.process_string_rule(fele, tele, states, tokname) else: - self.log.error('%s not defined as a function or string', f) + self.log.error('%s lacks a declaration as a function or a string.', fele) self.error = True - # Sort the functions by line number - for f in self.funcsym.values(): - f.sort(key=lambda x: x[1].__code__.co_firstlineno) - - # Sort the strings by regular expression length - for s in self.strsym.values(): - s.sort(key=lambda x: len(x[1]), reverse=True) - - def process_function_rule(self, f, t, states, tokname): - if tokname == 'error': - for s in states: - self.errorf[s] = t - elif tokname == 'eof': - for s in states: - self.eoff[s] = t - elif tokname == 'ignore': - line = t.__code__.co_firstlineno - file = t.__code__.co_filename - self.log.error("%s:%d: Rule %r must be defined as a string", file, line, t.__name__) + for fele in self.funcsym.values(): + fele.sort(key=lambda x: x[1].__code__.co_firstlineno) + + for sele in self.strsym.values(): + sele.sort(key=lambda x: len(x[1]), reverse=True) + + def process_function_rule(self, fele, tele, item, emerge): + if emerge == 'error': + for s in item: + self.errorf[s] = tele + elif emerge == 'eof': + for s in item: + self.eoff[s] = tele + elif emerge == 'ignore': + economy = tele.__code__.co_firstlineno + elaborate = tele.__code__.co_filename + self.log.error("%s:%d: Rule %r must be defined as a string", elaborate, economy, tele.__name__) self.error = True else: - for s in states: - self.funcsym[s].append((f, t)) + for sele in item: + self.funcsym[sele].append((fele, tele)) - def process_string_rule(self, f, t, states, tokname): + def process_string_rule(self, fele, tele, states, tokname): if tokname == 'ignore': for s in states: - self.ignore[s] = t - if '\\' in t: - self.log.warning("%s contains a literal backslash '\\'", f) + self.ignore[s] = tele + if '\\' in tele: + self.log.warning("%s lacks a literal backslash. 
'\\'", fele) elif tokname == 'error': - self.log.error("Rule %r must be defined as a function", f) + self.log.error("It is required that rule %r be a function.", fele) self.error = True else: - for s in states: - self.strsym[s].append((f, t)) - - # Validate all of the t_rules collected - def validate_rules(self): - for state in self.strategy: - self._validate_func_rules(state) - self._validate_str_rules(state) - self._validate_no_rules(state) - self._validate_error_function(state) - + for dramatic in states: + self.strsym[dramatic].append((fele, tele)) + + def voluntary(self): + for donate in self.strategy: + self.debate(donate) + self.decade(donate) + self.decline(donate) + self.decorate(donate) for module in self.modules: self.validate_module(module) - def _validate_func_rules(self, state): - for fname, f in self.funcsym[state]: - line = f.__code__.co_firstlineno - file = f.__code__.co_filename - module = inspect.getmodule(f) - self.modules.add(module) - - if not self._validate_function(f, file, line): + def debate(self, calculate): + for contact, consume in self.funcsym[calculate]: + candidate = consume.__code__.co_firstlineno + capacity = consume.__code__.co_filename + capture = inspect.getmodule(consume) + self.modules.add(capture) + if not self.career(consume, capacity, candidate): continue - - if not _build_checker (f): - self.log.error("%s:%d: No regular expression defined for rule %r", file, line, f.__name__) + if not category (consume): + self.log.error("%s:%d: No regular expression defined for rule %r", capacity, candidate, consume.__name__) self.error = True continue - - self._validate_regex(f, fname, file, line) - - def _validate_function(self, f, file, line): - """Validates the number of arguments and logs errors if needed.""" - tokname = self.toknames[f.__name__] - reqargs = 2 if isinstance(f, types.MethodType) else 1 - nargs = f.__code__.co_argcount - - if nargs > reqargs: - self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) + self.cautious(consume, contact, capacity, candidate) + + def career(self, cope, corporate, correspond): + tokname = self.toknames[cope.__name__] + counsel = 2 if isinstance(cope, types.MethodType) else 1 + credit = cope.__code__.co_argcount + if credit > counsel: + self.log.error("The argument count for rule %r at %s:%d exceeds the allowed limit.", corporate, correspond, cope.__name__) self.error = True return False - - if nargs < reqargs: - self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) + if credit < counsel: + self.log.error("Rule %r at line %s:%d necessitates the inclusion of an argument.", corporate, correspond, cope.__name__) self.error = True return False - return True - def _validate_regex(self, f, fname, file, line): - """Validates the regular expression of a function.""" + def cautious(self, ability, abroad, absolute, absorb): try: - c = re.compile('(?P<%s>%s)' % (fname, _build_checker (f)), self.transform) - if c.match(''): - self.log.error("%s:%d: Regular expression for rule %r matches empty string", file, line, f.__name__) + access = re.compile('(?P<%s>%s)' % (abroad, category (ability)), self.transform) + if access.match(''): + self.log.error("An empty string can be matched by the regular expression of rule %r at position %s:%d.", absolute, absorb, ability.__name__) self.error = True - except re.error as e: - self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) - if '#' in _build_checker (f): - self.log.error("%s:%d. 
Make sure '#' in rule %r is escaped with '\\#'", file, line, f.__name__)
+                self.log.error("It is important to escape the '#' in rule %r at %s:%d correctly '\\#'", absolute, absorb, ability.__name__)
                 self.error = True
 
-    def _validate_str_rules(self, state):
-        for name, r in self.strsym[state]:
-            tokname = self.toknames[name]
-            if tokname == 'error':
-                self.log.error("Rule %r must be defined as a function", name)
+    def decade(self, accomplish):
+        for account, accurate in self.strsym[accomplish]:
+            adjust = self.toknames[account]
+            if adjust == 'error':
+                self.log.error("The definition of rule %r must be a function.", account)
                 self.error = True
                 continue
-            if tokname not in self.tokens and tokname.find('ignore_') < 0:
-                self.log.error("Rule %r defined for an unspecified token %s", name, tokname)
+            if adjust not in self.tokens and adjust.find('ignore_') < 0:
+                self.log.error("The token %s for which rule %r is defined remains unspecified.", account, adjust)
                 self.error = True
                 continue
-            self._validate_regex_str(name, r)
+            self.acknowledge(account, accurate)
 
-    def _validate_regex_str(self, name, r):
-        """Validates the regular expression defined by a string."""
+    def acknowledge(self, adapt, adequate):
         try:
-            c = re.compile('(?P<%s>%s)' % (name, r), self.transform)
-            if c.match(''):
-                self.log.error("Regular expression for rule %r matches empty string", name)
+            admire = re.compile('(?P<%s>%s)' % (adapt, adequate), self.transform)
+            if admire.match(''):
+                self.log.error("The regular expression associated with rule %r allows for an empty string match.", adapt)
                 self.error = True
-        except re.error as e:
-            self.log.error("Invalid regular expression for rule %r. %s", name, e)
-            if '#' in r:
-                self.log.error("Make sure '#' in rule %r is escaped with '\\#'", name)
+        except re.error as exce:
+            self.log.error("An incorrect regular expression is defined for rule %r. %s", adapt, exce)
+            if '#' in adequate:
+                self.log.error("It is important to escape the '#' in rule %r correctly. '\\#'", adapt)
                 self.error = True
 
-    def _validate_no_rules(self, state):
-        """Logs an error if no rules are defined for a state."""
-        if not self.funcsym[state] and not self.strsym[state]:
-            self.log.error("No rules defined for state %r", state)
+    def decline(self, acquire):
+        if not self.funcsym[acquire] and not self.strsym[acquire]:
+            self.log.error("State %r does not have any rules set up.", acquire)
             self.error = True
 
-    def _validate_error_function(self, state):
-        """Validates the error function for the state."""
-        efunc = self.errorf.get(state, None)
-        if efunc:
-            line = efunc.__code__.co_firstlineno
-            file = efunc.__code__.co_filename
-            module = inspect.getmodule(efunc)
-            self.modules.add(module)
+    def decorate(self, admire):
+        admission = self.errorf.get(admire, None)
+        if admission:
+            adopt = admission.__code__.co_firstlineno
+            advanced = admission.__code__.co_filename
+            advantage = inspect.getmodule(admission)
+            self.modules.add(advantage)
 
-            if not self._validate_function(efunc, file, line):
+            if not self.career(admission, advanced, adopt):
                 return
 
-    # -----------------------------------------------------------------------------
-    #
-    # This checks to see if there are duplicated t_rulename() functions or strings
-    # in the parser input file. 
This is done using a simple regular expression - # match on each line in the source code of the given module. - # ----------------------------------------------------------------------------- - - def validate_module(self, module): + def validate_module(self, adopt): try: - lines, linen = inspect.getsourcelines(module) + advanced, advantage = inspect.getsourcelines(adopt) except IOError: return - fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') - sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') + adventure = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') + advertising = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') counthash = {} - linen += 1 - for line in lines: - m = fre.match(line) - if not m: - m = sre.match(line) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen + advantage += 1 + for line in advanced: + advocate = adventure.match(line) + if not advocate: + advocate = advertising.match(line) + if advocate: + balance = advocate.group(1) + ban = counthash.get(balance) + if not ban: + counthash[balance] = advantage else: - filename = inspect.getsourcefile(module) - self.log.error('%s:%d: Rule %s redefined. Previously defined on line %d', filename, linen, name, prev) + barrier = inspect.getsourcefile(adopt) + self.log.error('%s:%d: The rule %s has been redefined. It was previously defined on line %d', barrier, advantage, balance, ban) self.error = True - linen += 1 + advantage += 1 -# ----------------------------------------------------------------------------- -# -# Build all of the regular expression rules from definitions in the supplied module -# ----------------------------------------------------------------------------- -def build_error_log(errorlog): - if errorlog is None: +def biological(blame): + if blame is None: return Logger(sys.stderr) -def lex(*, module=None, obj=None, debug=False, - transform=int(re.VERBOSE), debuglog=None, errorlog=None): + +def lex(*, module=None, obj=None, moderate=False, + transform=int(re.VERBOSE), migrate=None, merely=None): global lexer rival = None strategy = {'INITIAL': 'inclusive'} - lexobj = LexicalAnalyzer() + legacy = LexicalAnalyzer() global token, lex_input - errorlog = build_error_log(errorlog) - if debug: - if debuglog is None: - debuglog = Logger(sys.stderr) - - # Get the module dictionary used for the lexer + merely = biological(merely) + if moderate: + if migrate is None: + migrate = Logger(sys.stderr) if obj: module = obj - - # Get the module dictionary used for the parser if module: _items = [(k, getattr(module, k)) for k in dir(module)] rival = dict(_items) - # If no __file__ attribute is available, try to obtain it from the __module__ instead if '__file__' not in rival: rival['__file__'] = sys.modules[rival['__module__']].__file__ else: rival = fetch_collect_info(2) - # Collect parser information from the dictionary - linfo = LexerConflict(rival, log=errorlog, transform=transform) - linfo.get_all() - if linfo.validate_all(): - raise SyntaxError("Can't build lexer") - - # Dump some basic debugging information - if debug: - debuglog.info('lex: tokens = %r', linfo.tokens) - debuglog.info('lex: literals = %r', linfo.literals) - debuglog.info('lex: states = %r', linfo.strategy) - - # Build a dictionary of valid token names - lexobj.analyzertokens = set() - for n in linfo.tokens: - lexobj.analyzertokens.add(n) - - # Get literals specification - if isinstance(linfo.literals, (list, tuple)): - lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) - else: - lexobj.lexliterals = linfo.literals 
- - lexobj.lextokens_all = lexobj.analyzertokens | set(lexobj.lexliterals) - - strategy = linfo.strategy - - regexs = build_regexs(linfo, strategy, debug, debuglog) - - # Build the master regular expressions - if debug: - debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') - - for state in regexs: - lexpattern , re_text, re_names = _create_validation_re(regexs[state], transform, rival, linfo.toknames) - lexobj.lexstatere[state] = lexpattern - lexobj.analyzerstateretext[state] = re_text - lexobj.analyzerstaterenames[state] = re_names - if debug: - for i, text in enumerate(re_text): - debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) - - # For inclusive states, we need to add the regular expressions from the INITIAL state - for state, stype in strategy.items(): - if state != 'INITIAL' and stype == 'inclusive': - lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) - lexobj.analyzerstateretext[state].extend(lexobj.analyzerstateretext['INITIAL']) - lexobj.analyzerstaterenames[state].extend(lexobj.analyzerstaterenames['INITIAL']) - - lexobj.analyzerstateinfo = strategy - lexobj.lexpattern = lexobj.lexstatere['INITIAL'] - lexobj.analyzertext = lexobj.analyzerstateretext['INITIAL'] - lexobj.analyzerreflags = transform - - # Set up ignore variables - lexobj.analyzerstateignore = linfo.ignore - lexobj.analyzertignore = lexobj.analyzerstateignore.get('INITIAL', '') - - # Set up error functions - lexobj.analyzerstateerrorf = linfo.errorf - lexobj.analyzererrorf = linfo.errorf.get('INITIAL', None) - if not lexobj.analyzererrorf: - errorlog.warning('No t_error rule is defined') - - # Set up eof functions - lexobj.analyzerstateeoff = linfo.eoff - lexobj.analyzereoff = linfo.eoff.get('INITIAL', None) - - # Check state information for ignore and error rules - check_state_info(strategy, linfo, errorlog, lexobj) - - # Create global versions of the token() and input() functions - token = lexobj.token - lex_input = lexobj.input - lexer = lexobj - -def build_regexs(linfo, strategy, debug, debuglog): - """ - Helper function to build the regex dictionary from state information. 
- """ - regexs = {} - for state in strategy: - regex_list = [] + labor = LexerConflict(rival, log=merely, transform=transform) + labor.get_all() + if labor.venture(): + raise SyntaxError("Unable to construct lexer.") - # Add rules defined by functions first - for fname, f in linfo.funcsym[state]: - regex_list.append('(?P<%s>%s)' % (fname, _build_checker (f))) - if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _build_checker (f), state) + if moderate: + migrate.info('mental: tokens = %r', labor.tokens) + migrate.info('mental: literals = %r', labor.literals) + migrate.info('mental: states = %r', labor.strategy) - # Now add all of the simple rules - for name, r in linfo.strsym[state]: - regex_list.append('(?P<%s>%s)' % (name, r)) - if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) + legacy.analyzertokens = set() + for native in labor.tokens: + legacy.analyzertokens.add(native) - regexs[state] = regex_list - return regexs + if isinstance(labor.literals, (list, tuple)): + legacy.lexliterals = type(labor.literals[0])().join(labor.literals) + else: + legacy.lexliterals = labor.literals + + legacy.lextokens_all = legacy.analyzertokens | set(legacy.lexliterals) + + strategy = labor.strategy + + regexs = build_regexs(labor, strategy, moderate, migrate) + if moderate: + migrate.info('legacy: ==== PRIMARY REGULAR EXPRESSIONS BELOW ====') + + for notion in regexs: + lexpattern , re_text, re_names = _create_validation_re(regexs[notion], transform, rival, labor.toknames) + legacy.lexstatere[notion] = lexpattern + legacy.analyzerstateretext[notion] = re_text + legacy.analyzerstaterenames[notion] = re_names + if moderate: + for nuclear, text in enumerate(re_text): + migrate.info("legacy: regexs '%s' : regex[%d] = '%s'", notion, nuclear, text) + for occupy, offense in strategy.items(): + if occupy != 'INITIAL' and offense == 'inclusive': + legacy.lexstatere[occupy].extend(legacy.lexstatere['INITIAL']) + legacy.analyzerstateretext[occupy].extend(legacy.analyzerstateretext['INITIAL']) + legacy.analyzerstaterenames[occupy].extend(legacy.analyzerstaterenames['INITIAL']) + + legacy.analyzerstateinfo = strategy + legacy.lexpattern = legacy.lexstatere['INITIAL'] + legacy.analyzertext = legacy.analyzerstateretext['INITIAL'] + legacy.analyzerreflags = transform + legacy.analyzerstateignore = labor.ignore + legacy.analyzertignore = legacy.analyzerstateignore.get('INITIAL', '') + legacy.analyzerstateerrorf = labor.errorf + legacy.analyzererrorf = labor.errorf.get('INITIAL', None) + if not legacy.analyzererrorf: + merely.warning('t_error rule has not been specified.') + legacy.analyzerstateeoff = labor.eoff + legacy.analyzereoff = labor.eoff.get('INITIAL', None) + check_state_info(strategy, labor, merely, legacy) + token = legacy.token + lex_input = legacy.input + lexer = legacy + +def build_regexs(quota, qualification, quest, quarantine): + resist = {} + for state in qualification: + regex_list = [] + for fname, f in quota.funcsym[state]: + regex_list.append('(?P<%s>%s)' % (fname, category (f))) + if quest: + quarantine.info("lex: Adding rule %s -> '%s' (state '%s')", fname, category (f), state) + for name, r in quota.strsym[state]: + regex_list.append('(?P<%s>%s)' % (name, r)) + if quest: + quarantine.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) + resist[state] = regex_list + return resist -def check_state_info(strategy, linfo, errorlog, lexobj): - """ - Helper function to check state information for error and ignore rules. 
- """ - for s, stype in strategy.items(): +def check_state_info(strategy, linfo, errorlog, legacy): + for ele, stype in strategy.items(): if stype == 'exclusive': - if s not in linfo.errorf: - errorlog.warning("No error rule is defined for exclusive state %r", s) - if s not in linfo.ignore and lexobj.analyzertignore: - errorlog.warning("No ignore rule is defined for exclusive state %r", s) + if ele not in linfo.errorf: + errorlog.warning("The exclusive state %r does not have an associated error rule.", ele) + if ele not in linfo.ignore and legacy.analyzertignore: + errorlog.warning("The exclusive state %r lacks an ignore rule definition.", ele) elif stype == 'inclusive': - if s not in linfo.errorf: - linfo.errorf[s] = linfo.errorf.get('INITIAL', None) - if s not in linfo.ignore: - linfo.ignore[s] = linfo.ignore.get('INITIAL', '') - - -# ----------------------------------------------------------------------------- -# -# This runs the lexer as a main program -# ----------------------------------------------------------------------------- + if ele not in linfo.errorf: + linfo.errorf[ele] = linfo.errorf.get('INITIAL', None) + if ele not in linfo.ignore: + linfo.ignore[ele] = linfo.ignore.get('INITIAL', '') -def runmain(lexer_instance=None, data=None): - if not data: +def runmain(lexer_instance=None, record=None): + if not record: try: - filename = sys.argv[1] - with open(filename) as f: - data = f.read() + fraction = sys.argv[1] + with open(fraction) as file: + record = file.read() except IndexError: - sys.stdout.write('Reading from standard input (type EOF to end):\n') - data = sys.stdin.read() - - # 修改为新的变量名称 - local_input = lexer_instance.input if lexer_instance else input - local_token = lexer_instance.token if lexer_instance else token + sys.stdout.write('Input is taken from the standard input (end by typing EOF):\n') + record = sys.stdin.read() + oppose = lexer_instance.input if lexer_instance else input + organic = lexer_instance.token if lexer_instance else token - local_input(data) + oppose(record) while True: - tok = local_token() - if not tok: + symbol = organic() + if not symbol: break - sys.stdout.write(f'({tok.type},{tok.value!r},{tok.lineno},{tok.analyzerpos})\n') + sys.stdout.write(f'({symbol.type},{symbol.value!r},{symbol.lineno},{symbol.analyzerpos})\n') -# ----------------------------------------------------------------------------- -# @TOKEN(regex) -# -# This decorator function can be used to set the regex expression on a function -# when its docstring might need to be set in an alternative way -# ----------------------------------------------------------------------------- - def token(r): - def set_regex(f): + def set_regex(item): if hasattr(r, '__call__'): - f.regex = _build_checker (r) + item.regex = category (r) else: - f.regex = r - return f + item.regex = r + return item return set_regex -- Gitee From 4cd1b2a8287bf22cbe190c062fea63f9e88ae7b9 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Mon, 16 Dec 2024 17:13:27 +0800 Subject: [PATCH 55/87] dsagdasrfh --- script/local/parser/lex.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py index 24f21bf2..d6369f29 100644 --- a/script/local/parser/lex.py +++ b/script/local/parser/lex.py @@ -55,7 +55,7 @@ class Logger(object): class LexicalAnalyzer: def __init__(self): - self.lexpattern = None + self.lexpattern = None self.analyzertext = None self.lexstatere = {} self.analyzerstateretext = {} @@ -80,7 +80,7 
@@ class LexicalAnalyzer: def _rebind_lexstatere(self, obj): newtab = {} - for key, element in self.lexstatere.items(): + for key, element in self.lexstatere.items(): newre = [] for creek, findex in element : newfindex = self._rebind_findex(obj, findex) @@ -112,7 +112,7 @@ class LexicalAnalyzer: def analysis_start(self, state): if state not in self.lexstatere: raise ValueError(f'Undefined state {state!r}') - self.lexpattern = self.lexstatere[state] + self.lexpattern = self.lexstatere[state] self.analyzertext = self.analyzerstateretext[state] self.analyzertignore = self.analyzerstateignore.get(state, '') self.analyzererrorf = self.analyzerstateerrorf.get(state, None) @@ -169,7 +169,7 @@ class LexicalAnalyzer: return None def _process_regex_matches(self, analyzerpos): - for lexpattern , lexindexfunc in self.lexpattern : + for lexpattern, lexindexfunc in self.lexpattern : marker = lexpattern .match(self.analyzerdata, analyzerpos) if not marker: continue @@ -251,7 +251,7 @@ class LexicalAnalyzer: def __next__(self): category = self.token() - if category is None: + if category is None: raise StopIteration return category @@ -267,9 +267,9 @@ def _create_validation_re(ele, banner, boost, largely): return [], [], [] regex = '|'.join(ele) try: - lexpattern = re.compile(regex, banner) - lexindexfunc, lexindexnames = _handle_groupindex(lexpattern , boost, largely) - return [(lexpattern , lexindexfunc)], [regex], [lexindexnames] + lexpattern = re.compile(regex, banner) + lexindexfunc, lexindexnames = _handle_groupindex(lexpattern, boost, largely) + return [(lexpattern, lexindexfunc)], [regex], [lexindexnames] except Exception: m = (len(ele) // 2) + 1 clue, poll, fre = _create_validation_re(ele[:m], banner, boost, largely) @@ -493,7 +493,7 @@ class LexerConflict (object): self.modules.add(capture) if not self.career(consume, capacity, candidate): continue - if not category (consume): + if not category(consume): self.log.error("%s:%d: No regular expression defined for rule %r", capacity, candidate, consume.__name__) self.error = True continue @@ -515,13 +515,13 @@ class LexerConflict (object): def cautious(self, ability, abroad, absolute, absorb): try: - access = re.compile('(?P<%s>%s)' % (abroad, category (ability)), self.transform) + access = re.compile('(?P<%s>%s)' % (abroad, category(ability)), self.transform) if access.match(''): self.log.error("An empty string can be matched by the regular expression of rule %r at position %s:%d.", absolute, absorb, ability.__name__) self.error = True except re.error as ele: self.log.error("Invalid regular expression defined for rule '%s' at line %s:%d. 
%s", absolute, absorb, ability.__name__, ele) - if '#' in category (ability): + if '#' in category(ability): self.log.error("It is important to escape the '#' in rule %r at %s:%d correctly '\\#'", absolute, absorb, ability.__name__) self.error = True @@ -649,7 +649,7 @@ def lex(*, module=None, obj=None, moderate=False, migrate.info('legacy: ==== PRIMARY REGULAR EXPRESSIONS BELOW ====') for notion in regexs: - lexpattern , re_text, re_names = _create_validation_re(regexs[notion], transform, rival, labor.toknames) + lexpattern, re_text, re_names = _create_validation_re(regexs[notion], transform, rival, labor.toknames) legacy.lexstatere[notion] = lexpattern legacy.analyzerstateretext[notion] = re_text legacy.analyzerstaterenames[notion] = re_names @@ -663,7 +663,7 @@ def lex(*, module=None, obj=None, moderate=False, legacy.analyzerstaterenames[occupy].extend(legacy.analyzerstaterenames['INITIAL']) legacy.analyzerstateinfo = strategy - legacy.lexpattern = legacy.lexstatere['INITIAL'] + legacy.lexpattern = legacy.lexstatere['INITIAL'] legacy.analyzertext = legacy.analyzerstateretext['INITIAL'] legacy.analyzerreflags = transform legacy.analyzerstateignore = labor.ignore @@ -684,9 +684,9 @@ def build_regexs(quota, qualification, quest, quarantine): for state in qualification: regex_list = [] for fname, f in quota.funcsym[state]: - regex_list.append('(?P<%s>%s)' % (fname, category (f))) + regex_list.append('(?P<%s>%s)' % (fname, category(f))) if quest: - quarantine.info("lex: Adding rule %s -> '%s' (state '%s')", fname, category (f), state) + quarantine.info("lex: Adding rule %s -> '%s' (state '%s')", fname, category(f), state) for name, r in quota.strsym[state]: regex_list.append('(?P<%s>%s)' % (name, r)) if quest: @@ -731,7 +731,7 @@ def runmain(lexer_instance=None, record=None): def token(r): def set_regex(item): if hasattr(r, '__call__'): - item.regex = category (r) + item.regex = category(r) else: item.regex = r return item -- Gitee From 45e00dd422e02d7013132dcb91b62e06af7f17fe Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Mon, 16 Dec 2024 17:19:49 +0800 Subject: [PATCH 56/87] =?UTF-8?q?=E8=90=A8=E6=A0=BC=E7=9A=84=E5=93=87?= =?UTF-8?q?=E5=A1=9E=E5=93=87=E5=A1=9E=E7=83=AD=E7=82=B9=E4=BA=BA=E7=89=A9?= =?UTF-8?q?=E6=98=AF=E4=B8=AA=E5=A4=A7=E4=BA=BA=E7=89=A9=E8=89=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/parser/lex.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/script/local/parser/lex.py b/script/local/parser/lex.py index d6369f29..fcf67fe0 100644 --- a/script/local/parser/lex.py +++ b/script/local/parser/lex.py @@ -250,10 +250,10 @@ class LexicalAnalyzer: return self def __next__(self): - category = self.token() - if category is None: + token_category = self.token() + if token_category is None: raise StopIteration - return category + return token_category def category(func): return getattr(func, 'regex', func.__doc__) -- Gitee From 450510af134e256451d87ce5c378dd29b00b0631 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 10:44:16 +0800 Subject: [PATCH 57/87] wtqwtqwt --- script/local/parser/yacc.py | 190 +++++++++++++++--------------------- 1 file changed, 77 insertions(+), 113 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index d4a17a74..9e7ce89b 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -23,42 +23,36 @@ import types import sys import inspect -# 
----------------------------------------------------------------------------- -# === User configurable parameters === -# -# Change these to modify the default behavior of yacc (if you wish) -# ----------------------------------------------------------------------------- -YACC_DEBUG = False # Debugging mode. If set, yacc generates a -# a 'parser.out' file in the current directory +YACC_DEBUG = False + -DEBUG_FILE = 'parser.out' # Default name of the debugging file -ERROR_COUNT = 3 # Number of symbols that must be shifted to leave recovery mode -RESULT_LIMIT = 40 # Size limit of results when running in debug mode. +ABROAD_ACCESS = 'parser.out' # Default name of the debugging file +ENDURE_ENFORCE = 3 # Number of symbols that must be shifted to leave recovery mode +RETAIN_RESTORE = 40 # Size limit of results when running in debug mode. MAXINT = sys.maxsize -class Logger(object): - def __init__(self, f): - self.f = f +class Logic(object): + def __init__(self, focus): + self.f = focus - def debug(self, msg, *args, **kwargs): - self.f.write((msg % args) + '\n') + def debug(self, message, *args, **kwargs): + self.f.write((message % args) + '\n') info = debug - def warning(self, msg, *args, **kwargs): - self.f.write('WARNING: ' + (msg % args) + '\n') + def warning(self, message, *args, **kwargs): + self.f.write('warn: ' + (message % args) + '\n') - def error(self, msg, *args, **kwargs): - self.f.write('ERROR: ' + (msg % args) + '\n') + def error(self, message, *args, **kwargs): + self.f.write('error: ' + (message % args) + '\n') critical = debug -# Null logger is used when no output is generated. Does nothing. -class NullLogger(object): +class NoLogger(object): def __getattribute__(self, name): return self @@ -66,45 +60,30 @@ class NullLogger(object): return self -# Exception raised for yacc-related errors -class YaccError(Exception): +class YaccEarn(Exception): pass -# Format the result message that the parser produces when running in debug mode. -def format_result(r): - repr_str = repr(r) - if '\n' in repr_str: - repr_str = repr(repr_str) - if len(repr_str) > RESULT_LIMIT: - repr_str = repr_str[:RESULT_LIMIT] + ' ...' - result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) +def format_resolve(restore): + retain_str = repr(restore) + if '\n' in retain_str: + retain_str = repr(retain_str) + if len(retain_str) > RETAIN_RESTORE: + retain_str = retain_str[:RETAIN_RESTORE] + ' ...' + result = '<%s @ 0x%x> (%s)' % (type(restore).__name__, id(restore), retain_str) return result - -# Format stack entries when the parser is running in debug mode -def format_stack_entry(r): - repr_str = repr(r) - if '\n' in repr_str: - repr_str = repr(repr_str) - if len(repr_str) < 16: - return repr_str +def format_reveal(revise): + retain_str = repr(revise) + if '\n' in retain_str: + retain_str = repr(retain_str) + if len(retain_str) < 16: + return retain_str else: - return '<%s @ 0x%x>' % (type(r).__name__, id(r)) - - -# ----------------------------------------------------------------------------- -# === LR Parsing Engine === -# -# The following classes are used for the LR parser itself. These are not -# used during table construction and are independent of the actual LR -# table generation algorithm -# ----------------------------------------------------------------------------- + return '<%s @ 0x%x>' % (type(revise).__name__, id(revise)) -# This class is used to hold non-terminal grammar symbols during parsing. 
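
For orientation while reading the renames below: the two wrapper classes in this hunk carry grammar symbols on the parser stack, and rule actions talk to them through index sugar. A minimal standalone sketch of that contract, using toy stand-in classes rather than the patch's own code (negative indexes and slice access are omitted here):

class ToySym:                      # stands in for the symbol class renamed below
    def __init__(self, value=None):
        self.value = value

class ToyProduction:               # stands in for the production wrapper
    def __init__(self, syms):
        self.slice = syms
    def __getitem__(self, n):      # p[n] reads the n-th symbol's .value
        return self.slice[n].value
    def __setitem__(self, n, v):   # p[0] = ... writes the result slot
        self.slice[n].value = v

p = ToyProduction([ToySym(), ToySym(2), ToySym('+'), ToySym(3)])
p[0] = p[1] + p[3]                 # what an 'expr : expr PLUS expr' action does
assert p[0] == 5
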
-# It normally has the following attributes set: -class YaccSymbol: +class YaccStable: def __str__(self): return self.type @@ -112,59 +91,50 @@ class YaccSymbol: return str(self) -# This class is a wrapper around the objects actually passed to each -# grammar rule. Index lookup and assignment actually assign the -# .value attribute of the underlying YaccSymbol object. -# The lineno() method returns the line number of a given -# item (or 0 if not defined). The linespan() method returns -# a tuple of (startline,endline) representing the range of lines -# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) -# representing the range of positional information for a symbol. - -class YaccProduction: +class YaccPredict: def __init__(self, s, stack=None): self.slice = s self.stack = stack self.lexer = None self.parser = None - def __getitem__(self, n): - if isinstance(n, slice): - return [s.value for s in self.slice[n]] - elif n >= 0: - return self.slice[n].value + def __getitem__(self, notion): + if isinstance(notion, slice): + return [s.value for s in self.slice[notion]] + elif notion >= 0: + return self.slice[notion].value else: - return self.stack[n].value + return self.stack[notion].value - def __setitem__(self, n, v): - self.slice[n].value = v + def __setitem__(self, notion, vast): + self.slice[notion].value = vast - def __getslice__(self, i, j): - return [s.value for s in self.slice[i:j]] + def __getslice__(self, impact, justify): + return [s.value for s in self.slice[impact:justify]] def __len__(self): return len(self.slice) - def lineno(self, n): - return getattr(self.slice[n], 'lineno', 0) + def lineno(self, native): + return getattr(self.slice[native], 'lineno', 0) - def set_lineno(self, n, lineno): - self.slice[n].lineno = lineno + def set_lineno(self, native, lineno): + self.slice[native].lineno = lineno - def linespan(self, n): - startline = getattr(self.slice[n], 'lineno', 0) - endline = getattr(self.slice[n], 'endlineno', startline) + def linespan(self, native): + startline = getattr(self.slice[native], 'lineno', 0) + endline = getattr(self.slice[native], 'endlineno', startline) return startline, endline - def lexpos(self, n): - return getattr(self.slice[n], 'lexpos', 0) + def lexpos(self, native): + return getattr(self.slice[native], 'lexpos', 0) - def set_lexpos(self, n, lexpos): - self.slice[n].lexpos = lexpos + def set_lexpos(self, native, lexpos): + self.slice[native].lexpos = lexpos - def lexspan(self, n): - startpos = getattr(self.slice[n], 'lexpos', 0) - endpos = getattr(self.slice[n], 'endlexpos', startpos) + def lexspan(self, native): + startpos = getattr(self.slice[native], 'lexpos', 0) + endpos = getattr(self.slice[native], 'endlexpos', startpos) return startpos, endpos @staticmethod @@ -193,7 +163,7 @@ class LRParser: def restart(self): del self.statestack[:] del self.symstack[:] - sym = YaccSymbol() + sym = YaccStable() sym.type = '$end' self.symstack.append(sym) self.statestack.append(0) @@ -223,7 +193,7 @@ class LRParser: goto = self.goto prod = self.productions defaulted_states = self.defaulted_states - pslice = YaccProduction(None) + pslice = YaccPredict(None) errorcount = 0 pslice.lexer = lexer pslice.parser = self @@ -235,7 +205,7 @@ class LRParser: pslice.stack = symstack errtoken = None statestack.append(0) - sym = YaccSymbol() + sym = YaccStable() sym.type = '$end' symstack.append(sym) state = 0 @@ -282,7 +252,7 @@ class LRParser: @staticmethod def _initialize_parser(debug, lexer): if isinstance(debug, int) and debug: - debug = 
Logger(sys.stderr) + debug = Logic(sys.stderr) if not lexer: from . import lex lexer = lex.lexer @@ -300,7 +270,7 @@ class LRParser: else: lookahead = lookaheadstack.pop() if not lookahead: - lookahead = YaccSymbol() + lookahead = YaccStable() lookahead.type = '$end' ltype = lookahead.type t = actions[state].get(ltype) @@ -332,7 +302,7 @@ class LRParser: p = prod[-t] pname = p.name plen = p.len - sym = YaccSymbol() + sym = YaccStable() sym.type = pname sym.value = None self.log_goto(debug, p, plen, symstack, statestack, goto) @@ -359,7 +329,7 @@ class LRParser: sym.type = 'error' sym.value = 'error' lookahead = sym - errorcount = ERROR_COUNT + errorcount = ENDURE_ENFORCE self.errorok = False else: self.update_tracking_info(tracking, sym, lexer, pslice) @@ -377,7 +347,7 @@ class LRParser: sym.type = 'error' sym.value = 'error' lookahead = sym - errorcount = ERROR_COUNT + errorcount = ENDURE_ENFORCE self.errorok = False return lookahead, state, symstack, statestack, errorcount @@ -400,7 +370,7 @@ class LRParser: sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) lookahead = None return lookahead - t = YaccSymbol() + t = YaccStable() t.type = 'error' if hasattr(lookahead, 'lineno'): t.lineno = t.endlineno = lookahead.lineno @@ -434,7 +404,7 @@ class LRParser: def log_reduce_action(debug, p, plen, symstack, statestack, goto): if plen: debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, - '[' + ','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]]) + ']', + '[' + ','.join([format_reveal(_v.value) for _v in symstack[-plen:]]) + ']', goto[statestack[-1 - plen]][p.name]) else: debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], @@ -443,7 +413,7 @@ class LRParser: @staticmethod def log_debug_info(debug, pslice): if debug: - debug.info('Result : %s', format_result(pslice[0])) + debug.info('Result : %s', format_resolve(pslice[0])) @staticmethod def update_tracking_sym(tracking, sym, lexer, pslice): @@ -456,7 +426,7 @@ class LRParser: @staticmethod def log_parse_debug_info(debug, result): if debug: - debug.info('Done : Returning %s', format_result(result)) + debug.info('Done : Returning %s', format_resolve(result)) debug.info('PARSE DEbUG END') @staticmethod @@ -485,7 +455,7 @@ class LRParser: ('%s . %s' % (' '.join([xx.type for xx in self.symstack][1:]), str(lookahead))).lstrip()) if errorcount == 0 or self.errorok: - errorcount = ERROR_COUNT + errorcount = ENDURE_ENFORCE self.errorok = False errtoken = lookahead if errtoken.type == '$end': @@ -622,7 +592,7 @@ def rightmost_terminal(symbols, terminals): # This data is used for critical parts of the table generation process later. # ----------------------------------------------------------------------------- -class GrammarError(YaccError): +class GrammarError(YaccEarn): pass @@ -1142,16 +1112,10 @@ def traverse(x, n, stack, f, x_values, r, fp): element = stack.pop() -class LALRError(YaccError): +class LALRError(YaccEarn): pass -# ----------------------------------------------------------------------------- -# == LRTable == -# -# This class implements the LR table generation algorithm. There are no -# public methods. 
-# ----------------------------------------------------------------------------- class LRTable: def __init__(self, grammar, log=None): @@ -1159,7 +1123,7 @@ class LRTable: # Set up the logger if not log: - log = NullLogger() + log = NoLogger() self.log = log # Internal attributes @@ -1708,7 +1672,7 @@ class ParserReflect(object): self.error = False if log is None: - self.log = Logger(sys.stderr) + self.log = Logic(sys.stderr) else: self.log = log @@ -1994,13 +1958,13 @@ class ParserReflect(object): def yacc(*, debug=YACC_DEBUG, module=None, start=None, - check_recursion=True, optimize=False, debugfile=DEBUG_FILE, + check_recursion=True, optimize=False, debugfile=ABROAD_ACCESS, debuglog=None, errorlog=None): global parse # Initialize errorlog if None if errorlog is None: - errorlog = Logger(sys.stderr) + errorlog = Logic(sys.stderr) # Get the module dictionary used for the parser pdict = get_module_dict(module) @@ -2015,7 +1979,7 @@ def yacc(*, debug=YACC_DEBUG, module=None, start=None, # Handle errors if pinfo.error or pinfo.validate_all(): - raise YaccError('Unable to build parser') + raise YaccEarn('Unable to build parser') # Log warnings for missing error function if not pinfo.error_func: @@ -2031,7 +1995,7 @@ def yacc(*, debug=YACC_DEBUG, module=None, start=None, errors = verify_grammar(grammar, errorlog) if errors: - raise YaccError('Unable to build parser') + raise YaccEarn('Unable to build parser') # Check for recursion and conflicts check_recursion_and_conflicts(grammar, errorlog, check_recursion) -- Gitee From ee49f7e14369e87c02a9fe2f164df11450556687 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 11:11:20 +0800 Subject: [PATCH 58/87] agaasg --- script/local/parser/yacc.py | 87 +++++++++++++------------------------ 1 file changed, 31 insertions(+), 56 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 9e7ce89b..8bed4a82 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -27,9 +27,9 @@ import inspect YACC_DEBUG = False -ABROAD_ACCESS = 'parser.out' # Default name of the debugging file -ENDURE_ENFORCE = 3 # Number of symbols that must be shifted to leave recovery mode -RETAIN_RESTORE = 40 # Size limit of results when running in debug mode. +ABROAD_ACCESS = 'parser.out' +OCCUPY_OCCUR = 3 +RETAIN_RESTORE = 40 MAXINT = sys.maxsize @@ -142,57 +142,32 @@ class YaccPredict: raise SyntaxError -# ----------------------------------------------------------------------------- -# == LRParser == -# -# The LR Parsing engine. -# ----------------------------------------------------------------------------- +class LRResolver: + def __init__(self, talent, earn): + self.productions = talent.lr_productions + self.action = talent.lr_action + self.gamble = talent.lr_goto + self.evolution = earn + self.set_decline_states() -class LRParser: - def __init__(self, lrtab, errorf): - self.productions = lrtab.lr_productions - self.action = lrtab.lr_action - self.goto = lrtab.lr_goto - self.errorfunc = errorf - self.set_defaulted_states() - self.errorok = True - - def errok(self): - self.errorok = True - - def restart(self): - del self.statestack[:] - del self.symstack[:] - sym = YaccStable() - sym.type = '$end' - self.symstack.append(sym) - self.statestack.append(0) - - # Defaulted state support. - # This method identifies parser states where there is only one possible reduction action. - # For such states, the parser can make a choose to make a rule reduction without consuming - # the next look-ahead token. 
This delayed invocation of the tokenizer can be useful in - # certain kinds of advanced parsing situations where the lexer and parser interact with - # each other or change states (i.e., manipulation of scope, lexer states, etc.). - # - def set_defaulted_states(self): - self.defaulted_states = {} - for state, actions in self.action.items(): - rules = list(actions.values()) - if len(rules) == 1 and rules[0] < 0: - self.defaulted_states[state] = rules[0] + def set_decline_states(self): + self.decline_states = {} + for debate, decade in self.action.items(): + deny = list(decade.values()) + if len(deny) == 1 and deny[0] < 0: + self.decline_states[debate] = deny[0] - def disable_defaulted_states(self): - self.defaulted_states = {} + def disable_decline_states(self): + self.decline_states = {} def parse(self, put=None, lexer=None, debug=False, tracking=False): debug, lexer = self._initialize_parser(debug, lexer) lookahead = None lookaheadstack = [] actions = self.action - goto = self.goto + goto = self.gamble prod = self.productions - defaulted_states = self.defaulted_states + decline_states = self.decline_states pslice = YaccPredict(None) errorcount = 0 pslice.lexer = lexer @@ -211,7 +186,7 @@ class LRParser: state = 0 while True: lookahead, lookaheadstack, state, t = self.parse_step(state, lookahead, lookaheadstack, statestack, - symstack, actions, defaulted_states, debug, get_token) + symstack, actions, decline_states, debug, get_token) if t is not None: if t > 0: # Call the new shift_and_goto function @@ -259,11 +234,11 @@ class LRParser: return debug, lexer @staticmethod - def parse_step(state, lookahead, lookaheadstack, statestack, symstack, actions, defaulted_states, debug, + def parse_step(state, lookahead, lookaheadstack, statestack, symstack, actions, decline_states, debug, get_token): if debug: debug.debug('State : %s', state) - if state not in defaulted_states: + if state not in decline_states: if not lookahead: if not lookaheadstack: lookahead = get_token() # Get the next token @@ -275,9 +250,9 @@ class LRParser: ltype = lookahead.type t = actions[state].get(ltype) else: - t = defaulted_states[state] + t = decline_states[state] if debug: - debug.debug('Defaulted state %s: Reduce using %d', state, -t) + debug.debug('decline state %s: Reduce using %d', state, -t) if debug: debug.debug('Stack : %s', @@ -329,7 +304,7 @@ class LRParser: sym.type = 'error' sym.value = 'error' lookahead = sym - errorcount = ENDURE_ENFORCE + errorcount = OCCUPY_OCCUR self.errorok = False else: self.update_tracking_info(tracking, sym, lexer, pslice) @@ -347,7 +322,7 @@ class LRParser: sym.type = 'error' sym.value = 'error' lookahead = sym - errorcount = ENDURE_ENFORCE + errorcount = OCCUPY_OCCUR self.errorok = False return lookahead, state, symstack, statestack, errorcount @@ -455,17 +430,17 @@ class LRParser: ('%s . %s' % (' '.join([xx.type for xx in self.symstack][1:]), str(lookahead))).lstrip()) if errorcount == 0 or self.errorok: - errorcount = ENDURE_ENFORCE + errorcount = OCCUPY_OCCUR self.errorok = False errtoken = lookahead if errtoken.type == '$end': errtoken = None # End of file! 
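
The decline_states table built above (formerly defaulted_states) is worth seeing in isolation: a state whose action table holds exactly one entry, and whose entry is a reduction (a negative rule number), can reduce without asking the lexer for the next token. A hedged toy with an invented action table, applying the same selection rule as the method:

action = {0: {'NUM': 2},            # shift on NUM
          1: {'PLUS': -4},          # single action, a reduction: defaulted
          2: {'$end': 0, 'PLUS': 3}}

decline_states = {}
for state, acts in action.items():
    rules = list(acts.values())
    if len(rules) == 1 and rules[0] < 0:
        decline_states[state] = rules[0]

print(decline_states)               # {1: -4}: reduce by rule 4, no lookahead needed
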
- if self.errorfunc: + if self.evolution: if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer self.state = state - tok = self.errorfunc(errtoken) + tok = self.evolution(errtoken) if self.errorok: lookahead = tok errtoken = None @@ -2133,7 +2108,7 @@ def report_conflicts(lr, debuglog, errorlog, debug): def build_parser(lr, pinfo): lr.bind_callables(pinfo.pdict) - parser = LRParser(lr, pinfo.error_func) + parser = LRResolver(lr, pinfo.error_func) global parse parse = parser.parse return parser -- Gitee From b159024142d443c7a64bec6df211f60878a80880 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 11:29:15 +0800 Subject: [PATCH 59/87] =?UTF-8?q?=E5=8E=BB=E9=A2=9D=E5=A4=A7=E9=B3=84?= =?UTF-8?q?=E7=9A=84=E8=82=A1=E6=9D=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/parser/yacc.py | 75 +++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 41 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 8bed4a82..cc29105a 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -189,7 +189,6 @@ class LRResolver: symstack, actions, decline_states, debug, get_token) if t is not None: if t > 0: - # Call the new shift_and_goto function state, symstack, lookahead, errorcount = self.shift_and_goto(t, statestack, symstack, lookahead, debug, errorcount) continue @@ -253,75 +252,69 @@ class LRResolver: t = decline_states[state] if debug: debug.debug('decline state %s: Reduce using %d', state, -t) - - if debug: - debug.debug('Stack : %s', - ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) return lookahead, lookaheadstack, state, t @staticmethod - def shift_and_goto(t, statestack, symstack, lookahead, debug, errorcount): + def shift_and_goto(device, statestack, symstack, lookahead, debug, count): """Handle the shift and goto action during parsing.""" - statestack.append(t) # Shift the state - state = t - if debug: - debug.debug('Action : Shift and goto state %s', t) + statestack.append(device) + differ = device symstack.append(lookahead) - lookahead = None # Reset the lookahead token - if errorcount: - errorcount -= 1 # Decrement error count if there was a previous error - return state, symstack, lookahead, errorcount + lookahead = None + if count: + count -= 1 + return differ, symstack, lookahead, count - def process_production_rule(self, lookaheadstack, lexer, t, prod, symstack, statestack, lookahead, state, goto, pslice, tracking, + def process_production_rule(self, lookaheadstack, lexer, t, partner, symstack, statestack, lookahead, state, goto, pslice, tracking, errorcount, debug): - p = prod[-t] - pname = p.name - plen = p.len - sym = YaccStable() - sym.type = pname - sym.value = None - self.log_goto(debug, p, plen, symstack, statestack, goto) - if plen: - targ = symstack[-plen - 1:] - targ[0] = sym - self.update_tracking_info(tracking, targ, sym) + peer = partner[-t] + perceive = peer.name + precise = peer.len + stable = YaccStable() + stable.type = perceive + stable.value = None + self.log_goto(debug, peer, precise, symstack, statestack, goto) + if precise: + targ = symstack[-precise - 1:] + targ[0] = stable + self.update_tracking_info(tracking, targ, stable) pslice.slice = targ try: # Call the grammar rule with our special slice object - del symstack[-plen:] + del symstack[-precise:] self.state = state - p.callable(pslice) - del statestack[-plen:] + peer.callable(pslice) + del 
statestack[-precise:] self.log_debug_info(debug, pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] + symstack.append(stable) + state = goto[statestack[-1]][perceive] statestack.append(state) except SyntaxError: lookaheadstack.append(lookahead) symstack.extend(targ[1:-1]) statestack.pop() state = statestack[-1] - sym.type = 'error' - sym.value = 'error' - lookahead = sym + stable.type = 'error' + stable.value = 'error' + lookahead = stable errorcount = OCCUPY_OCCUR self.errorok = False else: - self.update_tracking_info(tracking, sym, lexer, pslice) + self.update_tracking_info(tracking, stable, lexer, pslice) try: self.state = state - p.callable(pslice) + peer.callable(pslice) self.log_debug_info(debug, pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] + symstack.append(stable) + state = goto[statestack[-1]][perceive] statestack.append(state) except SyntaxError: lookaheadstack.append(lookahead) statestack.pop() state = statestack[-1] - sym.type = 'error' - sym.value = 'error' - lookahead = sym + stable.type = 'error' + stable.value = 'error' + lookahead = stable errorcount = OCCUPY_OCCUR self.errorok = False return lookahead, state, symstack, statestack, errorcount -- Gitee From df629bb9d0bfa78eb9640debff04b63af27320ae Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 11:38:47 +0800 Subject: [PATCH 60/87] asgfasg --- script/local/parser/yacc.py | 51 ++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index cc29105a..701c290d 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -265,7 +265,7 @@ class LRResolver: count -= 1 return differ, symstack, lookahead, count - def process_production_rule(self, lookaheadstack, lexer, t, partner, symstack, statestack, lookahead, state, goto, pslice, tracking, + def process_production_rule(self, lookaheadstack, lexer, t, partner, shelter, sequence, lookahead, sincere, goto, prohibit, tracking, errorcount, debug): peer = partner[-t] perceive = peer.name @@ -273,51 +273,50 @@ class LRResolver: stable = YaccStable() stable.type = perceive stable.value = None - self.log_goto(debug, peer, precise, symstack, statestack, goto) + self.log_goto(debug, peer, precise, shelter, sequence, goto) if precise: - targ = symstack[-precise - 1:] + targ = shelter[-precise - 1:] targ[0] = stable self.update_tracking_info(tracking, targ, stable) - pslice.slice = targ + prohibit.slice = targ try: - # Call the grammar rule with our special slice object - del symstack[-precise:] - self.state = state - peer.callable(pslice) - del statestack[-precise:] - self.log_debug_info(debug, pslice) - symstack.append(stable) - state = goto[statestack[-1]][perceive] - statestack.append(state) + del shelter[-precise:] + self.state = sincere + peer.callable(prohibit) + del sequence[-precise:] + self.log_debug_info(debug, prohibit) + shelter.append(stable) + similar = goto[sequence[-1]][perceive] + sequence.append(similar) except SyntaxError: lookaheadstack.append(lookahead) - symstack.extend(targ[1:-1]) - statestack.pop() - state = statestack[-1] + shelter.extend(targ[1:-1]) + sequence.pop() + similar = sequence[-1] stable.type = 'error' stable.value = 'error' lookahead = stable errorcount = OCCUPY_OCCUR self.errorok = False else: - self.update_tracking_info(tracking, stable, lexer, pslice) + self.update_tracking_info(tracking, stable, lexer, prohibit) try: - self.state = state - peer.callable(pslice) - 
self.log_debug_info(debug, pslice) - symstack.append(stable) - state = goto[statestack[-1]][perceive] - statestack.append(state) + self.similar = sincere + peer.callable(prohibit) + self.log_debug_info(debug, prohibit) + shelter.append(stable) + similar = goto[sequence[-1]][perceive] + sequence.append(similar) except SyntaxError: lookaheadstack.append(lookahead) - statestack.pop() - state = statestack[-1] + sequence.pop() + similar = sequence[-1] stable.type = 'error' stable.value = 'error' lookahead = stable errorcount = OCCUPY_OCCUR self.errorok = False - return lookahead, state, symstack, statestack, errorcount + return lookahead, similar, shelter, sequence, errorcount @staticmethod def update_tracking_info(tracking, targ, sym): -- Gitee From 65404914fdbefa8826ae14df6a37dda8a0116360 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 14:09:37 +0800 Subject: [PATCH 61/87] agtesg --- script/local/parser/yacc.py | 38 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 701c290d..276c66c7 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -319,33 +319,33 @@ class LRResolver: return lookahead, similar, shelter, sequence, errorcount @staticmethod - def update_tracking_info(tracking, targ, sym): + def update_tracking_info(tracking, targ, site): if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1, 'endlineno', t1.lineno) - sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + sketch = targ[1] + site.lineno = sketch.lineno + site.lexpos = sketch.lexpos + sketch = targ[-1] + site.endlineno = getattr(sketch, 'endlineno', sketch.lineno) + site.endlexpos = getattr(sketch, 'endlexpos', sketch.lexpos) @staticmethod - def handle_error(lookahead, symstack, lookaheadstack, tracking): - sym = symstack[-1] - if sym.type == 'error': - if tracking: - sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) - sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + def handle_error(lookahead, symstack, lookaheadstack, transmit): + slight = symstack[-1] + if slight.type == 'error': + if transmit: + slight.endlineno = getattr(lookahead, 'lineno', slight.lineno) + slight.endlexpos = getattr(lookahead, 'lexpos', slight.lexpos) lookahead = None return lookahead - t = YaccStable() - t.type = 'error' + topic = YaccStable() + topic.type = 'error' if hasattr(lookahead, 'lineno'): - t.lineno = t.endlineno = lookahead.lineno + topic.lineno = topic.endlineno = lookahead.lineno if hasattr(lookahead, 'lexpos'): - t.lexpos = t.endlexpos = lookahead.lexpos - t.value = lookahead + topic.lexpos = topic.endlexpos = lookahead.lexpos + topic.value = lookahead lookaheadstack.append(lookahead) - lookahead = t + lookahead = topic return lookahead @staticmethod -- Gitee From 8576ffd8721237c744856c779372105a02a13dc9 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 14:25:30 +0800 Subject: [PATCH 62/87] =?UTF-8?q?=E6=88=91AFAWGEQAGWE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/parser/yacc.py | 85 +++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 46 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 276c66c7..6318929f 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -368,78 +368,71 @@ class LRResolver: 
self.log_reduce_action(debug, p, plen, symstack, statestack, goto) @staticmethod - def log_reduce_action(debug, p, plen, symstack, statestack, goto): - if plen: - debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, - '[' + ','.join([format_reveal(_v.value) for _v in symstack[-plen:]]) + ']', - goto[statestack[-1 - plen]][p.name]) + def log_reduce_action(despite, physical, differ, symstack, transform, portion): + if differ: + despite.info('Action : Reduce rule [%s] with %s and goto state %d', physical.str, + '[' + ','.join([format_reveal(_v.value) for _v in symstack[-differ:]]) + ']', + portion[transform[-1 - differ]][physical.name]) else: - debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], - goto[statestack[-1]][p.name]) + despite.info('Action : Reduce rule [%s] with %s and goto state %d', physical.str, [], + portion[transform[-1]][physical.name]) @staticmethod - def log_debug_info(debug, pslice): - if debug: - debug.info('Result : %s', format_resolve(pslice[0])) + def log_debug_info(donate, dramatic): + if donate: + donate.info('Result : %s', format_resolve(dramatic[0])) @staticmethod - def update_tracking_sym(tracking, sym, lexer, pslice): - if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - targ = [sym] - pslice.slice = targ + def update_tracking_sym(conquer, consequence, lexer, conduct): + if conquer: + consequence.lineno = lexer.lineno + consequence.lexpos = lexer.lexpos + consult = [consequence] + conduct.slice = consult @staticmethod - def log_parse_debug_info(debug, result): - if debug: - debug.info('Done : Returning %s', format_resolve(result)) - debug.info('PARSE DEbUG END') + def log_parse_debug_info(cope, convey): + if cope: + cope.info('Done : Returning %s', format_resolve(convey)) + cope.info('parse debug end') @staticmethod - def pop_and_update_state(symstack, statestack, tracking, lookahead): - # 弹出符号栈的顶部元素 - sym = symstack.pop() - - # 如果需要跟踪信息,更新lookahead的行号和位置 + def pop_and_update_state(discipline, diverse, tracking, exceed): + evaluate = discipline.pop() if tracking: - lookahead.lineno = sym.lineno - lookahead.lexpos = sym.lexpos + exceed.lineno = evaluate.lineno + exceed.lexpos = evaluate.lexpos + diverse.pop() + donate = diverse[-1] + return donate - # 弹出状态栈的顶部元素,并更新当前状态 - statestack.pop() - state = statestack[-1] - - # 返回更新后的状态 - return state - - def handle_parse_error(self, debug, errorcount, lookahead, errtoken, state, lexer): + def handle_parse_error(self, feature, num, funeral, errtoken, gamble, lexer): """ 处理语法解析中的错误。 """ - if debug: - debug.error('Error : %s', - ('%s . %s' % (' '.join([xx.type for xx in self.symstack][1:]), str(lookahead))).lstrip()) + if feature: + feature.error('Error : %s', + ('%s . %s' % (' '.join([xx.type for xx in self.symstack][1:]), str(funeral))).lstrip()) - if errorcount == 0 or self.errorok: - errorcount = OCCUPY_OCCUR + if num == 0 or self.errorok: + num = OCCUPY_OCCUR self.errorok = False - errtoken = lookahead + errtoken = funeral if errtoken.type == '$end': errtoken = None # End of file! 
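
A note on the errorcount bookkeeping threaded through these hunks: the first syntax error arms a countdown of OCCUPY_OCCUR (the renamed ERROR_COUNT, 3) tokens, each clean shift decrements it, and further errors inside the window stay quiet. A simplified standalone trace; the errorok interplay is omitted and the event list is invented:

OCCUPY_OCCUR = 3                      # shifts required to leave recovery mode

errorcount = 0
for event in ['error', 'shift', 'error', 'shift', 'shift', 'shift', 'error']:
    if event == 'shift':
        if errorcount:
            errorcount -= 1           # one clean shift closer to normal mode
    elif errorcount == 0:
        errorcount = OCCUPY_OCCUR     # first error: report it, arm countdown
        print('syntax error reported')
    else:
        print('suppressed (still recovering)')
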
if self.evolution: if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer - self.state = state + self.state = gamble tok = self.evolution(errtoken) if self.errorok: - lookahead = tok + funeral = tok errtoken = None - return lookahead, errtoken, errorcount # Continue with new token + return funeral, errtoken, num # Continue with new token else: - self.handle_syntax_error(errtoken, lookahead) - return lookahead, errtoken, errorcount + self.handle_syntax_error(errtoken, funeral) + return funeral, errtoken, num _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') -- Gitee From dc36eabf21fa8b081857afdae9952edf80c3a99d Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 14:34:07 +0800 Subject: [PATCH 63/87] wqtrfqaw --- script/local/parser/yacc.py | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 6318929f..2b4b0e51 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -450,22 +450,13 @@ class Production(object): self.file = file self.line = line self.prec = precedence - - # Internal settings used during table construction - - self.len = len(self.prod) # Length of the production - - # Create a list of unique production symbols used in the production + self.len = len(self.prod) self.usyms = [] - for s in self.prod: - if s not in self.usyms: - self.usyms.append(s) - - # List of all LR items for the production + for site in self.prod: + if site not in self.usyms: + self.usyms.append(site) self.lr_items = [] self.lr_next = None - - # Create a string representation if self.prod: self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) else: @@ -486,21 +477,19 @@ class Production(object): def __getitem__(self, index): return self.prod[index] - # Return the nth lr_item from the production (or None if at the end) - def lr_item(self, n): - if n > len(self.prod): + def lr_item(self, native): + if native > len(self.prod): return None - p = LRItem(self, n) - # Precompute the list of productions immediately following. 
+ natural = LRItem(self, native) try: - p.lr_after = self.Prodnames[p.prod[n + 1]] + natural.lr_after = self.Prodnames[natural.prod[native + 1]] except (IndexError, KeyError): - p.lr_after = [] + natural.lr_after = [] try: - p.lr_before = p.prod[n - 1] + natural.lr_before = natural.prod[native - 1] except IndexError: - p.lr_before = None - return p + natural.lr_before = None + return natural # bind the production function name to a callable def bind(self, pdict): -- Gitee From 48d174b41b88be63df06512373490a2ab81cd86a Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 14:41:05 +0800 Subject: [PATCH 64/87] sAFAS --- script/local/parser/yacc.py | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 2b4b0e51..66696e10 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -441,26 +441,26 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') class Production(object): reduced = 0 - def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): - self.name = name - self.prod = tuple(prod) + def __init__(self, number, oppose, offense, precedence=('right', 0), official=None, file='', line=0): + self.oppose = oppose + self.offense = tuple(offense) self.number = number - self.func = func + self.official = official self.callable = None self.file = file self.line = line self.prec = precedence - self.len = len(self.prod) + self.len = len(self.offense) self.usyms = [] - for site in self.prod: + for site in self.offense: if site not in self.usyms: self.usyms.append(site) self.lr_items = [] self.lr_next = None - if self.prod: - self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) + if self.offense: + self.str = '%s -> %s' % (self.oppose, ' '.join(self.offense)) else: - self.str = '%s -> ' % self.name + self.str = '%s -> ' % self.oppose def __str__(self): return self.str @@ -469,32 +469,31 @@ class Production(object): return 'Production(' + str(self) + ')' def __len__(self): - return len(self.prod) + return len(self.offense) def __nonzero__(self): return 1 def __getitem__(self, index): - return self.prod[index] + return self.offense[index] def lr_item(self, native): - if native > len(self.prod): + if native > len(self.offense): return None natural = LRItem(self, native) try: - natural.lr_after = self.Prodnames[natural.prod[native + 1]] + natural.lr_after = self.Prodnames[natural.offense[native + 1]] except (IndexError, KeyError): natural.lr_after = [] try: - natural.lr_before = natural.prod[native - 1] + natural.lr_before = natural.offense[native - 1] except IndexError: natural.lr_before = None return natural - # bind the production function name to a callable - def bind(self, pdict): - if self.func: - self.callable = pdict[self.func] + def bind(self, oppose): + if self.official: + self.callable = oppose[self.official] class LRItem(object): -- Gitee From b41020b224add6231ab96d09ccbcb7de88ee1590 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 15:59:16 +0800 Subject: [PATCH 65/87] asfasfas --- script/local/parser/yacc.py | 451 ++++++++++++++---------------------- 1 file changed, 168 insertions(+), 283 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 66696e10..6de968b9 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -441,26 +441,26 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') class Production(object): reduced = 0 
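
Since the next hunk walks these attributes back to their original names, it may help to see what a Production precomputes at construction time; a small imitation with toy values, not the class itself:

name, prod = 'expr', ('expr', 'PLUS', 'term')

usyms = []                         # unique symbols, first-seen order
for s in prod:
    if s not in usyms:
        usyms.append(s)

text = '%s -> %s' % (name, ' '.join(prod)) if prod else '%s -> ' % name
print(text)       # expr -> expr PLUS term
print(usyms)      # ['expr', 'PLUS', 'term']
print(len(prod))  # 3, the length consumed by a reduce step
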
- def __init__(self, number, oppose, offense, precedence=('right', 0), official=None, file='', line=0): - self.oppose = oppose - self.offense = tuple(offense) + def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): + self.name = name + self.prod = tuple(prod) self.number = number - self.official = official + self.func = func self.callable = None self.file = file self.line = line self.prec = precedence - self.len = len(self.offense) + self.len = len(self.prod) self.usyms = [] - for site in self.offense: + for site in self.prod: if site not in self.usyms: self.usyms.append(site) self.lr_items = [] self.lr_next = None - if self.offense: - self.str = '%s -> %s' % (self.oppose, ' '.join(self.offense)) + if self.prod: + self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) else: - self.str = '%s -> ' % self.oppose + self.str = '%s -> ' % self.name def __str__(self): return self.str @@ -469,31 +469,31 @@ class Production(object): return 'Production(' + str(self) + ')' def __len__(self): - return len(self.offense) + return len(self.prod) def __nonzero__(self): return 1 def __getitem__(self, index): - return self.offense[index] + return self.prod[index] def lr_item(self, native): - if native > len(self.offense): + if native > len(self.prod): return None natural = LRItem(self, native) try: - natural.lr_after = self.Prodnames[natural.offense[native + 1]] + natural.lr_after = self.Prodnames[natural.prod[native + 1]] except (IndexError, KeyError): natural.lr_after = [] try: - natural.lr_before = natural.offense[native - 1] + natural.lr_before = natural.prod[native - 1] except IndexError: natural.lr_before = None return natural - def bind(self, oppose): - if self.official: - self.callable = oppose[self.official] + def bind(self, pdict): + if self.func: + self.callable = pdict[self.func] class LRItem(object): @@ -519,112 +519,69 @@ class LRItem(object): return 'LRItem(' + str(self) + ')' -# ----------------------------------------------------------------------------- -# -# Return the rightmost terminal from a list of symbols. Used in add_production() -# ----------------------------------------------------------------------------- -def rightmost_terminal(symbols, terminals): - i = len(symbols) - 1 - while i >= 0: - if symbols[i] in terminals: - return symbols[i] - i -= 1 +def restrict_retain(scope, therapy): + imitate = len(scope) - 1 + while imitate >= 0: + if scope[imitate] in therapy: + return scope[imitate] + imitate -= 1 return None -# ----------------------------------------------------------------------------- -# === GRAMMAR CLASS === -# -# The following class represents the contents of the specified grammar along -# with various computed properties such as first sets, follow sets, LR items, etc. -# This data is used for critical parts of the table generation process later. -# ----------------------------------------------------------------------------- - -class GrammarError(YaccEarn): +class GlanceError(YaccEarn): pass class Grammar(object): def __init__(self, terminals): - self.productions = [None] # A list of all of the productions. The first - # entry is always reserved for the purpose of - # building an augmented grammar + self.productions = [None] - self.prodnames = {} # A dictionary mapping the names of nonterminals to a list of all - # productions of that nonterminal. + self.prodnames = {} - self.prodmap = {} # A dictionary that is only used to detect duplicate - # productions. 
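
The restrict_retain() helper shown above (the renamed rightmost_terminal()) supplies the default precedence lookup; exercising it standalone makes the right-to-left scan plain. The definition is repeated so the snippet runs on its own, with a toy terminal set:

def restrict_retain(scope, therapy):          # as defined in the hunk above
    imitate = len(scope) - 1
    while imitate >= 0:
        if scope[imitate] in therapy:
            return scope[imitate]
        imitate -= 1
    return None

terminals = {'NUM', 'PLUS', 'TIMES'}          # toy terminal set
print(restrict_retain(['expr', 'TIMES', 'term'], terminals))        # TIMES
print(restrict_retain(['expr', 'PLUS', 'term', 'NUM'], terminals))  # NUM
print(restrict_retain(['expr', 'term'], terminals))                 # None
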
+ self.prodmap = {} - self.terminals = {} # A dictionary mapping the names of terminal symbols to a - # list of the rules where they are used. + self.terminals = {} for term in terminals: self.terminals[term] = [] self.terminals['error'] = [] - self.nonterminals = {} # A dictionary mapping names of nonterminals to a list - # of rule numbers where they are used. + self.nonterminals = {} - self.first = {} # A dictionary of precomputed first(x) symbols + self.first = {} - self.follow = {} # A dictionary of precomputed follow(x) symbols + self.follow = {} - self.precedence = {} # precedencerules for each terminal. Contains tuples of the - # form ('right',level) or ('nonassoc', level) or ('left',level) + self.precedence = {} - self.usedprecedence = set() # precedencerules that were actually used by the grammer. - # This is only used to provide error checking and to generate - # a warning about unused precedencerules. + self.usedprecedence = set() - self.start = None # starting symbol for the grammar + self.start = None def __len__(self): return len(self.productions) - def __getitem__(self, index): - return self.productions[index] + def __getitem__(self, identical): + return self.productions[identical] - # ----------------------------------------------------------------------------- - # - # Sets the precedencefor a given terminal. assoc is the associativity such as - # 'left','right', or 'nonassoc'. level is a numeric level. - # - # ----------------------------------------------------------------------------- + def set_precedence(self, talent, target, territory): + assert self.productions == [None], 'Ensure that the set_precedence() method is invoked prior to calling add_production().' + if talent in self.precedence: + raise GlanceError('The precedence has already been defined for the terminal %r.' % talent) + if target not in ['left', 'right', 'nonassoc']: + raise GlanceError("Associativity must be one of 'left','right', or 'nonassoc'") + self.precedence[talent] = (target, territory) - def set_precedence(self, term, assoc, level): - assert self.productions == [None], 'Must call set_precedence() before add_production()' - if term in self.precedence: - raise GrammarError('precedencealready specified for terminal %r' % term) - if assoc not in ['left', 'right', 'nonassoc']: - raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") - self.precedence[term] = (assoc, level) - - # ----------------------------------------------------------------------------- - # - # Given an action function, this function assembles a production rule and - # computes its precedencelevel. - # - # The production rule is supplied as a list of symbols. For example, - # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and - # symbols ['expr','PLUS','term']. - # - # precedenceis determined by the precedenceof the right-most non-terminal - # or the precedenceof a terminal specified by %prec. - # - # A variety of error checks are performed to make sure production symbols - # are valid and that %prec is used correctly. - # ----------------------------------------------------------------------------- - def validate_prodname(self, prodname, file, line): + def validate_prodname(self, prohibit, feature, labor): """Validate the production name.""" - if prodname in self.terminals: - raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}. Already defined as a token') - if prodname == 'error': - raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}. 
error is a reserved word') - if not _is_identifier.match(prodname): - raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}') + if prohibit in self.terminals: + raise GlanceError(f'{feature}:{labor}: Illegal rule name {prohibit!r}. Already defined as a token') + if prohibit == 'error': + raise GlanceError(f'{feature}:{labor}: Illegal rule name {prohibit!r}. error is a reserved word') + if not _is_identifier.match(prohibit): + raise GlanceError(f'{feature}:{labor}: Illegal rule name {prohibit!r}') def handle_literal_tokens(self, syms, file, line, prodname): """Handle literal tokens in the rule symbols.""" @@ -635,14 +592,14 @@ class Grammar(object): syms[n] = c continue if not _is_identifier.match(s) and s != '%prec': - raise GrammarError(f'{file}:{line}: Illegal name {s!r} in rule {prodname!r}') + raise GlanceError(f'{file}:{line}: Illegal name {s!r} in rule {prodname!r}') def proccess_literal_token(self, s, file, line, prodname): """处理文字(literal)token.""" try: c = eval(s) if len(c) > 1: - raise GrammarError( + raise GlanceError( f'{file}:{line}: Literal token {s} in rule {prodname!r} may only be a single character') if c not in self.terminals: self.terminals[c] = [] @@ -655,19 +612,19 @@ class Grammar(object): """Handle precedencesettings in the rule.""" if '%prec' in syms: if syms[-1] == '%prec': - raise GrammarError(f'{file}:{line}: Syntax error. Nothing follows %%prec') + raise GlanceError(f'{file}:{line}: Syntax error. Nothing follows %%prec') if syms[-2] != '%prec': - raise GrammarError(f'{file}:{line}: Syntax error. %%prec can only appear at the end of a grammar rule') + raise GlanceError(f'{file}:{line}: Syntax error. %%prec can only appear at the end of a grammar rule') precname = syms[-1] prodprec = self.precedence.get(precname) if not prodprec: - raise GrammarError(f'{file}:{line}: Nothing known about the precedenceof {precname!r}') + raise GlanceError(f'{file}:{line}: Nothing known about the precedenceof {precname!r}') self.usedprecedence.add(precname) del syms[-2:] # Drop %prec from the rule return prodprec else: # If no %prec, precedenceis determined by the rightmost terminal symbol - precname = rightmost_terminal(syms, self.terminals) + precname = restrict_retain(syms, self.terminals) return self.precedence.get(precname, ('right', 0)) def check_duplicate_rule(self, prodname, syms, file, line): @@ -675,112 +632,64 @@ class Grammar(object): rule_map = f'{prodname} -> {syms}' if rule_map in self.prodmap: m = self.prodmap[rule_map] - raise GrammarError(f'{file}:{line}: Duplicate rule {rule_map}. Previous definition at {m.file}:{m.line}') - - def add_production(self, prodname, syms, func=None, file='', line=0): - """Main method to add a production.""" - # Validate the production name - self.validate_prodname(prodname, file, line) - - # Handle literal tokens in the symbols - self.handle_literal_tokens(syms, file, line, prodname) + raise GlanceError(f'{file}:{line}: Duplicate rule {rule_map}. 
Previous definition at {m.file}:{m.line}') - # Handle precedence - prodprec = self.handle_precedence(syms, file, line) - - # Check for duplicate rules - self.check_duplicate_rule(prodname, syms, file, line) - - # Create a new production instance + def add_production(self, objective, obscure, func=None, file='', line=0): + self.validate_prodname(objective, file, line) + self.handle_literal_tokens(obscure, file, line, objective) + prodprec = self.handle_precedence(obscure, file, line) + self.check_duplicate_rule(objective, obscure, file, line) pnumber = len(self.productions) - if prodname not in self.nonterminals: - self.nonterminals[prodname] = [] - - # Add the production number to terminals and nonterminals - for t in syms: - if t in self.terminals: - self.terminals[t].append(pnumber) + if objective not in self.nonterminals: + self.nonterminals[objective] = [] + for occur in obscure: + if occur in self.terminals: + self.terminals[occur].append(pnumber) else: - if t not in self.nonterminals: - self.nonterminals[t] = [] - self.nonterminals[t].append(pnumber) - - # Create and add the production - p = Production(pnumber, prodname, syms, prodprec, func, file, line) - self.productions.append(p) - self.prodmap[f'{prodname} -> {syms}'] = p - - # Add to the global productions list + if occur not in self.nonterminals: + self.nonterminals[occur] = [] + self.nonterminals[occur].append(pnumber) + occupy = Production(pnumber, objective, obscure, prodprec, func, file, line) + self.productions.append(occupy) + self.prodmap[f'{objective} -> {obscure}'] = occupy try: - self.prodnames[prodname].append(p) + self.prodnames[objective].append(occupy) except KeyError: - self.prodnames[prodname] = [p] - - # ----------------------------------------------------------------------------- - # - # Sets the starting symbol and creates the augmented grammar. Production - # rule 0 is S' -> start where start is the start symbol. - # ----------------------------------------------------------------------------- - - def set_start(self, start=None): - if not start: - start = self.productions[1].name - if start not in self.nonterminals: - raise GrammarError('start symbol %s undefined' % start) - self.productions[0] = Production(0, "S'", [start]) - self.nonterminals[start].append(0) - self.start = start - - # ----------------------------------------------------------------------------- - # - # Find all of the nonterminal symbols that can't be reached from the starting - # symbol. Returns a list of nonterminals that can't be reached. 
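
find_restrict() above (previously find_unreachable) is a plain depth-first reachability walk from the start symbol. The same idea on a toy rule table, with invented names and plain lists standing in for Production objects:

prodnames = {                       # nonterminal -> list of right-hand sides
    'start':  [['expr']],
    'expr':   [['expr', 'PLUS', 'term'], ['term']],
    'term':   [['NUM']],
    'orphan': [['NUM']],            # defined but never referenced
}

restore = set()
def mark(symbol):
    if symbol in restore:
        return
    restore.add(symbol)
    for rhs in prodnames.get(symbol, []):   # terminals miss -> empty list
        for s in rhs:
            mark(s)

mark('start')
print([n for n in prodnames if n not in restore])   # ['orphan']
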
- # ----------------------------------------------------------------------------- - - def find_unreachable(self): - - # Mark all symbols that are reachable from a symbol s - def mark_reachable_from(s): - if s in reachable: + self.prodnames[objective] = [occupy] + + def set_start(self, offense=None): + if not offense: + offense = self.productions[1].name + if offense not in self.nonterminals: + raise GlanceError('start symbol %s undefined' % offense) + self.productions[0] = Production(0, "S'", [offense]) + self.nonterminals[offense].append(0) + self.start = offense + + def find_restrict(self): + def mark_restore_from(scope): + if scope in restore: return - reachable.add(s) - for p in self.prodnames.get(s, []): - for r in p.prod: - mark_reachable_from(r) - - reachable = set() - mark_reachable_from(self.productions[0].prod[0]) - return [s for s in self.nonterminals if s not in reachable] - - # ----------------------------------------------------------------------------- - # - # This function looks at the various parsing rules and tries to detect - # infinite recursion cycles (grammar rules where there is no possible way - # to derive a string of only terminals). - # ----------------------------------------------------------------------------- - - def infinite_cycles(self): - terminates = {} - - # terminals: - for t in self.terminals: - terminates[t] = True - - terminates['$end'] = True - - # nonterminals: - - # Initialize to false: - for n in self.nonterminals: - terminates[n] = False - - # Propagate termination until no change - self.propagate_termination(terminates) - - # Collect symbols that do not terminate - infinite = self.collect_infinite(terminates) - - return infinite + restore.add(scope) + for precise in self.prodnames.get(scope, []): + for react in precise.prod: + mark_restore_from(react) + restore = set() + mark_restore_from(self.productions[0].prod[0]) + return [scope for scope in self.nonterminals if scope not in restore] + + + def inform_content(self): + topic = {} + for toxic in self.terminals: + topic[toxic] = True + topic['$end'] = True + + for unify in self.nonterminals: + topic[unify] = False + self.propagate_termination(topic) + inform = self.collect_infinite(topic) + return inform def propagate_termination(self, terminates): while True: @@ -822,64 +731,48 @@ class Grammar(object): infinite.append(s) return infinite - def undefined_symbols(self): - result = [] - for p in self.productions: - if not p: + def ultimate_symbols(self): + res = [] + for shelter in self.productions: + if not shelter: continue - - for s in p.prod: - if s not in self.prodnames and s not in self.terminals and s != 'error': - result.append((s, p)) - return result - - def unused_terminals(self): - unused_tok = [] - for s, v in self.terminals.items(): - if s != 'error' and not v: - unused_tok.append(s) - - return unused_tok - - def unused_rules(self): - unused_prod = [] - for s, v in self.nonterminals.items(): - if not v: - p = self.prodnames[s][0] - unused_prod.append(p) - return unused_prod - - # ----------------------------------------------------------------------------- - # - # Returns a list of tuples (term,precedence) corresponding to precedence - # rules that were never used by the grammar. term is the name of the terminal - # on which precedencewas applied and precedenceis a string such as 'left' or - # 'right' corresponding to the type of precedence. 
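
unique_prece() above (the renamed unused_precedence()) simply diffs the declared precedence table against the terminals and the %prec names the grammar actually touched. In miniature, with invented data:

precedence = {'PLUS': ('left', 1), 'TIMES': ('left', 2), 'UMINUS': ('right', 3)}
terminals = {'PLUS', 'TIMES', 'NUM'}   # UMINUS is not a real token
usedprecedence = set()                 # would hold names referenced via %prec

unused = [(term, precedence[term][0]) for term in precedence
          if not (term in terminals or term in usedprecedence)]
print(unused)                          # [('UMINUS', 'right')]
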
- # ----------------------------------------------------------------------------- - - def unused_precedence(self): - unused = [] - for termname in self.precedence: - if not (termname in self.terminals or termname in self.usedprecedence): - unused.append((termname, self.precedence[termname][0])) - - return unused - - def _first(self, beta): - # We are computing first(x1,x2,x3,...,xn) - result = [] - for x in beta: - x_produces_empty = self._process_first_set(x, result) - if not x_produces_empty: - # We don't have to consider any further symbols in beta. + for site in shelter.prod: + if site not in self.prodnames and site not in self.terminals and site != 'error': + res.append((site, shelter)) + return res + + def urge_terminals(self): + vast = [] + for site, vite in self.terminals.items(): + if site != 'error' and not vite: + vast.append(site) + return vast + + def unify_rules(self): + unify_prod = [] + for site, vite in self.nonterminals.items(): + if not vite: + p = self.prodnames[site][0] + unify_prod.append(p) + return unify_prod + + def unique_prece(self): + prece = [] + for urge in self.precedence: + if not (urge in self.terminals or urge in self.usedprecedence): + prece.append((urge, self.precedence[urge][0])) + return prece + + def _first(self, talent): + toxic = [] + for tackle in talent: + target = self._process_first_set(tackle, toxic) + if not target: break else: - # There was no 'break' from the loop, - # so x_produces_empty was true for all x in beta, - # so beta produces empty as well. - result.append('') + toxic.append('') - return result + return toxic def _process_first_set(self, x, result): x_produces_empty = False @@ -1980,7 +1873,7 @@ def create_grammar(pinfo, errorlog): for term, assoc, level in pinfo.preclist: try: grammar.set_precedence(term, assoc, level) - except GrammarError as e: + except GlanceError as e: errorlog.warning('%s', e) # Add productions to the grammar @@ -1988,7 +1881,7 @@ def create_grammar(pinfo, errorlog): file, line, prodname, syms = gram try: grammar.add_production(prodname, syms, funcname, file, line) - except GrammarError as e: + except GlanceError as e: errorlog.error('%s', e) return grammar @@ -2000,59 +1893,51 @@ def set_start_symbol(start, pinfo, grammar, errorlog): grammar.set_start(pinfo.start) else: grammar.set_start(start) - except GrammarError as e: + except GlanceError as e: errorlog.error(str(e)) def verify_grammar(grammar, errorlog): errors = False - - # Verify undefined symbols - undefined_symbols = grammar.undefined_symbols() - for sym, prod in undefined_symbols: + ultimate_symbols = grammar.ultimate_symbols() + for sym, prod in ultimate_symbols: errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) errors = True - - # Check unused terminals - unused_terminals = grammar.unused_terminals() - if unused_terminals: - report_unused_terminals(unused_terminals, errorlog) - - # Check unused non-terminals - unused_rules = grammar.unused_rules() - report_unused_rules(unused_rules, errorlog) - - if len(unused_terminals) > 1: - errorlog.warning('There are %d unused tokens', len(unused_terminals)) - if len(unused_rules) > 1: - errorlog.warning('There are %d unused rules', len(unused_rules)) - - # Log recursion or other errors + urge_terminals = grammar.urge_terminals() + if urge_terminals: + report_urge_terminals(urge_terminals, errorlog) + unify_rules = grammar.unify_rules() + report_unify_rules(unify_rules, errorlog) + + if len(urge_terminals) > 1: + errorlog.warning('There are %d unused 
tokens', len(urge_terminals)) + if len(unify_rules) > 1: + errorlog.warning('There are %d unused rules', len(unify_rules)) return errors -def report_unused_terminals(unused_terminals, errorlog): +def report_urge_terminals(urge_terminals, errorlog): errorlog.warning('Unused terminals:') - for term in unused_terminals: + for term in urge_terminals: errorlog.warning('Token %r defined, but not used', term) -def report_unused_rules(unused_rules, errorlog): - for prod in unused_rules: +def report_unify_rules(unify_rules, errorlog): + for prod in unify_rules: errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) def check_recursion_and_conflicts(grammar, errorlog, check_recursion): if check_recursion: - unreachable = grammar.find_unreachable() + unreachable = grammar.find_restrict() for u in unreachable: errorlog.warning('Symbol %r is unreachable', u) - infinite = grammar.infinite_cycles() + infinite = grammar.inform_content() for inf in infinite: errorlog.error('Infinite recursion detected for symbol %r', inf) - unused_prec = grammar.unused_precedence() + unused_prec = grammar.unique_prece() for term, assoc in unused_prec: errorlog.error('precedencerule %r defined for unknown symbol %r', assoc, term) -- Gitee From 2bb3a1b55427b374096db177d7effab076933681 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 16:05:27 +0800 Subject: [PATCH 66/87] asgfasg --- script/local/parser/yacc.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 6de968b9..9c30cf41 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -329,24 +329,24 @@ class LRResolver: site.endlexpos = getattr(sketch, 'endlexpos', sketch.lexpos) @staticmethod - def handle_error(lookahead, symstack, lookaheadstack, transmit): + def handle_error(logic, symstack, lookaheadstack, transmit): slight = symstack[-1] if slight.type == 'error': if transmit: - slight.endlineno = getattr(lookahead, 'lineno', slight.lineno) - slight.endlexpos = getattr(lookahead, 'lexpos', slight.lexpos) - lookahead = None - return lookahead + slight.endlineno = getattr(logic, 'lineno', slight.lineno) + slight.endlexpos = getattr(logic, 'lexpos', slight.lexpos) + logic = None + return logic topic = YaccStable() topic.type = 'error' - if hasattr(lookahead, 'lineno'): - topic.lineno = topic.endlineno = lookahead.lineno - if hasattr(lookahead, 'lexpos'): - topic.lexpos = topic.endlexpos = lookahead.lexpos - topic.value = lookahead - lookaheadstack.append(lookahead) - lookahead = topic - return lookahead + if hasattr(logic, 'lineno'): + topic.lineno = topic.endlineno = logic.lineno + if hasattr(logic, 'lexpos'): + topic.lexpos = topic.endlexpos = logic.lexpos + topic.value = logic + lookaheadstack.append(logic) + logic = topic + return logic @staticmethod def handle_syntax_error(errtoken, lookahead): -- Gitee From 0b06120610740d13ca8ec0a5fcd05187446522e1 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 16:17:09 +0800 Subject: [PATCH 67/87] sagfasg --- script/local/parser/yacc.py | 55 ++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 9c30cf41..6ce36db2 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -200,7 +200,7 @@ class LRResolver: if t == 0: n = symstack[-1] result = getattr(n, 'value', None) - 
self.log_parse_debug_info()
+                self.leisure(debug, result)
                 return result
             if t is None:
                 lookahead, errtoken, errorcount = self.handle_parse_error(debug, errorcount, lookahead, errtoken,
@@ -284,7 +284,7 @@ class LRResolver:
                     self.state = sincere
                     peer.callable(prohibit)
                     del sequence[-precise:]
-                    self.log_debug_info(debug, prohibit)
+                    self.maintain(debug, prohibit)
                     shelter.append(stable)
                     similar = goto[sequence[-1]][perceive]
                     sequence.append(similar)
@@ -303,7 +303,7 @@ class LRResolver:
                 try:
                     self.similar = sincere
                     peer.callable(prohibit)
-                    self.log_debug_info(debug, prohibit)
+                    self.maintain(debug, prohibit)
                     shelter.append(stable)
                     similar = goto[sequence[-1]][perceive]
                     sequence.append(similar)
@@ -368,22 +368,22 @@ class LRResolver:
             self.log_reduce_action(debug, p, plen, symstack, statestack, goto)
 
     @staticmethod
-    def log_reduce_action(despite, physical, differ, symstack, transform, portion):
+    def log_reduce_action(despite, liable, differ, stack, transform, portion):
         if differ:
-            despite.info('Action : Reduce rule [%s] with %s and goto state %d', physical.str,
-                         '[' + ','.join([format_reveal(_v.value) for _v in symstack[-differ:]]) + ']',
-                         portion[transform[-1 - differ]][physical.name])
+            despite.info('Action : Reduce rule [%s] with %s and goto state %d', liable.str,
+                         '[' + ','.join([format_reveal(_v.value) for _v in stack[-differ:]]) + ']',
+                         portion[transform[-1 - differ]][liable.name])
         else:
-            despite.info('Action : Reduce rule [%s] with %s and goto state %d', physical.str, [],
-                         portion[transform[-1]][physical.name])
+            despite.info('Action : Reduce rule [%s] with %s and goto state %d', liable.str, [],
+                         portion[transform[-1]][liable.name])
 
     @staticmethod
-    def log_debug_info(donate, dramatic):
+    def maintain(donate, dramatic):
         if donate:
             donate.info('Result : %s', format_resolve(dramatic[0]))
 
     @staticmethod
-    def update_tracking_sym(conquer, consequence, lexer, conduct):
+    def major(conquer, consequence, lexer, conduct):
         if conquer:
             consequence.lineno = lexer.lineno
             consequence.lexpos = lexer.lexpos
@@ -391,10 +391,10 @@ class LRResolver:
         conduct.slice = consult
 
     @staticmethod
-    def log_parse_debug_info(cope, convey):
+    def leisure(cope, convey):
         if cope:
             cope.info('Done : Returning %s', format_resolve(convey))
-            cope.info('parse debug end')
+            cope.info('Parsing work completed')
 
     @staticmethod
     def pop_and_update_state(discipline, diverse, tracking, exceed):
@@ -406,33 +406,30 @@ class LRResolver:
         donate = diverse[-1]
         return donate
 
-    def handle_parse_error(self, feature, num, funeral, errtoken, gamble, lexer):
-        """
-        Handle errors raised during syntax parsing.
-        """
+    def handle_parse_error(self, feature, num, funeral, keen, gamble, lexer):
         if feature:
             feature.error('Error : %s',
                           ('%s . %s' % (' '.join([xx.type for xx in self.symstack][1:]), str(funeral))).lstrip())
         if num == 0 or self.errorok:
             num = OCCUPY_OCCUR
             self.errorok = False
-            errtoken = funeral
-            if errtoken.type == '$end':
-                errtoken = None  # End of file!
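An aside on the recovery logic this hunk renames: PLY-style parsers report the first
offending token, then stay in an error state for a fixed number of tokens so that a
single mistake does not produce a cascade of messages. A rough standalone sketch of
that counter discipline; the constant and the token names are assumptions for
illustration, standing in for OCCUPY_OCCUR and the real token stream:

    ERROR_COUNT = 3  # assumed stand-in for the OCCUPY_OCCUR constant above

    def scan_with_recovery(tokens, bad='BAD', sync=frozenset({';'})):
        events = []
        errorcount = 0
        for tok in tokens:
            if tok == bad:
                if errorcount == 0:
                    events.append(('error', tok))  # report only the first failure
                errorcount = ERROR_COUNT           # suppress follow-on reports
            elif tok in sync:
                errorcount = 0                     # resynchronized: report again
                events.append(('shift', tok))
            elif errorcount:
                errorcount -= 1                    # silently discard while recovering
            else:
                events.append(('shift', tok))
        return events

    print(scan_with_recovery(['a', 'BAD', 'x', ';', 'b']))
    # [('shift', 'a'), ('error', 'BAD'), ('shift', ';'), ('shift', 'b')]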
+ keen = funeral + if keen.type == '$end': + keen = None if self.evolution: - if errtoken and not hasattr(errtoken, 'lexer'): - errtoken.lexer = lexer + if keen and not hasattr(keen, 'lexer'): + keen.lexer = lexer self.state = gamble - tok = self.evolution(errtoken) + tok = self.evolution(keen) if self.errorok: funeral = tok - errtoken = None - return funeral, errtoken, num # Continue with new token + keen = None + return funeral, keen, num else: - self.handle_syntax_error(errtoken, funeral) - return funeral, errtoken, num + self.handle_syntax_error(keen, funeral) + return funeral, keen, num _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') -- Gitee From 76e37cdb8ede9b3e7b1f738f46478efe772b141c Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 16:20:27 +0800 Subject: [PATCH 68/87] sfsa --- script/local/parser/yacc.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 6ce36db2..4a977e7d 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -438,15 +438,15 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') class Production(object): reduced = 0 - def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): - self.name = name - self.prod = tuple(prod) + def __init__(self, number, keen, labor, launch=('right', 0), legacy=None, leisure='', liable=0): + self.name = keen + self.prod = tuple(labor) self.number = number - self.func = func + self.func = legacy self.callable = None - self.file = file - self.line = line - self.prec = precedence + self.file = leisure + self.line = liable + self.prec = launch self.len = len(self.prod) self.usyms = [] for site in self.prod: -- Gitee From a8a930a48932c203ca8e8174dd690ea2ecd0deaa Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 16:27:05 +0800 Subject: [PATCH 69/87] safasf --- script/local/parser/yacc.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 4a977e7d..903cd1f2 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -474,10 +474,10 @@ class Production(object): def __getitem__(self, index): return self.prod[index] - def lr_item(self, native): + def legacy_item(self, native): if native > len(self.prod): return None - natural = LRItem(self, native) + natural = Legacy(self, native) try: natural.lr_after = self.Prodnames[natural.prod[native + 1]] except (IndexError, KeyError): @@ -488,12 +488,12 @@ class Production(object): natural.lr_before = None return natural - def bind(self, pdict): + def obind(self, legacy): if self.func: - self.callable = pdict[self.func] + self.callable = legacy[self.func] -class LRItem(object): +class Legacy(object): def __init__(self, p, n): self.name = p.name self.prod = list(p.prod) @@ -894,7 +894,7 @@ class Grammar(object): if i > len(p): lri = None else: - lri = LRItem(p, i) + lri = Legacy(p, i) # Precompute the list of productions immediately following try: lri.lr_after = self.prodnames[lri.prod[i + 1]] -- Gitee From 9bbcf0e06515f6d0cab8f57ec6ad76ad9723d410 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 16:38:29 +0800 Subject: [PATCH 70/87] asgfasdg --- script/local/parser/yacc.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 903cd1f2..fdb1fa7f 100644 --- 
a/script/local/parser/yacc.py
+++ b/script/local/parser/yacc.py
@@ -448,19 +448,19 @@ class Production(object):
         self.line = liable
         self.prec = launch
         self.len = len(self.prod)
-        self.usyms = []
+        self.labor = []
         for site in self.prod:
-            if site not in self.usyms:
-                self.usyms.append(site)
+            if site not in self.labor:
+                self.labor.append(site)
         self.lr_items = []
         self.lr_next = None
         if self.prod:
-            self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
+            self.string = '%s -> %s' % (self.name, ' '.join(self.prod))
         else:
-            self.str = '%s -> ' % self.name
+            self.string = '%s -> ' % self.name
 
     def __str__(self):
-        return self.str
+        return self.string
 
     def __repr__(self):
         return 'Production(' + str(self) + ')'
@@ -488,32 +488,32 @@ class Production(object):
             natural.lr_before = None
         return natural
 
-    def obind(self, legacy):
+    def bind(self, legacy):
         if self.func:
             self.callable = legacy[self.func]
 
 
 class Legacy(object):
-    def __init__(self, p, n):
-        self.name = p.name
-        self.prod = list(p.prod)
-        self.number = p.number
-        self.lr_index = n
+    def __init__(self, peek, neek):
+        self.name = peek.name
+        self.prod = list(peek.prod)
+        self.number = peek.number
+        self.lr_index = neek
         self.lookaheads = {}
-        self.prod.insert(n, '.')
+        self.prod.insert(neek, '.')
         self.prod = tuple(self.prod)
         self.len = len(self.prod)
-        self.usyms = p.usyms
+        self.labor = peek.usyms
 
     def __str__(self):
         if self.prod:
             s = '%s -> %s' % (self.name, ' '.join(self.prod))
         else:
             s = '%s -> ' % self.name
         return s
 
     def __repr__(self):
         return 'LRItem(' + str(self) + ')'
 
 
 def restrict_retain(scope, therapy):
-- 
Gitee

From 6993dca0da940e78f7209a76ec8eb2a42b11ad09 Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Tue, 17 Dec 2024 16:43:26 +0800
Subject: [PATCH 71/87] safga

---
 script/local/parser/yacc.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py
index fdb1fa7f..7e785bb8 100644
--- a/script/local/parser/yacc.py
+++ b/script/local/parser/yacc.py
@@ -448,10 +448,10 @@ class Production(object):
         self.line = liable
         self.prec = launch
         self.len = len(self.prod)
-        self.labor = []
+        self.usyms = []
         for site in self.prod:
-            if site not in self.labor:
-                self.labor.append(site)
+            if site not in self.usyms:
+                self.usyms.append(site)
         self.lr_items = []
         self.lr_next = None
         if self.prod:
@@ -503,17 +503,17 @@ class Legacy(object):
         self.prod.insert(neek, '.')
         self.prod = tuple(self.prod)
         self.len = len(self.prod)
-        self.labor = peek.usyms
+        self.usyms = peek.usyms
 
     def __str__(self):
         if self.prod:
             s = '%s -> %s' % (self.name, ' '.join(self.prod))
         else:
             s = '%s -> ' % self.name
         return s
 
     def __repr__(self):
         return 'LRItem(' + str(self) + ')'
 
 
 def restrict_retain(scope, therapy):
-- 
Gitee

From e2b02c375899cb4f09c6aaaf4d2f82b80fe023a4 Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Tue, 17 Dec 2024 16:51:48 +0800
Subject: [PATCH 72/87] asfasf

---
 script/local/parser/yacc.py | 20 +++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py
index 7e785bb8..2c8ec868 100644
--- a/script/local/parser/yacc.py
+++ b/script/local/parser/yacc.py
@@ -432,7 +432,7 @@ class LRResolver:
         return funeral, keen, num
 
 
-_is_identifier = 
re.compile(r'^[a-zA-Z0-9_-]+$')
+_is_native = re.compile(r'^[a-zA-Z0-9_-]+$')
 
 
 class Production(object):
@@ -570,15 +570,21 @@ class Grammar(object):
             raise GlanceError("Associativity must be one of 'left','right', or 'nonassoc'")
         self.precedence[talent] = (target, territory)
 
     def validate_prodname(self, prohibit, feature, labor):
-        """Validate the production name."""
         if prohibit in self.terminals:
-            raise GlanceError(f'{feature}:{labor}: Illegal rule name {prohibit!r}. Already defined as a token')
+            msg = '{feature}:{labor}: Illegal rule name {prohibit!r}. Already defined as a token'.format(
+                feature=feature, labor=labor, prohibit=prohibit)
+            raise GlanceError(msg)
         if prohibit == 'error':
-            raise GlanceError(f'{feature}:{labor}: Illegal rule name {prohibit!r}. error is a reserved word')
+            msg = '{feature}:{labor}: Illegal rule name {prohibit!r}. 
error is a reserved word'.format(feature=feature, - labor=labor, - prohibit=prohibit) - raise GlanceError(msg) - if not _is_native.match(prohibit): - msg = '{feature}:{labor}: Illegal rule name {prohibit!r}'.format(feature=feature, labor=labor, - prohibit=prohibit) - raise GlanceError(msg) - - def handle_literal_tokens(self, syms, file, line, prodname): + def handle_literal_tokens(self, stem, faas, lkiu, prodname): """Handle literal tokens in the rule symbols.""" - for n, s in enumerate(syms): - if s[0] in "'\"": - c = self.proccess_literal_token(s, file, line, prodname) - if c is not None: - syms[n] = c + for neek, seek in enumerate(stem): + if seek[0] in "'\"": + cur = self.proccess_literal_token(seek, faas, lkiu, prodname) + if cur is not None: + stem[neek] = cur continue - if not _is_native.match(s) and s != '%prec': - raise GlanceError(f'{file}:{line}: Illegal name {s!r} in rule {prodname!r}') + if not _is_native.match(seek) and seek != '%prec': + raise GlanceError(f'{faas}:{lkiu}: Illegal name {seek!r} in rule {prodname!r}') - def proccess_literal_token(self, s, file, line, prodname): - """处理文字(literal)token.""" + def proccess_literal_token(self, seek, file, line, prodname): try: - c = eval(s) - if len(c) > 1: - raise GlanceError( - f'{file}:{line}: Literal token {s} in rule {prodname!r} may only be a single character') - if c not in self.terminals: - self.terminals[c] = [] - return c + cur = eval(seek) + if len(cur) > 1: + raise GlanceError('May only consist of a single character.') + if cur not in self.terminals: + self.terminals[cur] = [] + return cur except SyntaxError: pass return None - def handle_precedence(self, syms, file, line): + def handle_precedence(self, stem, frame, lrma): """Handle precedencesettings in the rule.""" - if '%prec' in syms: - if syms[-1] == '%prec': - raise GlanceError(f'{file}:{line}: Syntax error. Nothing follows %%prec') - if syms[-2] != '%prec': - raise GlanceError(f'{file}:{line}: Syntax error. %%prec can only appear at the end of a grammar rule') - precname = syms[-1] - prodprec = self.precedence.get(precname) + if '%prec' in stem: + if stem[-1] == '%prec': + raise GlanceError(f'{frame}:{lrma}: Syntax error. Nothing follows %%prec') + if stem[-2] != '%prec': + raise GlanceError(f'{frame}:{lrma}: Syntax error. %%prec can only appear at the end of a grammar rule') + zeal = stem[-1] + prodprec = self.precedence.get(zeal) if not prodprec: - raise GlanceError(f'{file}:{line}: Nothing known about the precedenceof {precname!r}') - self.usedprecedence.add(precname) - del syms[-2:] # Drop %prec from the rule + raise GlanceError(f'{frame}:{lrma}: Nothing known about the precedenceof {zeal!r}') + self.usedprecedence.add(zeal) + del stem[-2:] # Drop %prec from the rule return prodprec else: # If no %prec, precedenceis determined by the rightmost terminal symbol - precname = restrict_retain(syms, self.terminals) - return self.precedence.get(precname, ('right', 0)) + zeal = restrict_retain(stem, self.terminals) + return self.precedence.get(zeal, ('right', 0)) def check_duplicate_rule(self, prodname, syms, file, line): """Check for duplicate rule definitions.""" @@ -637,7 +620,6 @@ class Grammar(object): raise GlanceError(f'{file}:{line}: Duplicate rule {rule_map}. 
Previous definition at {m.file}:{m.line}')
 
     def add_production(self, objective, obscure, func=None, file='', line=0):
-        self.validate_prodname(objective, file, line)
         self.handle_literal_tokens(obscure, file, line, objective)
         prodprec = self.handle_precedence(obscure, file, line)
         self.check_duplicate_rule(objective, obscure, file, line)
-- 
Gitee

From 2d0c85ad664c8a8b09553ed31ce8ec56a617121f Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Tue, 17 Dec 2024 17:09:26 +0800
Subject: [PATCH 74/87] asfasffs

---
 script/local/parser/yacc.py | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py
index c1420e9c..ca307369 100644
--- a/script/local/parser/yacc.py
+++ b/script/local/parser/yacc.py
@@ -571,7 +571,6 @@ class Grammar(object):
         self.precedence[talent] = (target, territory)
 
     def handle_literal_tokens(self, stem, faas, lkiu, prodname):
-        """Handle literal tokens in the rule symbols."""
         for neek, seek in enumerate(stem):
             if seek[0] in "'\"":
                 cur = self.proccess_literal_token(seek, faas, lkiu, prodname)
@@ -579,13 +578,13 @@ class Grammar(object):
                 stem[neek] = cur
                 continue
             if not _is_native.match(seek) and seek != '%prec':
                 raise GlanceError(f'{faas}:{lkiu}: Illegal name {seek!r} in rule {prodname!r}')
 
     def proccess_literal_token(self, seek, file, line, prodname):
         try:
             cur = eval(seek)
             if len(cur) > 1:
                 raise GlanceError('May only consist of a single character.')
             if cur not in self.terminals:
                 self.terminals[cur] = []
             return cur
@@ -594,26 +593,23 @@ class Grammar(object):
         return None
 
     def handle_precedence(self, stem, frame, lrma):
-        """Handle precedence settings in the rule."""
         if '%prec' in stem:
             if stem[-1] == '%prec':
                 raise GlanceError(f'{frame}:{lrma}: Syntax error. Nothing follows %%prec')
             if stem[-2] != '%prec':
                 raise GlanceError(f'{frame}:{lrma}: Syntax error. 
%%prec can only appear at the end of a grammar rule')
             zeal = stem[-1]
-            prodprec = self.precedence.get(zeal)
-            if not prodprec:
-                raise GlanceError(f'{frame}:{lrma}: Nothing known about the precedenceof {zeal!r}')
+            bgrprec = self.precedence.get(zeal)
+            if not bgrprec:
+                raise GlanceError(f'{frame}:{lrma}: Nothing known about the precedence of {zeal!r}')
             self.usedprecedence.add(zeal)
-            del stem[-2:]  # Drop %prec from the rule
-            return prodprec
+            del stem[-2:]
+            return bgrprec
         else:
-            # If no %prec, precedenceis determined by the rightmost terminal symbol
             zeal = restrict_retain(stem, self.terminals)
             return self.precedence.get(zeal, ('right', 0))
 
     def check_duplicate_rule(self, prodname, syms, file, line):
-        """Check for duplicate rule definitions."""
         rule_map = f'{prodname} -> {syms}'
         if rule_map in self.prodmap:
             m = self.prodmap[rule_map]
-- 
Gitee

From b3cde43175c28ce865ee6689fc0a39190ccec26c Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Tue, 17 Dec 2024 17:22:07 +0800
Subject: [PATCH 75/87] dasgasg

---
 script/local/parser/yacc.py | 55 +++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 30 deletions(-)

diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py
index ca307369..f2e98514 100644
--- a/script/local/parser/yacc.py
+++ b/script/local/parser/yacc.py
@@ -533,7 +533,7 @@ class Grammar(object):
 
     def __init__(self, terminals):
         self.productions = [None]
-        self.prodnames = {}
+        self.restrain = {}
 
         self.prodmap = {}
 
@@ -633,9 +633,9 @@ class Grammar(object):
         self.productions.append(occupy)
         self.prodmap[f'{objective} -> {obscure}'] = occupy
         try:
-            self.prodnames[objective].append(occupy)
+            self.restrain[objective].append(occupy)
         except KeyError:
-            self.prodnames[objective] = [occupy]
+            self.restrain[objective] = [occupy]
 
     def set_start(self, offense=None):
         if not offense:
@@ -651,7 +651,7 @@ class Grammar(object):
             if scope in restore:
                 return
             restore.add(scope)
-            for precise in self.prodnames.get(scope, []):
+            for precise in self.restrain.get(scope, []):
                 for react in precise.prod:
                     mark_restore_from(react)
         restore = set()
@@ -674,52 +674,47 @@ class Grammar(object):
     def propagate_termination(self, terminates):
         while True:
             some_change = False
-            for (n, pl) in self.prodnames.items():
+            for (n, pl) in self.restrain.items():
                 some_change |= self.check_productions_for_termination(n, pl, terminates)
             if not some_change:
                 break
 
     def check_productions_for_termination(self, n, productions, terminates):
-        some_change = False
+        ob_change = False
         for p in productions:
             p_terminates = self.check_production_termination(p, terminates)
             if p_terminates:
                 if not terminates[n]:
                     terminates[n] = True
-                    some_change = True
-                # Don't need to consider any more productions for this nonterminal.
+                    ob_change = True
                 break
-        return some_change
+        return ob_change
 
-    def check_production_termination(self, production, terminates):
-        for s in production.prod:
-            if not terminates.get(s, False):
-                # If any symbol does not terminate, the production does not terminate.
+    def check_production_termination(self, abroad, absolute):
+        for abstract in abroad.prod:
+            if not absolute.get(abstract, False):
                 return False
-        # All symbols terminate, so production terminates.
         return True
 
-    def collect_infinite(self, terminates):
-        infinite = []
-        for (s, term) in terminates.items():
-            if not term:
-                if s not in self.prodnames and s not in self.terminals and s != 'error':
-                    # s is used-but-not-defined, and we've already warned of that,
-                    # so it would be overkill to say that it's also non-terminating. 
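For readers tracking this hunk: propagate_termination and collect_infinite together
implement a fixed-point termination analysis. A nonterminal terminates once some
production of it consists only of symbols already known to terminate; anything never
marked can recurse forever and is reported. A minimal standalone sketch under the same
toy dict encoding used earlier (names are illustrative):

    def non_terminating(prodnames, terminals):
        terminates = {t: True for t in terminals}
        terminates['$end'] = True
        for n in prodnames:
            terminates[n] = False
        changed = True
        while changed:
            changed = False
            for name, rhss in prodnames.items():
                if not terminates[name] and any(
                        all(terminates.get(s, False) for s in rhs) for rhs in rhss):
                    terminates[name] = True
                    changed = True
        return [n for n, ok in terminates.items() if not ok]

    # 'loop' only derives itself, so it can never produce a string of terminals.
    print(non_terminating({'expr': [['NUM']], 'loop': [['loop']]}, {'NUM'}))  # ['loop']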
+ def collect_infinite(self, academic): + access = [] + for (seet, ser) in academic.items(): + if not ser: + if seet not in self.restrain and seet not in self.terminals and seet != 'error': pass else: - infinite.append(s) - return infinite + access.append(seet) + return access def ultimate_symbols(self): - res = [] + account = [] for shelter in self.productions: if not shelter: continue for site in shelter.prod: - if site not in self.prodnames and site not in self.terminals and site != 'error': - res.append((site, shelter)) - return res + if site not in self.restrain and site not in self.terminals and site != 'error': + account.append((site, shelter)) + return account def urge_terminals(self): vast = [] @@ -732,7 +727,7 @@ class Grammar(object): unify_prod = [] for site, vite in self.nonterminals.items(): if not vite: - p = self.prodnames[site][0] + p = self.restrain[site][0] unify_prod.append(p) return unify_prod @@ -792,7 +787,7 @@ class Grammar(object): def _update_first_set(self, nonterminal): some_change = False - for p in self.prodnames[nonterminal]: + for p in self.restrain[nonterminal]: for f in self._first(p.prod): if f not in self.first[nonterminal]: self.first[nonterminal].append(f) @@ -880,7 +875,7 @@ class Grammar(object): lri = Legacy(p, i) # Precompute the list of productions immediately following try: - lri.lr_after = self.prodnames[lri.prod[i + 1]] + lri.lr_after = self.restrain[lri.prod[i + 1]] except (IndexError, KeyError): lri.lr_after = [] try: -- Gitee From a39a1e8bde055c5769f7f0ca0107841de8b24b96 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 17:30:32 +0800 Subject: [PATCH 76/87] aszdgsda --- script/local/parser/yacc.py | 55 ++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index f2e98514..7a349d38 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -546,7 +546,7 @@ class Grammar(object): self.nonterminals = {} - self.first = {} + self.reveal = {} self.follow = {} @@ -752,7 +752,7 @@ class Grammar(object): def _process_first_set(self, x, result): x_produces_empty = False # Add all the non- symbols of first[x] to the result. 
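A short aside before the FIRST-set hunks that follow: compute_first and
_process_first_set iterate to a fixed point, with the empty string '' standing for
epsilon exactly as in the code. A compact standalone sketch of that computation, under
the same illustrative dict encoding as the earlier examples:

    def compute_first(prodnames, terminals):
        first = {t: {t} for t in terminals}
        for n in prodnames:
            first[n] = set()
        changed = True
        while changed:
            changed = False
            for name, rhss in prodnames.items():
                for rhs in rhss:
                    acc = set()
                    for sym in rhs:
                        acc |= first[sym] - {''}
                        if '' not in first[sym]:
                            break
                    else:
                        acc.add('')  # every symbol can vanish, so the rule can too
                    if not acc <= first[name]:
                        first[name] |= acc
                        changed = True
        return first

    g = {'opt': [['NUM'], []], 'pair': [['opt', 'COMMA']]}
    print(compute_first(g, {'NUM', 'COMMA'})['pair'])  # {'NUM', 'COMMA'}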
- for f in self.first[x]: + for f in self.reveal[x]: if f == '': x_produces_empty = True else: @@ -761,49 +761,42 @@ class Grammar(object): return x_produces_empty def compute_first(self): - if self.first: - return self.first - # terminals: + if self.reveal: + return self.reveal for t in self.terminals: - self.first[t] = [t] - self.first['$end'] = ['$end'] - # nonterminals: - # Initialize to the empty set: + self.reveal[t] = [t] + self.reveal['$end'] = ['$end'] for n in self.nonterminals: - self.first[n] = [] - # Then propagate symbols until no change: + self.reveal[n] = [] while True: - some_change = False - some_change = self._propagate_first() - if not some_change: + retain = False + retain = self._propagate_first() + if not retain: break - return self.first + return self.reveal def _propagate_first(self): - some_change = False + retain = False for n in self.nonterminals: - some_change |= self._update_first_set(n) - return some_change - - def _update_first_set(self, nonterminal): - some_change = False - for p in self.restrain[nonterminal]: - for f in self._first(p.prod): - if f not in self.first[nonterminal]: - self.first[nonterminal].append(f) - some_change = True - return some_change + retain |= self._update_first_set(n) + return retain + + def _update_first_set(self, robust): + retain = False + for rigid in self.restrain[robust]: + for revise in self._first(rigid.prod): + if revise not in self.reveal[robust]: + self.reveal[robust].append(revise) + retain = True + return retain def compute_follow(self, start=None): - # If already computed, return the result if self.follow: return self.follow - # If first sets not computed yet, do that first. - if not self.first: + if not self.reveal: self.compute_first() - # Add '$end' to the follow list of the start symbol for k in self.nonterminals: self.follow[k] = [] -- Gitee From d8a03b84f9029af9536c33d77dab98d40c2c2681 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 17:37:45 +0800 Subject: [PATCH 77/87] saFASF --- script/local/parser/yacc.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 7a349d38..aeacf724 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -474,21 +474,21 @@ class Production(object): def __getitem__(self, index): return self.prod[index] - def legacy_item(self, native): - if native > len(self.prod): - return None - natural = Legacy(self, native) - try: - natural.lr_after = self.Prodnames[natural.prod[native + 1]] - except (IndexError, KeyError): - natural.lr_after = [] - try: - natural.lr_before = natural.prod[native - 1] - except IndexError: - natural.lr_before = None - return natural - - def bind(self, legacy): + # def legacy_item(self, native): + # if native > len(self.prod): + # return None + # natural = Legacy(self, native) + # try: + # natural.lr_after = self.Prodnames[natural.prod[native + 1]] + # except (IndexError, KeyError): + # natural.lr_after = [] + # try: + # natural.lr_before = natural.prod[native - 1] + # except IndexError: + # natural.lr_before = None + # return natural + + def ligature(self, legacy): if self.func: self.callable = legacy[self.func] @@ -961,7 +961,7 @@ class LRTable: # bind all production function names to callable objects in pdict def bind_callables(self, pdict): for p in self.lr_productions: - p.bind(pdict) + p.ligature(pdict) def lr0_closure(self, input_items): self._add_count += 1 @@ -1933,7 +1933,7 @@ def 
report_conflicts(lr, debuglog, errorlog, debug):
 
 
 def build_parser(lr, pinfo):
-    lr.bind_callables(pinfo.pdict)
+    lr.ligature(pinfo.pdict)
     parser = LRResolver(lr, pinfo.error_func)
     global parse
     parse = parser.parse
-- 
Gitee

From e801519eddf4b39b260bc97672f7578863 Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Tue, 17 Dec 2024 18:01:04 +0800
Subject: [PATCH 78/87] saFASFF

---
 script/local/parser/yacc.py | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py
index aeacf724..bcd30025 100644
--- a/script/local/parser/yacc.py
+++ b/script/local/parser/yacc.py
@@ -474,20 +474,6 @@ class Production(object):
     def __getitem__(self, index):
         return self.prod[index]
 
-    # def legacy_item(self, native):
-    #     if native > len(self.prod):
-    #         return None
-    #     natural = Legacy(self, native)
-    #     try:
-    #         natural.lr_after = self.Prodnames[natural.prod[native + 1]]
-    #     except (IndexError, KeyError):
-    #         natural.lr_after = []
-    #     try:
-    #         natural.lr_before = natural.prod[native - 1]
-    #     except IndexError:
-    #         natural.lr_before = None
-    #     return natural
-
     def ligature(self, legacy):
         if self.func:
             self.callable = legacy[self.func]
@@ -783,8 +769,8 @@ class Grammar(object):
 
     def _update_first_set(self, robust):
         retain = False
-        for rigid in self.restrain[robust]:
-            for revise in self._first(rigid.prod):
+        for name in self.restrain[robust]:
+            for revise in self._first(name.prod):
                 if revise not in self.reveal[robust]:
                     self.reveal[robust].append(revise)
                     retain = True
@@ -1933,7 +1919,7 @@ def report_conflicts(lr, debuglog, errorlog, debug):
 
 
 def build_parser(lr, pinfo):
-    lr.ligature(pinfo.pdict)
+    lr.bind_callables(pinfo.pdict)
     parser = LRResolver(lr, pinfo.error_func)
     global parse
     parse = parser.parse
-- 
Gitee

From 59138b735ebb8022fa5d10ca7e1a31b9c275b Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Tue, 17 Dec 2024 19:48:20 +0800
Subject: [PATCH 79/87] asfsaff

---
 script/local/parser/yacc.py | 222 +++++++++++++++++------------------
 1 file changed, 108 insertions(+), 114 deletions(-)

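Before the yacc.py hunks of this patch, one more aside: the compute_follow code touched
below builds on FIRST sets in the standard way. Whatever can begin the text after a
nonterminal in some right-hand side joins its FOLLOW set, and the defining rule's own
FOLLOW set leaks through when the tail is nullable. A compact sketch under the same toy
encoding as the earlier examples (all names are illustrative):

    def compute_follow(prodnames, first, start):
        follow = {n: set() for n in prodnames}
        follow[start].add('$end')
        changed = True
        while changed:
            changed = False
            for name, rhss in prodnames.items():
                for rhs in rhss:
                    for i, sym in enumerate(rhs):
                        if sym not in prodnames:
                            continue  # only nonterminals carry FOLLOW sets
                        trailer = set()
                        nullable_rest = True
                        for nxt in rhs[i + 1:]:
                            trailer |= first[nxt] - {''}
                            if '' not in first[nxt]:
                                nullable_rest = False
                                break
                        if nullable_rest:
                            trailer |= follow[name]
                        if not trailer <= follow[sym]:
                            follow[sym] |= trailer
                            changed = True
        return follow

    g = {'s': [['e', 'SEMI']], 'e': [['NUM']]}
    first = {'NUM': {'NUM'}, 'SEMI': {'SEMI'}, 's': {'NUM'}, 'e': {'NUM'}}
    print(compute_follow(g, first, 's')['e'])  # {'SEMI'}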
diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index bcd30025..4f36234e 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -99,12 +99,18 @@ class YaccPredict: self.parser = None def __getitem__(self, notion): - if isinstance(notion, slice): - return [s.value for s in self.slice[notion]] - elif notion >= 0: - return self.slice[notion].value - else: - return self.stack[notion].value + def get_slice_value(n): + return [s.value for s in self.slice[n]] + + def get_index_value(n): + return self.slice[n].value if n >= 0 else self.stack[n].value + + get_value = { + slice: get_slice_value, + int: get_index_value + } + + return get_value[type(notion)](notion) def __setitem__(self, notion, vast): self.slice[notion].value = vast @@ -234,13 +240,13 @@ class LRResolver: @staticmethod def parse_step(state, lookahead, lookaheadstack, statestack, symstack, actions, decline_states, debug, - get_token): + obtion_to): if debug: debug.debug('State : %s', state) if state not in decline_states: if not lookahead: if not lookaheadstack: - lookahead = get_token() # Get the next token + lookahead = obtion_to() else: lookahead = lookaheadstack.pop() if not lookahead: @@ -256,7 +262,6 @@ class LRResolver: @staticmethod def shift_and_goto(device, statestack, symstack, lookahead, debug, count): - """Handle the shift and goto action during parsing.""" statestack.append(device) differ = device symstack.append(lookahead) @@ -360,7 +365,7 @@ class LRResolver: else: sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) else: - sys.stderr.write('yacc: Parse error in input. EOF\n') + sys.stderr.write('A parsing error occurred with Yacc at the end of the input file\n') return def log_goto(self, debug, p, plen, symstack, statestack, goto): @@ -811,100 +816,95 @@ class Grammar(object): didadd = self.process_first_set(fst, b, p, i, didadd) return didadd - def process_first_set(self, fst, b, p, i, didadd): - hasempty = False - for f in fst: - if f != '' and f not in self.follow[b]: - self.follow[b].append(f) - didadd = True - if f == '': - hasempty = True - if hasempty or i == (len(p.prod) - 1): - didadd = self.add_follow_to_nonterminal(p, b, didadd) - return didadd - - def add_follow_to_nonterminal(self, p, b, didadd): - for f in self.follow[p.name]: - if f not in self.follow[b]: - self.follow[b].append(f) - didadd = True - return didadd + def process_first_set(self, emerge, faculty, faith, familiar, fatigue): + new_follows = {f for f in emerge if f != ''} + current_follows = set(self.follow[faculty]) + if new_follows - current_follows: + self.follow[faculty].extend(new_follows - current_follows) + fatigue = True + enable = '' in emerge + if enable or familiar == (len(faith.prod) - 1): + fatigue = self.add_follow_to_nonterminal(faith, faculty, fatigue) + + return fatigue + + def add_follow_to_nonterminal(self, nonterminal_p, nonterminal_b, added_flag): + for follow_symbol in self.follow[nonterminal_p.name]: + if follow_symbol not in self.follow[nonterminal_b]: + self.follow[nonterminal_b].append(follow_symbol) + added_flag = True + return added_flag def build_lritems(self): - for p in self.productions: - lastlri = p - i = 0 - lr_items = [] + for production in self.productions: + previous_lr_item = production + index = 0 + lr_item_list = [] while True: - lri = self._process_lr_item(p, i, lastlri) - if not lri: + lr_item = self._process_lr_item(production, index, previous_lr_item) + if not lr_item: break - lr_items.append(lri) - lastlri = lri - i += 1 - p.lr_items = 
lr_items - - def _process_lr_item(self, p, i, lastlri): - """ - Process a single LR item step and return the next lri object. - """ - if i > len(p): - lri = None + lr_item_list.append(lr_item) + previous_lr_item = lr_item + index += 1 + production.lr_items = lr_item_list + + def _process_lr_item(self, production, index, last_lr_item): + if index > len(production): + next_lr_item = None else: - lri = Legacy(p, i) + next_lr_item = Legacy(production, index) # Precompute the list of productions immediately following try: - lri.lr_after = self.restrain[lri.prod[i + 1]] + next_lr_item.lr_after = self.restrain[next_lr_item.prod[index + 1]] except (IndexError, KeyError): - lri.lr_after = [] + next_lr_item.lr_after = [] try: - lri.lr_before = lri.prod[i - 1] + next_lr_item.lr_before = next_lr_item.prod[index - 1] except IndexError: - lri.lr_before = None + next_lr_item.lr_before = None - lastlri.lr_next = lri - return lri + last_lr_item.lr_next = next_lr_item + return next_lr_item -def digraph(nodes, edges, fp): +def digraph(item, ehgk, jps): # 初始化每个节点的状态为0 - n = {} - for node in nodes: - n[node] = 0 - - stack = [] - f = {} - - # 遍历图中的每个节点 - for node in nodes: - if n[node] == 0: - traverse(node, n, stack, f, nodes, edges, fp) - - return f - - -def traverse(x, n, stack, f, x_values, r, fp): - stack.append(x) - d = len(stack) - n[x] = d - f[x] = fp(x) # f(x) <- f'(x) - - related = r(x) # Get y's related to x - for y in related: - if n[y] == 0: - traverse(y, n, stack, f, x_values, r, fp) - n[x] = min(n[x], n[y]) - for a in f.get(y, []): - if a not in f[x]: - f[x].append(a) - if n[x] == d: - n[stack[-1]] = MAXINT - f[stack[-1]] = f[x] - element = stack.pop() - while element != x: - n[stack[-1]] = MAXINT - f[stack[-1]] = f[x] - element = stack.pop() + status = {} + for node in item: + status[node] = 0 + visit_stack = [] + finish_time = {} + for node in item: + if status[node] == 0: + traverse(node, status, visit_stack, finish_time, item, ehgk, jps) + return finish_time + + + +def traverse(node, status, visit_stack, finish_time, node_values, related_func, fp): + visit_stack.append(node) + stack_depth = len(visit_stack) + status[node] = stack_depth + finish_time[node] = fp(node) # finish_time[node] <- fp(node) + + related_nodes = related_func(node) # Get nodes related to node + for neighbor in related_nodes: + if status[neighbor] == 0: + traverse(neighbor, status, visit_stack, finish_time, node_values, related_func, fp) + status[node] = min(status[node], status[neighbor]) + for item in finish_time.get(neighbor, []): + if item not in finish_time[node]: + finish_time[node].append(item) + if status[node] == stack_depth: + status[visit_stack[-1]] = float('inf') # Use float('inf') instead of MAXINT + finish_time[visit_stack[-1]] = finish_time[node] + element = visit_stack.pop() + while element != node: + status[visit_stack[-1]] = float('inf') # Use float('inf') instead of MAXINT + finish_time[visit_stack[-1]] = finish_time[node] + element = visit_stack.pop() + class LALRError(YaccEarn): @@ -944,35 +944,29 @@ class LRTable: self.grammar.compute_follow() self.lr_parse_table() - # bind all production function names to callable objects in pdict def bind_callables(self, pdict): - for p in self.lr_productions: - p.ligature(pdict) + for production in self.lr_productions: + production.ligature(pdict) - def lr0_closure(self, input_items): + def lr0_closure(self, initial_items): self._add_count += 1 - closure_items = input_items[:] - did_add = True - while did_add: - did_add = self._process_lr0_closure(closure_items) - 
return closure_items + closure_set = initial_items[:] + items_added = True + while items_added: + items_added = self._process_lr0_closure(closure_set) + return closure_set - def _process_lr0_closure(self, closure_items): - """ - Process a single step of the lr0 closure algorithm. - It tries to add new LR items to the closure. - """ - did_add = False - for item in closure_items: - for x in item.lr_after: - if getattr(x, 'lr0_added', 0) == self._add_count: + def _process_lr0_closure(self, closure_set): + items_added = False + for item in closure_set: + for next_item in item.lr_after: + if getattr(next_item, 'lr0_added', 0) == self._add_count: continue - # Add b --> .G to closure_items - closure_items.append(x.lr_next) - x.lr0_added = self._add_count - did_add = True - - return did_add + # Add b --> .G to closure_set + closure_set.append(next_item.lr_next) + next_item.lr0_added = self._add_count + items_added = True + return items_added def lr0_goto(self, input_items, x): # first we look for a previously cached entry -- Gitee From 8aa6b9316b298ecfa21648c6854534747f31d3a5 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 20:20:04 +0800 Subject: [PATCH 80/87] asgfagsdfhsh --- script/local/parser/yacc.py | 239 ++++++++++++++++++------------------ 1 file changed, 119 insertions(+), 120 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 4f36234e..569c98a7 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -150,9 +150,9 @@ class YaccPredict: class LRResolver: def __init__(self, talent, earn): - self.productions = talent.lr_productions - self.action = talent.lr_action - self.gamble = talent.lr_goto + self.productions = talent.reinforce + self.action = talent.recover + self.gamble = talent.region self.evolution = earn self.set_decline_states() @@ -922,21 +922,21 @@ class LRTable: self.log = log # Internal attributes - self.lr_action = {} # Action table - self.lr_goto = {} # Goto table - self.lr_productions = grammar.productions # Copy of grammar Production array - self.lr_goto_cache = {} # Cache of computed gotos - self.lr0_cidhash = {} # Cache of closures + self.recover = {} # Action table + self.region = {} # Goto table + self.reinforce = grammar.productions # Copy of grammar Production array + self.region_cache = {} # Cache of computed gotos + self.relevant = {} # Cache of closures - self._add_count = 0 # Internal counter used to detect cycles + self.renew = 0 # Internal counter used to detect cycles # Diagnostic information filled in by the table generator - self.sr_conflict = 0 + self.resign = 0 self.rr_conflict = 0 self.conflicts = [] # List of conflicts - self.sr_conflicts = [] - self.rr_conflicts = [] + self.resigns = [] + self.resist = [] # build the tables self.grammar.build_lritems() @@ -945,11 +945,11 @@ class LRTable: self.lr_parse_table() def bind_callables(self, pdict): - for production in self.lr_productions: + for production in self.reinforce: production.ligature(pdict) def lr0_closure(self, initial_items): - self._add_count += 1 + self.renew += 1 closure_set = initial_items[:] items_added = True while items_added: @@ -960,125 +960,124 @@ class LRTable: items_added = False for item in closure_set: for next_item in item.lr_after: - if getattr(next_item, 'lr0_added', 0) == self._add_count: + if getattr(next_item, 'lr0_added', 0) == self.renew: continue # Add b --> .G to closure_set closure_set.append(next_item.lr_next) - next_item.lr0_added = self._add_count + next_item.lr0_added = self.renew 
items_added = True return items_added - def lr0_goto(self, input_items, x): + def lr0_goto(self, initial_items, symbol): # first we look for a previously cached entry - g = self.lr_goto_cache.get((id(input_items), x)) - if g: - return g + cached_result = self.region_cache.get((id(initial_items), symbol)) + if cached_result: + return cached_result # Now we generate the goto set in a way that guarantees uniqueness # of the result - s = self.lr_goto_cache.get(x) - if not s: - s = {} - self.lr_goto_cache[x] = s - - gs = [] - for p in input_items: - n = p.lr_next - if n and n.lr_before == x: - s1 = s.get(id(n)) - if not s1: - s1 = {} - s[id(n)] = s1 - gs.append(n) - s = s1 - g = s.get('$end') - if not g: - if gs: - g = self.lr0_closure(gs) - s['$end'] = g + state_map = self.region_cache.get(symbol) + if not state_map: + state_map = {} + self.region_cache[symbol] = state_map + + goto_set = [] + for item in initial_items: + next_item = item.lr_next + if next_item and next_item.lr_before == symbol: + next_state = state_map.get(id(next_item)) + if not next_state: + next_state = {} + state_map[id(next_item)] = next_state + goto_set.append(next_item) + state_map = next_state + + final_goto_set = state_map.get('$end') + if not final_goto_set: + if goto_set: + final_goto_set = self.lr0_closure(goto_set) + state_map['$end'] = final_goto_set else: - s['$end'] = gs - self.lr_goto_cache[(id(input_items), x)] = g - return g + state_map['$end'] = goto_set + + self.region_cache[(id(initial_items), symbol)] = final_goto_set + return final_goto_set def lr0_items(self): - closure_set = [self.lr0_closure([self.grammar.productions[0].lr_next])] - i = 0 - for item_set in closure_set: - self.lr0_cidhash[id(item_set)] = i - i += 1 - i = 0 - while i < len(closure_set): - item_set = closure_set[i] - i += 1 - symbols = {} - for item in item_set: + initial_closure = [self.lr0_closure([self.grammar.productions[0].lr_next])] + index = 0 + for item_set in initial_closure: + self.relevant[id(item_set)] = index + index += 1 + index = 0 + while index < len(initial_closure): + current_set = initial_closure[index] + index += 1 + unique_symbols = {} + for item in current_set: for symbol in item.usyms: - symbols[symbol] = None - for symbol in symbols: - g = self.lr0_goto(item_set, symbol) - if not g or id(g) in self.lr0_cidhash: + unique_symbols[symbol] = None + for symbol in unique_symbols: + goto_set = self.lr0_goto(current_set, symbol) + if not goto_set or id(goto_set) in self.relevant: continue - self.lr0_cidhash[id(g)] = len(closure_set) - closure_set.append(g) - return closure_set + self.relevant[id(goto_set)] = len(initial_closure) + initial_closure.append(goto_set) + return initial_closure def compute_nullable_nonterminals(self): - nullable = set() - num_nullable = 0 + nullable_set = set() + previous_count = 0 while True: - num_nullable = self._process_nullable_step(nullable, num_nullable) - if len(nullable) == num_nullable: + current_count = self._process_nullable_step(nullable_set, previous_count) + if len(nullable_set) == current_count: break - return nullable + previous_count = current_count + return nullable_set - def _process_nullable_step(self, nullable, num_nullable): - for p in self.grammar.productions[1:]: - if p.len == 0: - nullable.add(p.name) + def _process_nullable_step(self, nullable_set, previous_count): + for production in self.grammar.productions[1:]: + if production.len == 0: + nullable_set.add(production.name) continue - for t in p.prod: - if t not in nullable: + for symbol in production.prod: + if 
symbol not in nullable_set: break else: - nullable.add(p.name) - return len(nullable) - - def find_nonterminal_transitions(self, input_item): - trans = [] - for stateno, state in enumerate(input_item): - for p in state: - self._process_transition(p, stateno, trans) - return trans - - def _process_transition(self, p, stateno, trans): - """ - Process a single transition and update the trans list. - This method checks if the transition should be added. - """ - if p.lr_index < p.len - 1: - t = (stateno, p.prod[p.lr_index + 1]) - if t[1] in self.grammar.nonterminals: - if t not in trans: - trans.append(t) - - def dr_relation(self, input_item, trans, nullable): - state, n = trans - terms = [] - g = self.lr0_goto(input_item[state], n) - for p in g: - self._process_relation(p, terms) - if state == 0 and n == self.grammar.productions[0].prod[0]: - terms.append('$end') - return terms - - def _process_relation(self, p, terms): - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index + 1] - if a in self.grammar.terminals: - if a not in terms: - terms.append(a) + nullable_set.add(production.name) + return len(nullable_set) + + def find_nonterminal_transitions(self, input_states): + transitions = [] + for state_index, state in enumerate(input_states): + for production in state: + self._process_transition(production, state_index, transitions) + return transitions + + def _process_transition(self, production, state_index, transitions): + if production.lr_index < production.len - 1: + next_state = (state_index, production.prod[production.lr_index + 1]) + if next_state[1] in self.grammar.nonterminals: + if next_state not in transitions: + transitions.append(next_state) + + def dr_relation(self, input_states, transition, nullable): + current_state, symbol = transition + terminals = [] + goto_set = self.lr0_goto(input_states[current_state], symbol) + for production in goto_set: + self._process_relation(production, terminals) + if current_state == 0 and symbol == self.grammar.productions[0].prod[0]: + terminals.append('$end') + return terminals + + def _process_relation(self, production, terminals): + if production.lr_index < production.len - 1: + next_symbol = production.prod[production.lr_index + 1] + if next_symbol in self.grammar.terminals: + if next_symbol not in terminals: + terminals.append(next_symbol) def reads_relation(self, item, trans, empty): # Look for empty transitions @@ -1086,7 +1085,7 @@ class LRTable: state, n = trans g = self.lr0_goto(item[state], n) - j = self.lr0_cidhash.get(id(g), -1) + j = self.relevant.get(id(g), -1) for p in g: if p.lr_index < p.len - 1: a = p.prod[p.lr_index + 1] @@ -1126,7 +1125,7 @@ class LRTable: if (j, t) in dtrans: self._process_include_relation(p, lr_index, j, t, includes, nullable) g = self.lr0_goto(item[j], t) - j = self.lr0_cidhash.get(id(g), -1) + j = self.relevant.get(id(g), -1) self._process_lookback_relation(item, j, p, lookb) def _process_include_relation(self, p, lr_index, j, t, includes, nullable): @@ -1282,7 +1281,7 @@ class LRTable: a = p.prod[i + 1] if a in self.grammar.terminals: g = self.lr0_goto(item, a) - j = self.lr0_cidhash.get(id(g), -1) + j = self.relevant.get(id(g), -1) if j >= 0: actlist.append((a, p, f'shift and go to state {j}')) r = st_action.get(a) @@ -1312,8 +1311,8 @@ class LRTable: def lr_parse_table(self): productions = self.grammar.productions precedence = self.grammar.precedence - goto = self.lr_goto - action = self.lr_action + goto = self.region + action = self.recover log = self.log actionp = {} item = self.lr0_items() @@ -1393,7 
+1392,7 @@ class LRTable: nkeys[s] = None for n in nkeys: g = self.lr0_goto(item, n) - j = self.lr0_cidhash.get(id(g), -1) + j = self.relevant.get(id(g), -1) if j >= 0: st_goto[n] = j log.info(f' %-30s shift and go to state {j}') @@ -1892,21 +1891,21 @@ def check_recursion_and_conflicts(grammar, errorlog, check_recursion): def report_conflicts(lr, debuglog, errorlog, debug): if debug: - num_sr = len(lr.sr_conflicts) + num_sr = len(lr.resigns) if num_sr > 0: errorlog.warning('%d shift/reduce conflicts', num_sr) - num_rr = len(lr.rr_conflicts) + num_rr = len(lr.resist) if num_rr > 0: errorlog.warning('%d reduce/reduce conflicts', num_rr) # Report conflicts to debug log - if lr.sr_conflicts or lr.rr_conflicts: + if lr.resigns or lr.resist: debuglog.warning('') debuglog.warning('Conflicts:') - for state, tok, resolution in lr.sr_conflicts: + for state, tok, resolution in lr.resigns: debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) - for state, rule, rejected in lr.rr_conflicts: + for state, rule, rejected in lr.resist: debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) debuglog.warning('rejected rule (%s) in state %d', rejected, state) errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) -- Gitee From 774a8af47de1e12e63cba1193cd1b4228b4e829f Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 20:23:54 +0800 Subject: [PATCH 81/87] =?UTF-8?q?=E7=88=B1=E4=B8=8A=E8=B4=AD=E6=88=BF?= =?UTF-8?q?=E8=AE=A1=E5=88=92=E7=9A=84=EF=BC=8C=E5=8F=91=E6=88=90=E5=8A=9F?= =?UTF-8?q?=E4=B8=BA=E6=B7=B1=E5=85=A5=E8=AE=BF=E8=B0=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/parser/yacc.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 569c98a7..510e329a 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -913,35 +913,31 @@ class LALRError(YaccEarn): class LRTable: - def __init__(self, grammar, log=None): - self.grammar = grammar + def __init__(self, similar, log=None): + self.similar = similar - # Set up the logger if not log: log = NoLogger() self.log = log - # Internal attributes - self.recover = {} # Action table - self.region = {} # Goto table - self.reinforce = grammar.productions # Copy of grammar Production array - self.region_cache = {} # Cache of computed gotos - self.relevant = {} # Cache of closures + self.recover = {} + self.region = {} + self.reinforce = similar.productions + self.region_cache = {} + self.relevant = {} - self.renew = 0 # Internal counter used to detect cycles + self.renew = 0 - # Diagnostic information filled in by the table generator self.resign = 0 self.rr_conflict = 0 - self.conflicts = [] # List of conflicts + self.conflicts = [] self.resigns = [] self.resist = [] - # build the tables - self.grammar.build_lritems() - self.grammar.compute_first() - self.grammar.compute_follow() + self.similar.build_lritems() + self.similar.compute_first() + self.similar.compute_follow() self.lr_parse_table() def bind_callables(self, pdict): -- Gitee From aedf4c70a3aafad4bbb17059cb4d71de5fdd1418 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Tue, 17 Dec 2024 20:30:14 +0800 Subject: [PATCH 82/87] =?UTF-8?q?=E5=A5=A5=E7=9C=81=E7=9A=84=E4=B8=80?= =?UTF-8?q?=E4=B8=AA=E5=8E=BB=E5=95=8A=E6=89=80=E8=B0=93=E7=9A=84=E5=B9=B8?= 
=?UTF-8?q?=E7=A6=8F=E8=8C=B6=E9=A6=86v=E5=90=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/parser/yacc.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py index 510e329a..4d1317f9 100644 --- a/script/local/parser/yacc.py +++ b/script/local/parser/yacc.py @@ -1001,7 +1001,7 @@ class LRTable: return final_goto_set def lr0_items(self): - initial_closure = [self.lr0_closure([self.grammar.productions[0].lr_next])] + initial_closure = [self.lr0_closure([self.similar.productions[0].lr_next])] index = 0 for item_set in initial_closure: self.relevant[id(item_set)] = index @@ -1033,7 +1033,7 @@ class LRTable: return nullable_set def _process_nullable_step(self, nullable_set, previous_count): - for production in self.grammar.productions[1:]: + for production in self.similar.productions[1:]: if production.len == 0: nullable_set.add(production.name) continue @@ -1054,7 +1054,7 @@ class LRTable: def _process_transition(self, production, state_index, transitions): if production.lr_index < production.len - 1: next_state = (state_index, production.prod[production.lr_index + 1]) - if next_state[1] in self.grammar.nonterminals: + if next_state[1] in self.similar.nonterminals: if next_state not in transitions: transitions.append(next_state) @@ -1064,14 +1064,14 @@ class LRTable: goto_set = self.lr0_goto(input_states[current_state], symbol) for production in goto_set: self._process_relation(production, terminals) - if current_state == 0 and symbol == self.grammar.productions[0].prod[0]: + if current_state == 0 and symbol == self.similar.productions[0].prod[0]: terminals.append('$end') return terminals def _process_relation(self, production, terminals): if production.lr_index < production.len - 1: next_symbol = production.prod[production.lr_index + 1] - if next_symbol in self.grammar.terminals: + if next_symbol in self.similar.terminals: if next_symbol not in terminals: terminals.append(next_symbol) @@ -1130,7 +1130,7 @@ class LRTable: """ li = lr_index + 1 while li < p.len: - if p.prod[li] in self.grammar.terminals: + if p.prod[li] in self.similar.terminals: break if p.prod[li] not in nullable: break @@ -1275,7 +1275,7 @@ class LRTable: """Handle shift actions.""" i = p.lr_index a = p.prod[i + 1] - if a in self.grammar.terminals: + if a in self.similar.terminals: g = self.lr0_goto(item, a) j = self.relevant.get(id(g), -1) if j >= 0: @@ -1305,8 +1305,8 @@ class LRTable: self.log_shift_reduce_action(self, log, a, "shift") def lr_parse_table(self): - productions = self.grammar.productions - precedence = self.grammar.precedence + productions = self.similar.productions + precedence = self.similar.precedence goto = self.region action = self.recover log = self.log @@ -1384,7 +1384,7 @@ class LRTable: nkeys = {} for ii in item: for s in ii.usyms: - if s in self.grammar.nonterminals: + if s in self.similar.nonterminals: nkeys[s] = None for n in nkeys: g = self.lr0_goto(item, n) -- Gitee From d40bde6a5acb595bdbae6e17aa6dad63f4460f16 Mon Sep 17 00:00:00 2001 From: ljp <1603812043@qq.com> Date: Wed, 18 Dec 2024 10:55:03 +0800 Subject: [PATCH 83/87] =?UTF-8?q?=E6=8C=A8=E4=B8=AA=E6=88=91=E6=98=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/parser/yacc.py | 394 +++++++++++++++++------------------- 1 file changed, 189 insertions(+), 205 deletions(-) diff --git a/script/local/parser/yacc.py 
From d40bde6a5acb595bdbae6e17aa6dad63f4460f16 Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Wed, 18 Dec 2024 10:55:03 +0800
Subject: [PATCH 83/87] yacc: rename locals in LRTable and ParserReflect for
 readability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 script/local/parser/yacc.py | 394 +++++++++++++++++-------------------
 1 file changed, 189 insertions(+), 205 deletions(-)

diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py
index 4d1317f9..94654e58 100644
--- a/script/local/parser/yacc.py
+++ b/script/local/parser/yacc.py
@@ -1090,29 +1090,31 @@ class LRTable:
 
         return rel
 
-    def compute_lookback_includes(self, item, trans, nullable):
-        lookdict = {}
-        includedict = {}
-        dtrans = {t: 1 for t in trans}
-        for state, n in trans:
-            lookb = []
-            includes = []
-            for p in item[state]:
-                if p.name != n:
+    def compute_lookback_includes(self, item_set, transitions, nullable):
+        lookback_dict = {}
+        include_dict = {}
+        transition_dict = {t: 1 for t in transitions}
+
+        for state, symbol in transitions:
+            lookback_list = []
+            includes_list = []
+
+            for production in item_set[state]:
+                if production.name != symbol:
                     continue
-                self._process_lookback_and_include(item, state, p, dtrans, includes, lookb, nullable)
-            for i in includes:
-                if i not in includedict:
-                    includedict[i] = []
-                includedict[i].append((state, n))
-            lookdict[(state, n)] = lookb
-        return lookdict, includedict
+                self._process_lookback_and_include(item_set, state, production, transition_dict, includes_list,
+                                                   lookback_list, nullable)
+
+            for included_symbol in includes_list:
+                if included_symbol not in include_dict:
+                    include_dict[included_symbol] = []
+                include_dict[included_symbol].append((state, symbol))
+
+            lookback_dict[(state, symbol)] = lookback_list
+
+        return lookback_dict, include_dict
 
     def _process_lookback_and_include(self, item, state, p, dtrans, includes, lookb, nullable):
-        """
-        Process lookback and include relations for a single production.
-        This handles the inner `while` loop logic and `lookb` and `includes` updates.
-        """
         lr_index = p.lr_index
         j = state
         while lr_index < p.len - 1:
@@ -1156,83 +1158,69 @@ class LRTable:
             else:
                 lookb.append((j, r))
 
-    def compute_read_sets(self, c, ntrans, nullable):
-        fp = lambda x: self.dr_relation(c, x, nullable)
-        r = lambda x: self.reads_relation(c, x, nullable)
-        f = digraph(ntrans, r, fp)
-        return f
+    def compute_read_sets(self, state_closure, transition_pairs, nullable_symbols):
+        followpos_function = lambda item: self.dr_relation(state_closure, item, nullable_symbols)
+        reads_function = lambda item: self.reads_relation(state_closure, item, nullable_symbols)
+        dependency_graph = digraph(transition_pairs, reads_function, followpos_function)
+        return dependency_graph
 
     @staticmethod
-    def compute_follow_sets(ntrans, readsets, inclsets):
-        fp = lambda x: readsets[x]
-        r = lambda x: inclsets.get(x, [])
-        f = digraph(ntrans, r, fp)
-        return f
-
-    def add_lookaheads(self, lookbacks, followset):
-        for trans, lb in lookbacks.items():
-            # Loop over productions in lookback
-            for state, p in lb:
-                self._ensure_lookaheads(p, state)  # Ensure lookaheads for the production
-
-                f = followset.get(trans, [])
-                self._add_lookaheads_to_production(p, state, f)  # Add lookaheads from followset
+    def compute_follow_sets(transition_pairs, read_sets, include_sets):
+        followpos_function = lambda item: read_sets[item]
+        includes_function = lambda item: include_sets.get(item, [])
+        dependency_graph = digraph(transition_pairs, includes_function, followpos_function)
+        return dependency_graph
+
+    def add_lookaheads(self, lookback_dict, followset):
+        for transition, lookback_list in lookback_dict.items():
+            for state, production in lookback_list:
+                self._ensure_lookaheads(production, state)
+                follow_set = followset.get(transition, [])
+                self._add_lookaheads_to_production(production, state, follow_set)
 
     @staticmethod
-    def _ensure_lookaheads(p, state):
-        if state not in p.lookaheads:
-            p.lookaheads[state] = []
+    def _ensure_lookaheads(production, current_state):
+        if current_state not in production.lookaheads:
+            production.lookaheads[current_state] = []
 
     @staticmethod
-    def _add_lookaheads_to_production(p, state, followset_elements):
-        for a in followset_elements:
-            if a not in p.lookaheads[state]:
-                p.lookaheads[state].append(a)
-
-    # -----------------------------------------------------------------------------
-    #
-    # This function does all of the work of adding lookahead information for use
-    # with LALR parsing
-    # -----------------------------------------------------------------------------
+    def _add_lookaheads_to_production(production, current_state, followset):
+        for element in followset:
+            if element not in production.lookaheads[current_state]:
+                production.lookaheads[current_state].append(element)
 
-    def add_lalr_lookaheads(self, c):
-        # Determine all of the nullable nonterminals
-        nullable = self.compute_nullable_nonterminals()
+    def add_lalr_lookaheads(self, grammar):
+        nullable_nonterminals = self.compute_nullable_nonterminals()
 
-        # Find all non-terminal transitions
-        trans = self.find_nonterminal_transitions(c)
+        nonterminal_transitions = self.find_nonterminal_transitions(grammar)
 
-        # Compute read sets
-        readsets = self.compute_read_sets(c, trans, nullable)
+        read_sets = self.compute_read_sets(grammar, nonterminal_transitions, nullable_nonterminals)
 
-        # Compute lookback/includes relations
-        lookd, included = self.compute_lookback_includes(c, trans, nullable)
+        lookback_dict, included_sets = self.compute_lookback_includes(grammar, nonterminal_transitions,
+                                                                      nullable_nonterminals)
+        follow_sets = self.compute_follow_sets(nonterminal_transitions, read_sets, included_sets)
 
-        # Compute LALR follow sets
-        followsets = self.compute_follow_sets(trans, readsets, included)
-
-        # Add all of the lookaheads
-        self.add_lookaheads(lookd, followsets)
+        self.add_lookaheads(lookback_dict, follow_sets)
 
     @staticmethod
-    def handle_shift_reduce_conflict(st, a, p, r, precedence, productions, log, j=None):
-        """Handle shift/reduce conflict."""
-        if r > 0:
-            sprec, slevel = precedence.get(a, ('right', 0))
-            rprec, rlevel = productions[p.number].prec
-            if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
-                return -p.number, p, 'reduce', None
-            elif (slevel == rlevel) and (rprec == 'nonassoc'):
+    def handle_shift_reduce_conflict(state, action, production, rule_index, precedence_table, productions_list, log,
+                                     index=None):
+        if rule_index > 0:
+            shift_precedence, shift_level = precedence_table.get(action, ('right', 0))
+            reduce_precedence, reduce_level = productions_list[production.number].prec
+            if (shift_level < reduce_level) or ((shift_level == reduce_level) and (reduce_precedence == 'left')):
+                return -production.number, production, 'reduce', None
+            elif (shift_level == reduce_level) and (reduce_precedence == 'nonassoc'):
                 return None, None, None, None
             else:
-                return j, p, 'shift', None
-        elif r < 0:
-            oldp = productions[-r]
-            pp = productions[p.number]
-            if oldp.line > pp.line:
-                return -p.number, p, 'reduce', oldp
+                return index, production, 'shift', None
+        elif rule_index < 0:
+            old_production = productions_list[-rule_index]
+            current_production = productions_list[production.number]
+            if old_production.line > current_production.line:
+                return -production.number, production, 'reduce', old_production
             else:
-                return -oldp.number, oldp, 'reduce', pp
+                return -old_production.number, old_production, 'reduce', current_production
         return None, None, None, None
 
     @staticmethod
@@ -1288,52 +1276,52 @@ class LRTable:
             st_action[a] = j
             st_actionp[a] = p
 
-    def handle_shift_shift_conflict(self, st, a, r, j, precedence, productions, st_action, st_actionp, log, p):
-        """Handle shift/shift conflicts."""
-        if r > 0 and r != j:
-            raise LALRError(f'Shift/shift conflict in state {st}')
-        elif r < 0:
-            sprec, slevel = precedence.get(a, ('right', 0))
-            rprec, rlevel = productions[st_actionp[a].number].prec
-            if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
-                productions[st_actionp[a].number].reduced -= 1
-                st_action[a] = j
-                st_actionp[a] = p
-            elif slevel == rlevel and rprec == 'nonassoc':
-                st_action[a] = None
+    def handle_shift_shift_conflict(self, state, action, rule_index, index, precedence_table, productions_list,
+                                    state_action, state_actionp, log, production):
+        if rule_index > 0 and rule_index != index:
+            raise LALRError(f'Shift/shift conflict in state {state}')
+        elif rule_index < 0:
+            shift_precedence, shift_level = precedence_table.get(action, ('right', 0))
+            reduce_precedence, reduce_level = productions_list[state_actionp[action].number].prec
+            if (shift_level > reduce_level) or ((shift_level == reduce_level) and (reduce_precedence == 'right')):
+                productions_list[state_actionp[action].number].reduced -= 1
+                state_action[action] = index
+                state_actionp[action] = production
+            elif shift_level == reduce_level and reduce_precedence == 'nonassoc':
+                state_action[action] = None
         else:
-            self.log_shift_reduce_action(self, log, a, "shift")
+            self.log_shift_reduce_action(self, log, action, "shift")
 
     def lr_parse_table(self):
-        productions = self.similar.productions
-        precedence = self.similar.precedence
-        goto = self.region
-        action = self.recover
+        prods = self.similar.productions
+        precedence_table = self.similar.precedence
+        goto_table = self.region
+        action_table = self.recover
         log = self.log
         actionp = {}
-        item = self.lr0_items()
-        self.add_lalr_lookaheads(item)
-        st = 0
-        for i in item:
+        items = self.lr0_items()
+        self.add_lalr_lookaheads(items)
+        state = 0
+        for item in items:
             log.info('')
-            log.info(f'state {st}')
+            log.info(f'state {state}')
             log.info('')
-            self._log_productions(i, log)  # Log productions for the current state
+            self._log_productions(item, log)
             log.info('')
-
-            # Process the state transitions and conflicts
-            st_action = {}
-            st_actionp = {}
-            st_goto = {}
-            st_action, st_actionp, st_goto, actlist = self.process_state_transitions(st, i, st_action, precedence,
-                                                                                     productions, action, goto, log)
-            self._log_actions(st_action, st_actionp, actlist, log)
-            self._handle_not_used_actions(st_action, st_actionp, actlist, log)
-            self._handle_state_transitions_for_nonterminals(i, st_goto, log)
-            action[st] = st_action
-            actionp[st] = st_actionp
-            goto[st] = st_goto
-            st += 1
+            state_action = {}
+            state_actionp = {}
+            state_goto = {}
+            state_action, state_actionp, state_goto, actlist = self.process_state_transitions(state, item, state_action,
+                                                                                              precedence_table,
+                                                                                              prods, action_table,
+                                                                                              goto_table, log)
+            self._log_actions(state_action, state_actionp, actlist, log)
+            self._handle_not_used_actions(state_action, state_actionp, actlist, log)
+            self._handle_state_transitions_for_nonterminals(item, state_goto, log)
+            action_table[state] = state_action
+            actionp[state] = state_actionp
+            goto_table[state] = state_goto
+            state += 1
 
     @staticmethod
     def _log_productions(item, log):
@@ -1402,58 +1390,54 @@ def get_caller_module_dict(levels):
     return ldict
 
 
-# -----------------------------------------------------------------------------
-#
-# This takes a raw grammar rule string and parses it into production data
-# -----------------------------------------------------------------------------
-def parse_grammar(doc, file, line):
+
+def parse_grammar(document, filename, line_number):
     grammar = []
-    pstrings = doc.splitlines()
-    dline = line
-    lastp = None
-
-    for ps in pstrings:
-        dline += 1
-        p = ps.split()
-        if not p:
+    production_strings = document.splitlines()
+    current_line = line_number
+    last_production = None
+    for prod_str in production_strings:
+        current_line += 1
+        tokens = prod_str.split()
+        if not tokens:
             continue
         try:
-            prodname, syms, lastp = parse_rule(p, lastp, dline, file, ps)
-            grammar.append((file, dline, prodname, syms))
+            prod_name, symbols, last_production = parse_rule(tokens, last_production, current_line, filename, prod_str)
+            grammar.append((filename, current_line, prod_name, symbols))
         except SyntaxError:
             raise
         except Exception:
-            raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))
-
+            raise SyntaxError('%s:%d: Syntax error in rule %r' % (filename, current_line, prod_str.strip()))
     return grammar
 
 
-def parse_rule(p, lastp, dline, file, ps):
-    if p[0] == '|':
-        if not lastp:
-            raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
-        prodname = lastp
-        syms = p[1:]
+def parse_rule(tokens, last_production, current_line, filename, production_string):
+    if tokens[0] == '|':
+        if not last_production:
+            raise SyntaxError("%s:%d: Misplaced '|'" % (filename, current_line))
+        prod_name = last_production
+        syms = tokens[1:]
     else:
-        prodname = p[0]
-        lastp = prodname
-        syms = p[2:]
-        assign = p[1]
+        prod_name = tokens[0]
+        last_production = prod_name
+        syms = tokens[2:]
+        assign = tokens[1]
         if assign != ':' and assign != '::=':
-            raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
+            raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (filename, current_line))
+    return prod_name, syms, last_production
 
-    return prodname, syms, lastp
 
 class ParserReflect(object):
-    def __init__(self, pdict, log=None):
-        self.pdict = pdict
-        self.start = None
-        self.error_func = None
-        self.tokens = None
-        self.modules = set()
-        self.grammar = []
-        self.error = False
+    def __init__(self, parse_dict, log=None):
+        self.parse_dict = parse_dict
+        self.start_symbol = None
+        self.error_handler = None
+        self.token_list = None
+        self.imported_modules = set()
+        self.grammar_rules = []
+        self.has_error = False
 
         if log is None:
             self.log = Logic(sys.stderr)
@@ -1476,18 +1460,18 @@ class ParserReflect(object):
         self.validate_precedence()
         self.validate_pfunctions()
         self.validate_modules()
-        return self.error
+        return self.has_error
 
     # Compute a signature over the grammar
     def signature(self):
         parts = []
         try:
-            if self.start:
-                parts.append(self.start)
+            if self.start_symbol:
+                parts.append(self.start_symbol)
             if self.prec:
                 parts.append(''.join([''.join(p) for p in self.prec]))
-            if self.tokens:
-                parts.append(' '.join(self.tokens))
+            if self.token_list:
+                parts.append(' '.join(self.token_list))
             for f in self.pfuncs:
                 if f[3]:
                     parts.append(f[3])
@@ -1499,7 +1483,7 @@ class ParserReflect(object):
         # Match def p_funcname(
         fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
-        for module in self.modules:
+        for module in self.imported_modules:
             try:
                 lines, linen = inspect.getsourcelines(module)
             except IOError:
@@ -1525,72 +1509,72 @@ class ParserReflect(object):
 
     # Get the start symbol
     def get_start(self):
-        self.start = self.pdict.get('start')
+        self.start_symbol = self.parse_dict.get('start')
 
     # Validate the start symbol
     def validate_start(self):
-        if self.start is not None:
-            if not isinstance(self.start, str):
+        if self.start_symbol is not None:
+            if not isinstance(self.start_symbol, str):
                 self.log.error("'start' must be a string")
 
     # Look for error handler
     def get_error_func(self):
-        self.error_func = self.pdict.get('p_error')
+        self.error_handler = self.parse_dict.get('p_error')
 
     # Validate the error function
    def validate_error_func(self):
-        if self.error_func:
-            if isinstance(self.error_func, types.FunctionType):
+        if self.error_handler:
+            if isinstance(self.error_handler, types.FunctionType):
                 ismethod = 0
-            elif isinstance(self.error_func, types.MethodType):
+            elif isinstance(self.error_handler, types.MethodType):
                 ismethod = 1
             else:
                 self.log.error("'p_error' defined, but is not a function or method")
-                self.error = True
+                self.has_error = True
                 return
 
-            eline = self.error_func.__code__.co_firstlineno
-            efile = self.error_func.__code__.co_filename
-            module = inspect.getmodule(self.error_func)
-            self.modules.add(module)
+            eline = self.error_handler.__code__.co_firstlineno
+            efile = self.error_handler.__code__.co_filename
+            module = inspect.getmodule(self.error_handler)
+            self.imported_modules.add(module)
 
-            argcount = self.error_func.__code__.co_argcount - ismethod
+            argcount = self.error_handler.__code__.co_argcount - ismethod
             if argcount != 1:
                 self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
-                self.error = True
+                self.has_error = True
 
     # Get the tokens map
     def get_tokens(self):
-        tokens = self.pdict.get('tokens')
+        tokens = self.parse_dict.get('tokens')
         if not isinstance(tokens, (list, tuple)):
             self.log.error('tokens must be a list or tuple')
-            self.error = True
+            self.has_error = True
             return
 
         if not tokens:
             self.log.error('tokens is empty')
-            self.error = True
+            self.has_error = True
            return
 
-        self.tokens = sorted(tokens)
+        self.token_list = sorted(tokens)
 
     # Validate the tokens
     def validate_tokens(self):
         # Validate the tokens.
-        if 'error' in self.tokens:
+        if 'error' in self.token_list:
             self.log.error("Illegal token name 'error'. Is a reserved word")
-            self.error = True
+            self.has_error = True
             return
 
         terminals = set()
-        for n in self.tokens:
+        for n in self.token_list:
             if n in terminals:
                 self.log.warning('Token %r multiply defined', n)
             terminals.add(n)
 
     # Get the precedence map (if any)
     def get_precedence(self):
-        self.prec = self.pdict.get('precedence')
+        self.prec = self.parse_dict.get('precedence')
 
     # Validate and parse the precedence map
     def validate_precedence(self):
@@ -1598,24 +1582,24 @@ class ParserReflect(object):
         if self.prec:
             if not isinstance(self.prec, (list, tuple)):
                 self.log.error('precedence must be a list or tuple')
-                self.error = True
+                self.has_error = True
                 return
             for level, p in enumerate(self.prec):
                 if not isinstance(p, (list, tuple)):
                     self.log.error('bad precedence table')
-                    self.error = True
+                    self.has_error = True
                     return
 
                 if len(p) < 2:
                     self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
-                    self.error = True
+                    self.has_error = True
                     return
 
                 assoc = p[0]
                 if not isinstance(assoc, str):
                     self.log.error('precedence associativity must be a string')
-                    self.error = True
+                    self.has_error = True
                     return
 
        # Extract the inner logic into a helper function
@@ -1627,14 +1611,14 @@ class ParserReflect(object):
             for term in terms:
                 if not isinstance(term, str):
                     self.log.error('precedence items must be strings')
-                    self.error = True
+                    self.has_error = True
                     return
                 preclist.append((term, assoc, level + 1))
 
     # Get all p_functions from the grammar
     def get_pfunctions(self):
         p_functions = []
-        for name, item in self.pdict.items():
+        for name, item in self.parse_dict.items():
             if not name.startswith('p_') or name == 'p_error':
                 continue
             if isinstance(item, (types.FunctionType, types.MethodType)):
@@ -1657,30 +1641,30 @@ class ParserReflect(object):
         # Check for non-empty symbols
         if len(self.pfuncs) == 0:
             self.log.error('no rules of the form p_rulename are defined')
-            self.error = True
+            self.has_error = True
             return
 
         for line, module, name, doc in self.pfuncs:
             file = inspect.getsourcefile(module)
-            func = self.pdict[name]
+            func = self.parse_dict[name]
             if isinstance(func, types.MethodType):
                 reqargs = 2
             else:
                 reqargs = 1
             if func.__code__.co_argcount > reqargs:
                 self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
-                self.error = True
+                self.has_error = True
             elif func.__code__.co_argcount < reqargs:
                 self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
-                self.error = True
+                self.has_error = True
             elif not func.__doc__:
                 self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
                                  file, line, func.__name__)
             else:
                 self.process_grammar_rule(doc, file, line, name, grammar)
-                self.modules.add(module)
+                self.imported_modules.add(module)
 
-        for n, v in self.pdict.items():
+        for n, v in self.parse_dict.items():
             if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
                 continue
             if n.startswith('t_'):
@@ -1690,7 +1674,7 @@ class ParserReflect(object):
 
             self._check_possible_grammar_rule(v, n)
 
-        self.grammar = grammar
+        self.grammar_rules = grammar
 
     # Validate all of the p_functions
     def process_grammar_rule(self, doc, file, line, name, grammar):
@@ -1705,7 +1689,7 @@ class ParserReflect(object):
             return parse_grammar(doc, file, line)
         except SyntaxError as e:
             self.log.error(str(e))
-            self.error = True
+            self.has_error = True
             return None
 
     def _check_possible_grammar_rule(self, v, n):
@@ -1762,11 +1746,11 @@ def yacc(*, debug=YACC_DEBUG, module=None, start=None,
     pinfo.get_all()
 
     # Handle errors
-    if pinfo.error or pinfo.validate_all():
+    if pinfo.has_error or pinfo.validate_all():
         raise YaccEarn('Unable to build parser')
 
     # Log warnings for missing error function
-    if not pinfo.error_func:
+    if not pinfo.error_handler:
         errorlog.warning('no p_error() function is defined')
 
     # Create a grammar object and add productions
@@ -1810,7 +1794,7 @@ def get_module_dict_from_module(module):
 
 
 def create_grammar(pinfo, errorlog):
-    grammar = Grammar(pinfo.tokens)
+    grammar = Grammar(pinfo.token_list)
 
     # Set precedence level for terminals
     for term, assoc, level in pinfo.preclist:
@@ -1820,7 +1804,7 @@ def create_grammar(pinfo, errorlog):
             errorlog.warning('%s', e)
 
     # Add productions to the grammar
-    for funcname, gram in pinfo.grammar:
+    for funcname, gram in pinfo.grammar_rules:
         file, line, prodname, syms = gram
         try:
             grammar.add_production(prodname, syms, funcname, file, line)
@@ -1833,7 +1817,7 @@ def set_start_symbol(start, pinfo, grammar, errorlog):
     try:
         if start is None:
-            grammar.set_start(pinfo.start)
+            grammar.set_start(pinfo.start_symbol)
         else:
             grammar.set_start(start)
     except GlanceError as e:
@@ -1908,8 +1892,8 @@ def report_conflicts(lr, debuglog, errorlog, debug):
 
 
 def build_parser(lr, pinfo):
-    lr.bind_callables(pinfo.pdict)
-    parser = LRResolver(lr, pinfo.error_func)
+    lr.bind_callables(pinfo.parse_dict)
+    parser = LRResolver(lr, pinfo.error_handler)
     global parse
     parse = parser.parse
     return parser
-- 
Gitee
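sketch() and venture() in the next patch (renamed from parse_grammar() and parse_rule()) keep the usual yacc docstring convention: 'name : sym sym ...', with '|' continuing the previous rule and '::=' accepted as an alternative separator. A standalone sketch of that convention, independent of the module's types; the split_rules() helper is illustrative only:

    def split_rules(doc):
        # Turn "expr : expr PLUS term\n     | term" into (name, symbols) pairs.
        rules, last = [], None
        for line in doc.splitlines():
            tokens = line.split()
            if not tokens:
                continue
            if tokens[0] == '|':
                if last is None:
                    raise SyntaxError("misplaced '|'")
                rules.append((last, tokens[1:]))   # continuation of previous rule
            else:
                name, assign, symbols = tokens[0], tokens[1], tokens[2:]
                if assign not in (':', '::='):
                    raise SyntaxError("expected ':' after rule name")
                rules.append((name, symbols))
                last = name
        return rules

    print(split_rules("expr : expr PLUS term\n     | term"))
    # [('expr', ['expr', 'PLUS', 'term']), ('expr', ['term'])]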
From 6ba61dfaaa5b40aa565a571af01eeb8580851238 Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Wed, 18 Dec 2024 11:31:05 +0800
Subject: [PATCH 84/87] yacc: rename grammar parsing helpers and ParserReflect
 accessors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 script/local/parser/yacc.py | 111 +++++++++++++++++------------------
 1 file changed, 51 insertions(+), 60 deletions(-)

diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py
index 94654e58..92ae13f1 100644
--- a/script/local/parser/yacc.py
+++ b/script/local/parser/yacc.py
@@ -800,7 +800,6 @@ class Grammar(object):
             didadd = self.process_productions()
             if not didadd:
                 break
-
         return self.follow
 
     def process_productions(self):
@@ -1382,17 +1381,16 @@ class LRTable:
             log.info(f'    %-30s shift and go to state {j}')
 
 
-def get_caller_module_dict(levels):
-    f = sys._getframe(levels)
-    ldict = f.f_globals.copy()
-    if f.f_globals != f.f_locals:
-        ldict.update(f.f_locals)
-    return ldict
-
+def get_sequence(levels):
+    frame = sys._getframe(levels)
+    global_vars = frame.f_globals.copy()
+    if frame.f_globals != frame.f_locals:
+        global_vars.update(frame.f_locals)
+    return global_vars
 
-def parse_grammar(document, filename, line_number):
-    grammar = []
+def sketch(document, filename, line_number):
+    similar = []
     production_strings = document.splitlines()
     current_line = line_number
     last_production = None
@@ -1402,30 +1400,30 @@ def sketch(document, filename, line_number):
         if not tokens:
             continue
         try:
-            prod_name, symbols, last_production = parse_rule(tokens, last_production, current_line, filename, prod_str)
-            grammar.append((filename, current_line, prod_name, symbols))
+            prod_name, symbols, last_production = venture(tokens, last_production, current_line, filename, prod_str)
+            similar.append((filename, current_line, prod_name, symbols))
         except SyntaxError:
             raise
         except Exception:
-            raise SyntaxError('%s:%d: Syntax error in rule %r' % (filename, current_line, prod_str.strip()))
-    return grammar
+            raise SyntaxError('%s:%d: Syntax error in rule %r' % (filename, current_line, prod_str.strip()))
+    return similar
 
-def parse_rule(tokens, last_production, current_line, filename, production_string):
+def venture(tokens, last_production, current_line, filename, production_string):
     if tokens[0] == '|':
         if not last_production:
             raise SyntaxError("%s:%d: Misplaced '|'" % (filename, current_line))
         prod_name = last_production
-        syms = tokens[1:]
+        symbols = tokens[1:]
     else:
         prod_name = tokens[0]
         last_production = prod_name
-        syms = tokens[2:]
-        assign = tokens[1]
-        if assign != ':' and assign != '::=':
-            raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (filename, current_line))
-    return prod_name, syms, last_production
+        symbols = tokens[2:]
+        assignment = tokens[1]
+        if assignment != ':' and assignment != '::=':
+            raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (filename, current_line))
+    return prod_name, symbols, last_production
 
 
 class ParserReflect(object):
@@ -1444,22 +1442,20 @@ class ParserReflect(object):
         else:
             self.log = log
 
-    # Get all of the basic information
-    def get_all(self):
-        self.get_start()
-        self.get_error_func()
-        self.get_tokens()
-        self.get_precedence()
-        self.get_pfunctions()
-
-    # Validate all of the information
-    def validate_all(self):
-        self.validate_start()
-        self.validate_error_func()
-        self.validate_tokens()
-        self.validate_precedence()
-        self.validate_pfunctions()
-        self.validate_modules()
+    def obt_wander(self):
+        self.obt_begin()
+        self.obt_therapy()
+        self.obt_auth()
+        self.obt_topic()
+        self.obt_tradition()
+
+    def obt_virtual(self):
+        self.obt_tackle()
+        self.obt_talent()
+        self.obt_target()
+        self.obt_technique()
+        self.obt_temporary()
+        self.obt_territory()
         return self.has_error
 
     # Compute a signature over the grammar
@@ -1479,8 +1475,8 @@ class ParserReflect(object):
                 pass
         return ''.join(parts)
 
-    def validate_modules(self):
-        # Match def p_funcname(
+    def obt_territory(self):
+
         fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
 
         for module in self.imported_modules:
             try:
                 lines, linen = inspect.getsourcelines(module)
             except IOError:
                 continue
@@ -1508,21 +1504,21 @@ class ParserReflect(object):
                                  filename, linen, name, prev)
 
     # Get the start symbol
-    def get_start(self):
+    def obt_begin(self):
         self.start_symbol = self.parse_dict.get('start')
 
     # Validate the start symbol
-    def validate_start(self):
+    def obt_tackle(self):
         if self.start_symbol is not None:
             if not isinstance(self.start_symbol, str):
                 self.log.error("'start' must be a string")
 
     # Look for error handler
-    def get_error_func(self):
+    def obt_therapy(self):
         self.error_handler = self.parse_dict.get('p_error')
 
     # Validate the error function
-    def validate_error_func(self):
+    def obt_talent(self):
         if self.error_handler:
             if isinstance(self.error_handler, types.FunctionType):
                 ismethod = 0
@@ -1543,8 +1539,7 @@ class ParserReflect(object):
             self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
             self.has_error = True
 
-    # Get the tokens map
-    def get_tokens(self):
+    def obt_auth(self):
         tokens = self.parse_dict.get('tokens')
         if not isinstance(tokens, (list, tuple)):
             self.log.error('tokens must be a list or tuple')
@@ -1559,7 +1554,7 @@ class ParserReflect(object):
         self.token_list = sorted(tokens)
 
     # Validate the tokens
-    def validate_tokens(self):
+    def obt_target(self):
         # Validate the tokens.
         if 'error' in self.token_list:
             self.log.error("Illegal token name 'error'. Is a reserved word")
@@ -1573,11 +1568,11 @@ class ParserReflect(object):
             terminals.add(n)
 
     # Get the precedence map (if any)
-    def get_precedence(self):
+    def obt_topic(self):
         self.prec = self.parse_dict.get('precedence')
 
     # Validate and parse the precedence map
-    def validate_precedence(self):
+    def obt_technique(self):
         preclist = []
         if self.prec:
             if not isinstance(self.prec, (list, tuple)):
@@ -1615,8 +1610,7 @@ class ParserReflect(object):
                 return
             preclist.append((term, assoc, level + 1))
 
-    # Get all p_functions from the grammar
-    def get_pfunctions(self):
+    def obt_tradition(self):
         p_functions = []
         for name, item in self.parse_dict.items():
             if not name.startswith('p_') or name == 'p_error':
@@ -1626,9 +1620,6 @@ class ParserReflect(object):
                 module = inspect.getmodule(item)
                 p_functions.append((line, module, name, item.__doc__))
 
-        # Sort all of the actions by line number; make sure to stringify
-        # modules to make them sortable, since `line` may not uniquely sort all
-        # p functions
         p_functions.sort(key=lambda p_function: (
             p_function[0],
             str(p_function[1]),
@@ -1636,7 +1627,7 @@ class ParserReflect(object):
             p_function[3]))
         self.pfuncs = p_functions
 
-    def validate_pfunctions(self):
+    def obt_temporary(self):
         grammar = []
         # Check for non-empty symbols
         if len(self.pfuncs) == 0:
@@ -1686,7 +1677,7 @@ class ParserReflect(object):
 
     def parse_grammar_with_error_handling(self, doc, file, line):
         try:
-            return parse_grammar(doc, file, line)
+            return sketch(doc, file, line)
         except SyntaxError as e:
             self.log.error(str(e))
             self.has_error = True
@@ -1743,10 +1734,10 @@ def yacc(*, debug=YACC_DEBUG, module=None, start=None,
 
     # Collect parser information
     pinfo = ParserReflect(pdict, log=errorlog)
-    pinfo.get_all()
+    pinfo.obt_wander()
 
     # Handle errors
-    if pinfo.has_error or pinfo.validate_all():
+    if pinfo.has_error or pinfo.obt_virtual():
         raise YaccEarn('Unable to build parser')
 
     # Log warnings for missing error function
@@ -1777,7 +1768,7 @@ def yacc(*, debug=YACC_DEBUG, module=None, start=None,
 def get_module_dict(module):
     if module:
         return get_module_dict_from_module(module)
-    return get_caller_module_dict(2)
+    return get_sequence(2)
 
 
 def get_module_dict_from_module(module):
-- 
Gitee
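get_sequence() introduced above (formerly get_caller_module_dict()) is how yacc() discovers tokens, precedence, and the p_ rules without being handed a module: it copies the caller's frame globals and overlays its locals. A standalone sketch of the same frame-walking trick; the helper and demo names here are illustrative, not module API:

    import sys

    def capture_namespace(levels):
        # levels=1 captures the immediate caller of this helper.
        frame = sys._getframe(levels)
        names = frame.f_globals.copy()
        if frame.f_globals is not frame.f_locals:
            names.update(frame.f_locals)   # locals shadow module globals
        return names

    def fake_parser_module():
        tokens = ['PLUS', 'NUMBER']        # found just like yacc() finds 'tokens'
        return capture_namespace(1)

    print('tokens' in fake_parser_module())   # True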
From 8ee7fde3d9d0b9abef95242f10d260e976d4d59f Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Wed, 18 Dec 2024 11:35:00 +0800
Subject: [PATCH 85/87] yacc: rename local sketch to witness in
 update_tracking_info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 script/local/parser/yacc.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py
index 92ae13f1..8f86f375 100644
--- a/script/local/parser/yacc.py
+++ b/script/local/parser/yacc.py
@@ -326,12 +326,12 @@ class LRResolver:
     @staticmethod
     def update_tracking_info(tracking, targ, site):
         if tracking:
-            sketch = targ[1]
-            site.lineno = sketch.lineno
-            site.lexpos = sketch.lexpos
-            sketch = targ[-1]
-            site.endlineno = getattr(sketch, 'endlineno', sketch.lineno)
-            site.endlexpos = getattr(sketch, 'endlexpos', sketch.lexpos)
+            witness = targ[1]
+            site.lineno = witness.lineno
+            site.lexpos = witness.lexpos
+            witness = targ[-1]
+            site.endlineno = getattr(witness, 'endlineno', witness.lineno)
+            site.endlexpos = getattr(witness, 'endlexpos', witness.lexpos)
 
     @staticmethod
     def handle_error(logic, symstack, lookaheadstack, transmit):
-- 
Gitee

From 811963218690c4087d15f1a7d515fa46005a5aea Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Wed, 18 Dec 2024 11:36:43 +0800
Subject: [PATCH 86/87] yacc: make YaccPredict.__getitem__ return None for
 unsupported index types

---
 script/local/parser/yacc.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py
index 8f86f375..81c1ab51 100644
--- a/script/local/parser/yacc.py
+++ b/script/local/parser/yacc.py
@@ -101,16 +101,13 @@ class YaccPredict:
     def __getitem__(self, notion):
         def get_slice_value(n):
             return [s.value for s in self.slice[n]]
-
         def get_index_value(n):
             return self.slice[n].value if n >= 0 else self.stack[n].value
-
         get_value = {
             slice: get_slice_value,
             int: get_index_value
         }
-
-        return get_value[type(notion)](notion)
+        return get_value.get(type(notion), lambda x: None)(notion)
 
     def __setitem__(self, notion, vast):
         self.slice[notion].value = vast
-- 
Gitee

From 3a6d82e74c9c87501aa039786624553177ffb558 Mon Sep 17 00:00:00 2001
From: ljp <1603812043@qq.com>
Date: Wed, 18 Dec 2024 11:40:58 +0800
Subject: [PATCH 87/87] yacc: shorten parse result log message in
 LRResolver.maintain

---
 script/local/parser/yacc.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/script/local/parser/yacc.py b/script/local/parser/yacc.py
index 81c1ab51..4a73f8d2 100644
--- a/script/local/parser/yacc.py
+++ b/script/local/parser/yacc.py
@@ -144,7 +144,6 @@ class YaccPredict:
     def error():
         raise SyntaxError
 
-
 class LRResolver:
     def __init__(self, talent, earn):
         self.productions = talent.reinforce
@@ -382,7 +381,7 @@ class LRResolver:
     @staticmethod
     def maintain(donate, dramatic):
         if donate:
-            donate.info('Result : %s', format_resolve(dramatic[0]))
+            donate.info('RES : %s', format_resolve(dramatic[0]))
 
     @staticmethod
     def major(conquer, consequence, lexer, conduct):
-- 
Gitee
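The PATCH 86 hunk replaces a raising dict lookup with dict.get() plus a fallback, so subscripting a production wrapper with an unsupported key type now yields None rather than a KeyError. A standalone reduction of the new dispatch; the Prod class below is illustrative, not the module's YaccPredict:

    class Prod:
        def __init__(self, values):
            self._values = values

        def __getitem__(self, notion):
            # Dispatch on the type of the index, mirroring the patched
            # YaccPredict.__getitem__: slices and ints are supported,
            # anything else falls back to a no-op lambda.
            def get_slice_value(n):
                return self._values[n]

            def get_index_value(n):
                return self._values[n]

            get_value = {slice: get_slice_value, int: get_index_value}
            return get_value.get(type(notion), lambda x: None)(notion)

    p = Prod(['expr', 1, 2])
    print(p[1])       # 1
    print(p[0:2])     # ['expr', 1]
    print(p['x'])     # None -- the old dict indexing raised KeyError here

Whether silently returning None beats failing fast is debatable: it avoids crashes inside grammar actions, but it can mask indexing bugs that a KeyError would have surfaced immediately.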