Replace tokenizing process with an actual tokenizer. Now we can handle things like 3*x and even 3x

Hugo Sales 2018-12-20 23:04:19 +00:00
parent f9d579c494
commit 4e9d1ec760
1 changed file with 12 additions and 4 deletions

@@ -33,6 +33,13 @@
 */
 :- use_module(library(clpr)).
+/*
+ * The porter_stem library implements the stemming algorithm described by
+ * Porter in Porter, 1980, ``An algorithm for suffix stripping''.
+ * The library also includes a functional tokenizer.
+ */
+:- use_module(library(porter_stem)).
 /*******************************
  *             NLP             *
  *******************************/
@@ -48,10 +55,11 @@ polyplay :-
     read_line_to_codes(user_input, InCodes),
     %% Restore old prompt
     prompt(_, OldPrompt),
-    %% Split the input at spaces and ignore \r and \t
-    split_string(InCodes, " ", "\r\t", LS),
-    %% Convert each set of codes into a term or atom, as appropriate
-    maplist(name, LA, LS),
+    %% Use the porter_stem library to tokenize the input.
+    %% This does more than splitting at spaces, such as
+    %% splitting at operators. Additionally, it yields
+    %% atoms directly and handles input like 'P1' well.
+    tokenize_atom(InCodes, LA),
     (
     %% If we read a 'bye', terminate
     LA == [bye],
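
As a quick illustration of the new behaviour, here is a minimal sketch (tokenize_atom/2 is the SWI-Prolog porter_stem predicate used above; the demo_tokens/0 driver and its sample inputs are hypothetical):

    :- use_module(library(porter_stem)).

    %% Print the token list produced for a few sample inputs,
    %% including the 3*x and 3x cases from the commit message.
    demo_tokens :-
        forall(member(Input, ['3*x', '3x', 'bye']),
               ( tokenize_atom(Input, Tokens),
                 format("~w -> ~w~n", [Input, Tokens]) )).

Because tokenize_atom/2 splits at operator characters and at digit/letter boundaries, '3*x' tokenizes to [3, *, x] and '3x' to [3, x], where the old split_string/4 pipeline, splitting only at spaces, would have kept each as a single token.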