Logo Studenta

main-parcial

¡Estudia con miles de materiales!

Vista previa del material en texto

lOMoARcPSD|3741347 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lOMoARcPSD|3741347 
 
 
 
%% CLEAN UP
% Start from a known state: clear the command window, wipe the workspace
% and close every open figure.
clc;
clear all;
close all;

%% ADD PATH
% Put 'mat-codes' and all of its subfolders on the MATLAB path so the
% helper functions stored there can be called from this script.
addpath(genpath('mat-codes'))

%% LOAD DATA
% The script reads X, Xnew and y from this MAT-file.
load('datasets/datos.mat');

% Fail early, with a clear message, if the MAT-file does not provide the
% variables used below (the former "y=y;" only checked y, and only by
% accident).
if ~(exist('X','var') == 1 && exist('Xnew','var') == 1 && exist('y','var') == 1)
    error('main_parcial:missingData', ...
        'datasets/datos.mat debe contener las variables X, Xnew e y.');
end

% Working copies with column 9 removed; X and Xnew themselves are kept.
X1 = X;
X1(:,9) = [];
Xnewr = Xnew;
Xnewr(:,9) = [];
 
%% FIGURES
% Exploratory plots. Each figure is also saved as a PNG in the current
% folder.

% Correlation matrix between the columns of X1.
R = corr(X1);
figure;
imagesc(R);
colorbar;
title('Matriz de Correlacion');
print('Matriz_de_Correlacion','-dpng');

% PCA on the standardized data.
% NOTE(review): A_pca is a project helper; Z is used here as the projected
% samples and W as the per-feature loadings, and 0.95 looks like a retained
% variance threshold - confirm against A_pca.
[Z,W,lambda] = A_pca(zscore(X1),0.95);
figure;
gscatter(Z(:,1),Z(:,2),y);
title('PCA');
print('PCA','-dpng');

% 3-D scatter of the first three projected dimensions, colored by label.
% Only drawn when Z actually has a third column; otherwise Z(:,3) would
% raise an index error and abort the whole script.
if size(Z,2) >= 3
    figure;
    scatter3(Z(:,1),Z(:,2),Z(:,3),30,y,'filled');
    title('Analaisis 3D PCA')
    print('Analasis_3D','-dpng');
else
    warning('main_parcial:pca3d', ...
        'Se omite la figura 3D: A_pca devolvio %d componente(s).', size(Z,2));
end

% Magnitude of the first column of W for every feature.
figure
bar(abs(W(:,1)));

title('Relevancia');
print('Relevancia','-dpng');
 
%% DISTANCE
% Pairwise distance between every pair of rows of X1 (pdist2 uses the
% Euclidean metric when none is given), shown as an image and saved.

D = pdist2(X1,X1);
figure
imagesc(D);
colorbar;

title('MATRIZ DE DISTANCIA');
print('matriz_distancia','-dpng');

 
 
 
%% initialize variables
nr = 10;                % number of repetitions
ptrain = 0.7;           % fraction of samples used for training
C = numel(unique(y));   % number of classes
P = size(X1,2);         % number of features

% Hold-out indices, one cell per repetition.
itest = cell(1,nr);
itrain = cell(1,nr);

% Feature relevance and the resulting feature ranking, one row per
% repetition.
rho_pca = zeros(nr,P);          % PCA-based relevance
indr_pca = zeros(nr,P);

rho_relieff = zeros(nr,P);      % ReliefF relevance
indr_relieff = zeros(nr,P);

% Test accuracy (nr x P) and confusion matrices (C x C x nr x P) for each
% classifier. Suffix _?p = features ranked by PCA, _?r = ranked by ReliefF.
acc_test_lp = zeros(nr,P);      % linear discriminant, PCA ranking
Cm_test_lp = zeros(C,C,nr,P);
acc_test_lr = zeros(nr,P);      % linear discriminant, ReliefF ranking
Cm_test_lr = zeros(C,C,nr,P);

acc_test_qp = zeros(nr,P);      % quadratic discriminant, PCA ranking
Cm_test_qp = zeros(C,C,nr,P);
acc_test_qr = zeros(nr,P);      % quadratic discriminant, ReliefF ranking
Cm_test_qr = zeros(C,C,nr,P);

acc_test_kp = zeros(nr,P);      % k-NN, PCA ranking
Cm_test_kp = zeros(C,C,nr,P);
acc_test_kr = zeros(nr,P);      % k-NN, ReliefF ranking
Cm_test_kr = zeros(C,C,nr,P);

% Number of neighbors chosen for k-NN at every (repetition, #features);
% preallocated so the arrays do not grow inside the nested loop.
kp = zeros(nr,P);
kr = zeros(nr,P);
 
%% main loop over repetitions
% For every repetition: draw a hold-out split, standardize with the
% training statistics, rank the features by PCA relevance and by ReliefF,
% and then, for j = 1..P, train linear / pseudo-quadratic discriminants and
% k-NN on the j best features of each ranking and score them on the test
% split.
ti = clock; % start time of the experiment (not read inside this section)


for i = 1 : nr % repeated hold-out validation
    tic
    fprintf('it.%d/%d...\n',i,nr)

    % crossvalind('HoldOut',...) returns [train,test] where the third
    % argument is the fraction held out for *test*. The outputs are
    % captured in swapped order on purpose, so that itrain receives the
    % ptrain fraction of the samples.
    [itest{1,i}, itrain{1,i}] = crossvalind('HoldOut',y,ptrain);
    Xtrain = X1(itrain{1,i},:);
    Xtest = X1(itest{1,i},:);
    ytrain = y(itrain{1,i});
    ytest = y(itest{1,i});

    %% z-score normalization
    % Standardize the training set and apply the same mean / deviation to
    % the test set. zscore reports a deviation of 0 for constant columns
    % (while using 1 internally), so zeros are replaced by 1 here to avoid
    % Inf/NaN in the test features.
    [Xtrain,muz,stdz] = zscore(Xtrain);
    stdz_safe = stdz;
    stdz_safe(stdz_safe == 0) = 1;
    Xtest = (Xtest - repmat(muz,size(Xtest,1),1))./repmat(stdz_safe,size(Xtest,1),1);

    %% representation: feature relevance from PCA
    % Relevance of a feature = sum over the retained components of
    % |loading| weighted by the matching entry of lambda.
    % NOTE(review): assumes lambda is a column vector returned by A_pca -
    % confirm.
    [~,W,lambda] = A_pca(Xtrain,0.95);
    rho_pca(i,:) = sum(abs(W.*repmat(lambda(1:size(W,2))',size(W,1),1)),2);
    % rescale the relevance to the range 0..1
    rho_pca(i,:) = rho_pca(i,:)-min(rho_pca(i,:));
    rho_pca(i,:) = rho_pca(i,:)/std(rho_pca(i,:));
    rho_pca(i,:) = rho_pca(i,:)/max(rho_pca(i,:));

    % feature indices, most relevant first
    [~,indr_pca(i,:)] = sort(rho_pca(i,:),'descend');

    %% ReliefF relevance
    % relieff returns the ranking directly; the weights are rescaled the
    % same way as the PCA relevance.
    [indr_relieff(i,:),rho_relieff(i,:)] = relieff(Xtrain,ytrain,1);
    rho_relieff(i,:) = rho_relieff(i,:)- min(rho_relieff(i,:));
    rho_relieff(i,:) = rho_relieff(i,:)/std(rho_relieff(i,:));
    rho_relieff(i,:) = rho_relieff(i,:)/max(rho_relieff(i,:));

    %% feature-selection loop
    tj = clock; % start time of the feature-selection loop
    for j = 1 : P
        fprintf('iter=%d, #car=%d\n',i,j)

        % j best features according to the PCA ranking
        Ztrain_p = Xtrain(:,indr_pca(i,1:j));
        Ztest_p = Xtest(:,indr_pca(i,1:j));

        % j best features according to the ReliefF ranking
        Ztrain_r = Xtrain(:,indr_relieff(i,1:j));
        Ztest_r = Xtest(:,indr_relieff(i,1:j));

        % ---- train ----

        % Naive Bayes (not implemented)

        % linear discriminant
        mdl_lp = fitcdiscr(Ztrain_p,ytrain,'Discrimtype','linear');
        mdl_lr = fitcdiscr(Ztrain_r,ytrain,'Discrimtype','linear');

        % pseudo-quadratic discriminant
        mdl_qp = fitcdiscr(Ztrain_p,ytrain,'Discrimtype','pseudoquadratic');
        mdl_qr = fitcdiscr(Ztrain_r,ytrain,'Discrimtype','pseudoquadratic');

        % k-NN; the number of neighbors comes from the project helper
        % cvaknn, evaluated on the training data only.
        % Fixed-k alternative kept for reference:
        % k = 1;
        % mdl_kp = fitcknn(Ztrain_p,ytrain,'NumNeighbors',k);
        % mdl_kr = fitcknn(Ztrain_r,ytrain,'NumNeighbors',k);
        kp(i,j) = cvaknn(Ztrain_p,ytrain);
        mdl_kp = fitcknn(Ztrain_p,ytrain,'NumNeighbors',kp(i,j));

        kr(i,j) = cvaknn(Ztrain_r,ytrain);
        mdl_kr = fitcknn(Ztrain_r,ytrain,'NumNeighbors',kr(i,j));

        % svm (not implemented)

        % net (not implemented)

        % ---- score on the test split ----

        % Naive Bayes (not implemented)

        % linear
        ytest_e_lp = predict(mdl_lp,Ztest_p);
        [acc_test_lp(i,j),Cm_test_lp(:,:,i,j)] = A_acc_cm(ytest,ytest_e_lp,C);
        ytest_e_lr = predict(mdl_lr,Ztest_r);
        [acc_test_lr(i,j),Cm_test_lr(:,:,i,j)] = A_acc_cm(ytest,ytest_e_lr,C);

        % quadratic
        ytest_e_qp = predict(mdl_qp,Ztest_p);
        [acc_test_qp(i,j),Cm_test_qp(:,:,i,j)] = A_acc_cm(ytest,ytest_e_qp,C);
        ytest_e_qr = predict(mdl_qr,Ztest_r);
        [acc_test_qr(i,j),Cm_test_qr(:,:,i,j)] = A_acc_cm(ytest,ytest_e_qr,C);

        % k-NN
        ytest_e_kp = predict(mdl_kp,Ztest_p);
        [acc_test_kp(i,j),Cm_test_kp(:,:,i,j)] = A_acc_cm(ytest,ytest_e_kp,C);
        ytest_e_kr = predict(mdl_kr,Ztest_r);
        [acc_test_kr(i,j),Cm_test_kr(:,:,i,j)] = A_acc_cm(ytest,ytest_e_kr,C);

        % svm (not implemented)

        % net (not implemented)

    end

    % Best accuracy of this repetition and the number of features that
    % reached it, for the PCA ranking ...
    [al,ml] = max(acc_test_lp(i,:));
    [aq,mq] = max(acc_test_qp(i,:));
    [ak,mk] = max(acc_test_kp(i,:));
    fprintf('acc_max_l_pca=%.2f (#f=%d)\nacc_max_q_pca=%.2f(#f=%d)\nacc_max_knn_pca=%.2f(#f=%d)\n',...
        al(1),ml(1),aq(1),mq(1),ak(1),mk(1))
    % ... and for the ReliefF ranking.
    [al,ml] = max(acc_test_lr(i,:));
    [aq,mq] = max(acc_test_qr(i,:));
    [ak,mk] = max(acc_test_kr(i,:));
    fprintf('acc_max_l_reli=%.2f (#f=%d)\nacc_max_q_reli=%.2f(#f=%d)\nacc_max_knn_reli=%.2f(#f=%d)\n',...
        al(1),ml(1),aq(1),mq(1),ak(1),mk(1))

    % Elapsed time measured from tj, i.e. the feature-selection loop of
    % this repetition.
    fprintf('tiempo it %.2f [s]\n',etime(clock,tj))
end
 
%% PLOTS
% Mean +/- standard deviation of the test accuracy over the repetitions as
% a function of the number of selected features, one curve per
% (ranking, classifier) pair. Dash-dot lines use the PCA ranking, solid
% lines the ReliefF ranking.

figure
hold on
% Explicit dimension arguments keep the curves 1-by-P even when there is a
% single repetition. The handles are kept so the legend can be restricted
% to these six curves.
h_lp = errorbar(1:P,mean(acc_test_lp,1),std(acc_test_lp,0,1),'y-.','LineWidth',2);
h_qp = errorbar(1:P,mean(acc_test_qp,1),std(acc_test_qp,0,1),'b-.','LineWidth',2);
h_kp = errorbar(1:P,mean(acc_test_kp,1),std(acc_test_kp,0,1),'g-.','LineWidth',2);

h_lr = errorbar(1:P,mean(acc_test_lr,1),std(acc_test_lr,0,1),'y','LineWidth',2);
h_qr = errorbar(1:P,mean(acc_test_qr,1),std(acc_test_qr,0,1),'b','LineWidth',2);
h_kr = errorbar(1:P,mean(acc_test_kr,1),std(acc_test_kr,0,1),'g','LineWidth',2);
xlabel('Número características relevantes')
ylabel('Acierto [%]')

% All six accuracy matrices, in the order in which the markers are drawn.
acc_sets = {acc_test_lr, acc_test_qr, acc_test_kr, ...
            acc_test_lp, acc_test_qp, acc_test_kp};

% Minimum distance, criterion 1: the number of features whose
% (mean accuracy, std, #features) for ReliefF + linear is closest to the
% target point (100, 0, 1).
tar=[100,0,1];
ren=[mean(acc_test_lr,1)' std(acc_test_lr,0,1)' (1:P)'];
dd=pdist2(tar,ren);
[~,in]=min(dd);

for s = 1 : numel(acc_sets)
    plot(in,mean(acc_sets{s}(:,in)),'rd','markersize',10)
end

% Minimum distance, criterion 2: same search ignoring the number of
% features, i.e. closest to (100, 0) in (mean accuracy, std).
tar1=[100,0];
ren1=[mean(acc_test_lr,1)' std(acc_test_lr,0,1)'];
dd1=pdist2(tar1,ren1);
[~,in2]=min(dd1);

for s = 1 : numel(acc_sets)
    plot(in2,mean(acc_sets{s}(:,in2)),'rd','markersize',10)
end

% The legend is created last and from the curve handles only, so the
% marker plots never show up as extra "dataN" entries.
legend([h_lp h_qp h_kp h_lr h_qr h_kr], ...
    {'Var+Lin.';'Var+Cuad.';'Var+knn';'Rel+Lin.';'Rel+Cuad.';'Rel+knn'}, ...
    'Location','Best')

title(' Distancia minima ')
% Finish the axes before saving so the PNG includes the grid.
grid on
hold off
print(' Distancia minima','-dpng' )
 
%% NEW SAMPLES
% Label the new samples with the last pseudo-quadratic model left in the
% workspace by the main loop, then compute a PCA-based feature ranking on
% repeated hold-out splits of the new data.

% mdl_qp was fitted on training data that had been (1) z-scored with
% muz/stdz and (2) reordered by the last row of indr_pca using all P
% features. The new samples must go through the same transform; feeding raw
% Xnewr would mix scales and assign features to the wrong model columns.
stdz_new = stdz;
stdz_new(stdz_new == 0) = 1; % constant training columns: avoid division by 0
Xnew_z = (Xnewr - repmat(muz,size(Xnewr,1),1))./repmat(stdz_new,size(Xnewr,1),1);
ynew=predict(mdl_qp,Xnew_z(:,indr_pca(end,:)));

% Pseudo-quadratic model trained on the new samples with their predicted
% labels.
mdldef_qp = fitcdiscr(Xnewr,ynew,'Discrimtype','pseudoquadratic');

% Index vector of relevant features, one row per repetition.
nfeat_new = size(Xnewr,2);
itest1 = cell(1,nr);
itrain1 = cell(1,nr);
rho_pcanew = zeros(nr,nfeat_new);
indr_pcanew = zeros(nr,nfeat_new);
for i = 1 : nr
    % Outputs swapped on purpose: crossvalind holds out the ptrain fraction
    % in its second output, which is used here as the training part.
    [itest1{1,i}, itrain1{1,i}] = crossvalind('HoldOut',ynew,ptrain);
    Xtrain1 = Xnewr(itrain1{1,i},:);

    % Standardize before PCA, as done for every other A_pca call in this
    % script, so the relevance is not dominated by the features with the
    % largest raw variance.
    [~,Wnew,lambdanew] = A_pca(zscore(Xtrain1),0.95);

    % Relevance = sum over components of |loading| weighted by lambdanew.
    rho_pcanew(i,:) = sum(abs(Wnew.*repmat(lambdanew(1:size(Wnew,2))',size(Wnew,1),1)),2);

    % rescale the relevance to the range 0..1
    rho_pcanew(i,:) = rho_pcanew(i,:)-min(rho_pcanew(i,:));
    rho_pcanew(i,:) = rho_pcanew(i,:)/std(rho_pcanew(i,:));
    rho_pcanew(i,:) = rho_pcanew(i,:)/max(rho_pcanew(i,:));

    % feature indices, most relevant first
    [~,indr_pcanew(i,:)] = sort(rho_pcanew(i,:),'descend');
end

Continuar navegando