|
SHOGUN
v1.1.0
|
本页面包含了所有Python模块化接口的例子。
要运行这些例子，只需执行：
python name_of_example.py
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')
parameter_list = [[traindat,testdat,label_traindat,1.,1000,1],[traindat,testdat,label_traindat,1.,1000,1]]
def classifier_averaged_perceptron_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_twoclass=label_traindat,
        learn_rate=1.,
        max_iter=1000,
        num_threads=1):
    """Train an AveragedPerceptron and label the test features.

    Returns a tuple ``(perceptron, out_labels)``: the trained machine and the
    labels obtained after switching it to ``fm_test_real``.
    ``num_threads`` is accepted but not used by this example.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import AveragedPerceptron

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = Labels(label_train_twoclass)

    perceptron = AveragedPerceptron(train_features, train_labels)
    perceptron.set_learn_rate(learn_rate)
    perceptron.set_max_iter(max_iter)
    # Convergence is only guaranteed when the data is separable.
    perceptron.train()

    # Swap in the test features before asking for predictions.
    perceptron.set_features(test_features)
    return perceptron, perceptron.apply().get_labels()
if __name__=='__main__':
print 'AveragedPerceptron'
classifier_averaged_perceptron_modular(*parameter_list[0])
parameter_list = [[1,7],[2,8]]
def classifier_custom_kernel_modular(C=1,dim=7):
    """Train a LibSVM on a random, symmetric precomputed kernel matrix.

    ``C`` is the SVM regularisation constant and ``dim`` is both the number of
    examples and the side length of the square kernel matrix.

    Returns a tuple ``(svm, out)``: the trained machine and the labels it
    assigns when applied with the same custom kernel.
    """
    from shogun.Features import Labels
    from shogun.Kernel import CustomKernel
    from shogun.Classifier import LibSVM
    from numpy import diag, ones, sign
    from numpy.random import rand, seed

    # Seed from the arguments so each parameter set is reproducible.
    seed((C, dim))

    lab = sign(2*rand(dim) - 1)
    data = rand(dim, dim)
    # The element-wise product with the transpose is symmetric; the identity
    # is added on the diagonal.
    symdata = data*data.T + diag(ones(dim))

    kernel = CustomKernel()
    # Fix: the original passed the raw, non-symmetric `data` here and never
    # used `symdata`, although it was computed for exactly this purpose.
    kernel.set_full_kernel_matrix_from_full(symdata)

    labels = Labels(lab)
    svm = LibSVM(C, kernel, labels)
    svm.train()

    # Apply once; the original ran apply() twice and discarded the first result.
    out = svm.apply().get_labels()
    return svm, out
if __name__=='__main__':
print 'custom_kernel'
classifier_custom_kernel_modular(*parameter_list[0])
import numpy
from shogun.Features import StringCharFeatures, Labels, DNA
from shogun.Kernel import WeightedDegreeStringKernel
from shogun.Classifier import SVMLight, DomainAdaptationSVM, MSG_DEBUG
traindna = ['CGCACGTACGTAGCTCGAT',
'CGACGTAGTCGTAGTCGTA',
'CGACGGGGGGGGGGTCGTA',
'CGACCTAGTCGTAGTCGTA',
'CGACCACAGTTATATAGTA',
'CGACGTAGTCGTAGTCGTA',
'CGACGTAGTTTTTTTCGTA',
'CGACGTAGTCGTAGCCCCA',
'CAAAAAAAAAAAAAAAATA',
'CGACGGGGGGGGGGGCGTA']
label_traindna = numpy.array(5*[-1.0] + 5*[1.0])
testdna = ['AGCACGTACGTAGCTCGAT',
'AGACGTAGTCGTAGTCGTA',
'CAACGGGGGGGGGGTCGTA',
'CGACCTAGTCGTAGTCGTA',
'CGAACACAGTTATATAGTA',
'CGACCTAGTCGTAGTCGTA',
'CGACGTGGGGTTTTTCGTA',
'CGACGTAGTCCCAGCCCCA',
'CAAAAAAAAAAAACCAATA',
'CGACGGCCGGGGGGGCGTA']
label_testdna = numpy.array(5*[-1.0] + 5*[1.0])
traindna2 = ['AGACAGTCAGTCGATAGCT',
'AGCAGTCGTAGTCGTAGTC',
'AGCAGGGGGGGGGGTAGTC',
'AGCAATCGTAGTCGTAGTC',
'AGCAACACGTTCTCTCGTC',
'AGCAGTCGTAGTCGTAGTC',
'AGCAGTCGTTTTTTTAGTC',
'AGCAGTCGTAGTCGAAAAC',
'ACCCCCCCCCCCCCCCCTC',
'AGCAGGGGGGGGGGGAGTC']
label_traindna2 = numpy.array(5*[-1.0] + 5*[1.0])
testdna2 = ['CGACAGTCAGTCGATAGCT',
'CGCAGTCGTAGTCGTAGTC',
'ACCAGGGGGGGGGGTAGTC',
'AGCAATCGTAGTCGTAGTC',
'AGCCACACGTTCTCTCGTC',
'AGCAATCGTAGTCGTAGTC',
'AGCAGTGGGGTTTTTAGTC',
'AGCAGTCGTAAACGAAAAC',
'ACCCCCCCCCCCCAACCTC',
'AGCAGGAAGGGGGGGAGTC']
label_testdna2 = numpy.array(5*[-1.0] + 5*[1.0])
# Each entry follows the positional order of the function signature below:
# first-domain train/test strings and labels, second-domain train/test strings
# and labels, then C and the kernel degree.
parameter_list = [
    [traindna, testdna, label_traindna, label_testdna,
     traindna2, testdna2, label_traindna2, label_testdna2, 1, 3],
    [traindna, testdna, label_traindna, label_testdna,
     traindna2, testdna2, label_traindna2, label_testdna2, 2, 5],
]


def classifier_domainadaptationsvm_modular(fm_train_dna=traindna,fm_test_dna=testdna, \
        label_train_dna=label_traindna, \
        label_test_dna=label_testdna,fm_train_dna2=traindna2,fm_test_dna2=testdna2, \
        label_train_dna2=label_traindna2,label_test_dna2=label_testdna2,C=1,degree=3):
    """Train an SVMLight on one domain, then a DomainAdaptationSVM on a second.

    The first machine is trained on ``fm_train_dna`` / ``label_train_dna``.
    The domain-adaptation machine is then trained on ``fm_train_dna2`` /
    ``label_train_dna2``, regularised against the first machine, and applied
    to ``fm_test_dna2``.

    ``fm_test_dna``, ``label_test_dna`` and ``label_test_dna2`` are accepted
    for interface compatibility but are not used.

    Returns the labels predicted for ``fm_test_dna2``.
    """
    # Source-domain SVM.
    feats_train = StringCharFeatures(fm_train_dna, DNA)
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = Labels(label_train_dna)
    svm = SVMLight(C, kernel, labels)
    svm.train()

    # Target-domain data.
    # Fix: the original rebuilt these from the first-domain arguments, so the
    # "*2" parameters were silently ignored.
    feats_train2 = StringCharFeatures(fm_train_dna2, DNA)
    feats_test2 = StringCharFeatures(fm_test_dna2, DNA)
    kernel2 = WeightedDegreeStringKernel(feats_train2, feats_train2, degree)
    labels2 = Labels(label_train_dna2)

    # Regularise against the previously obtained solution.
    dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
    dasvm.train()

    out = dasvm.apply(feats_test2).get_labels()
    return out  # TODO: also return dasvm


if __name__ == '__main__':
    print('SVMLight')
    classifier_domainadaptationsvm_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
parameter_list = [[traindat,testdat,label_traindat]]
def classifier_gaussiannaivebayes_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat):
    """Train a GaussianNaiveBayes machine and label the test features.

    Returns ``(gnb, gnb_train, output)``: the machine, the value returned by
    its ``train()`` call, and the labels predicted for ``fm_test_real``.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import GaussianNaiveBayes

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = Labels(label_train_multiclass)

    gnb = GaussianNaiveBayes(train_features, train_labels)
    train_result = gnb.train()
    predicted = gnb.apply(test_features)
    return gnb, train_result, predicted.get_labels()
if __name__=='__main__':
print 'GaussianNaiveBayes'
classifier_gaussiannaivebayes_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]
def classifier_gmnpsvm_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """Fit a GMNPSVM with a Gaussian kernel and label the test features.

    Returns ``(out, kernel)``: the labels predicted for ``fm_test_real`` and
    the kernel object the machine was built with.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import GMNPSVM

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    kernel = GaussianKernel(train_features, train_features, width)
    train_labels = Labels(label_train_multiclass)

    machine = GMNPSVM(C, kernel, train_labels)
    machine.set_epsilon(epsilon)
    machine.train(train_features)

    # Re-initialise the kernel between training and test features.
    kernel.init(train_features, test_features)
    predicted = machine.apply(test_features)
    return predicted.get_labels(), kernel
if __name__=='__main__':
print 'GMNPSVM'
classifier_gmnpsvm_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')
parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]
def classifier_gpbtsvm_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_twoclass=label_traindat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """Fit a GPBTSVM with a Gaussian kernel and predict on the test features.

    Returns ``(predictions, svm, labels)``: the object returned by ``apply()``,
    the trained machine, and the label vector taken from the predictions.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import GPBTSVM

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    kernel = GaussianKernel(train_features, train_features, width)
    train_labels = Labels(label_train_twoclass)

    machine = GPBTSVM(C, kernel, train_labels)
    machine.set_epsilon(epsilon)
    machine.train()

    # Switch the kernel's right-hand side to the test features.
    kernel.init(train_features, test_features)
    # The example evaluates the machine twice; this first result is discarded.
    machine.apply().get_labels()
    predictions = machine.apply()
    return predictions, machine, predictions.get_labels()
if __name__=='__main__':
print 'GPBTSVM'
classifier_gpbtsvm_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
parameter_list = [[traindat,testdat,label_traindat,3],[traindat,testdat,label_traindat,3]]
def classifier_knn_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        k=3):
    """Train a KNN classifier on Euclidean distances and label the test set.

    Returns ``(knn, knn_train, output, multiple_k)``: the machine, the value
    returned by ``train()``, the labels predicted for ``fm_test_real`` and the
    result of ``classify_for_multiple_k()``.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import KNN
    from shogun.Distance import EuclidianDistance

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    metric = EuclidianDistance(train_features, train_features)
    train_labels = Labels(label_train_multiclass)

    knn = KNN(k, metric, train_labels)
    train_result = knn.train()
    predicted_labels = knn.apply(test_features).get_labels()
    per_k_result = knn.classify_for_multiple_k()
    return knn, train_result, predicted_labels, per_k_result
if __name__=='__main__':
print 'KNN'
classifier_knn_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
parameter_list = [[traindat,testdat,label_traindat,0.9,1,2000],[traindat,testdat,label_traindat,3,1,5000]]
def classifier_larank_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        C=0.9,
        num_threads=1,
        num_iter=5):
    """Train a LaRank machine with a Gaussian kernel and return its predictions.

    The kernel width (2.1) and epsilon (1e-5) are fixed inside the function;
    ``num_threads`` and ``num_iter`` are accepted but not used.

    Returns ``(predictions, svm, labels)``: the object returned by ``apply()``,
    the trained machine, and the label vector taken from the predictions.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LaRank
    from shogun.Mathematics import Math_init_random

    # Fixed seed so repeated runs are comparable.
    Math_init_random(17)

    train_features = RealFeatures(fm_train_real)
    # NOTE(review): the test features are built but never passed to the
    # kernel or to apply(); predictions below appear to be on training data.
    test_features = RealFeatures(fm_test_real)

    kernel_width = 2.1
    kernel = GaussianKernel(train_features, train_features, kernel_width)

    tolerance = 1e-5
    train_labels = Labels(label_train_multiclass)

    machine = LaRank(C, kernel, train_labels)
    machine.set_batch_mode(False)
    machine.set_epsilon(tolerance)
    machine.train()

    # Result discarded, as in the original example.
    machine.apply(train_features).get_labels()
    predictions = machine.apply()
    return predictions, machine, predictions.get_labels()
if __name__=='__main__':
print 'LaRank'
classifier_larank_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')
parameter_list = [[traindat,testdat,label_traindat,3,1],[traindat,testdat,label_traindat,4,1]]
def classifier_lda_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_twoclass=label_traindat,
        gamma=3,
        num_threads=1):
    """Train an LDA classifier and label the test features.

    Returns ``(lda, labels)``: the trained machine and the labels obtained
    after switching it to ``fm_test_real``. ``num_threads`` is not used.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import LDA

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = Labels(label_train_twoclass)

    lda = LDA(gamma, train_features, train_labels)
    lda.train()

    # Accessors are exercised but their values are not kept.
    lda.get_bias()
    lda.get_w()

    lda.set_features(test_features)
    # First evaluation is discarded; the second one is returned.
    lda.apply().get_labels()
    predicted_labels = lda.apply().get_labels()
    return lda, predicted_labels
if __name__=='__main__':
print 'LDA'
classifier_lda_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')
parameter_list = [[traindat,testdat,label_traindat,0.9,1e-3],[traindat,testdat,label_traindat,0.8,1e-2]]
def classifier_liblinear_modular(fm_train_real, fm_test_real,
                                 label_train_twoclass, C, epsilon):
    """Train a LibLinear machine (L2R_L2LOSS_SVC_DUAL solver) and predict.

    Returns ``(predictions, svm, labels)``: the object returned by ``apply()``
    on the test features, the trained machine, and the label vector taken
    from the predictions.
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
    from shogun.Mathematics import Math_init_random

    # Fixed seed so repeated runs are comparable.
    Math_init_random(17)

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = Labels(label_train_twoclass)

    machine = LibLinear(C, train_features, train_labels)
    machine.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    machine.set_epsilon(epsilon)
    machine.set_bias_enabled(True)
    machine.train()

    machine.set_features(test_features)
    # First evaluation is discarded, as in the original example.
    machine.apply().get_labels()
    predictions = machine.apply()
    return predictions, machine, predictions.get_labels()
if __name__=='__main__':
print 'LibLinear'
classifier_liblinear_modular(*parameter_list[0])
# Minimal LibSVM example: two random 2-D point clouds shifted by -dist and
# +dist are classified with a Gaussian-kernel LibSVM and the test error is
# printed. (The statements were collapsed onto one line; line structure is
# restored here so the script parses.)
from numpy import *
from numpy.random import randn
from shogun.Features import *
from shogun.Classifier import *
from shogun.Kernel import *

num = 1000
dist = 1
width = 2.1
C = 1

# Columns are examples: `num` points around -dist followed by `num` around +dist.
traindata_real = concatenate((randn(2, num) - dist, randn(2, num) + dist), axis=1)
testdata_real = concatenate((randn(2, num) - dist, randn(2, num) + dist), axis=1)

trainlab = concatenate((-ones(num), ones(num)))
testlab = concatenate((-ones(num), ones(num)))

feats_train = RealFeatures(traindata_real)
feats_test = RealFeatures(testdata_real)
kernel = GaussianKernel(feats_train, feats_train, width)

labels = Labels(trainlab)
svm = LibSVM(C, kernel, labels)
svm.train()

# Switch the kernel's right-hand side to the test features before predicting.
kernel.init(feats_train, feats_test)
out = svm.apply().get_labels()

# Fraction of test points whose predicted sign differs from the true label.
testerr = mean(sign(out) != testlab)
print(testerr)
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')
parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]
def classifier_libsvm_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_twoclass=label_traindat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """Fit a LibSVM with a Gaussian kernel and predict on the test features.

    Returns ``(predictions, svm, labels)``: the object returned by ``apply()``,
    the trained machine, and the label vector taken from the predictions.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVM

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    kernel = GaussianKernel(train_features, train_features, width)
    train_labels = Labels(label_train_twoclass)

    machine = LibSVM(C, kernel, train_labels)
    machine.set_epsilon(epsilon)
    machine.train()

    # Switch the kernel's right-hand side to the test features.
    kernel.init(train_features, test_features)

    # These calls are made as in the original example; none of their
    # results is used in the return value.
    machine.apply().get_labels()
    machine.get_support_vectors()
    machine.get_alphas()

    predictions = machine.apply()
    return predictions, machine, predictions.get_labels()
if __name__=='__main__':
print 'LibSVM'
classifier_libsvm_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]
def classifier_libsvmmulticlass_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """Fit a LibSVMMultiClass with a Gaussian kernel and predict on test data.

    Returns ``(predictions, svm, labels)``: the object returned by ``apply()``,
    the trained machine, and the label vector taken from the predictions.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVMMultiClass

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    kernel = GaussianKernel(train_features, train_features, width)
    train_labels = Labels(label_train_multiclass)

    machine = LibSVMMultiClass(C, kernel, train_labels)
    machine.set_epsilon(epsilon)
    machine.train()

    # Switch the kernel's right-hand side to the test features.
    kernel.init(train_features, test_features)
    # First evaluation is discarded, as in the original example.
    machine.apply().get_labels()
    predictions = machine.apply()
    return predictions, machine, predictions.get_labels()
if __name__=='__main__':
print 'LibSVMMultiClass'
classifier_libsvmmulticlass_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
parameter_list = [[traindat,testdat,2.2,1,1e-7],[traindat,testdat,2.1,1,1e-5]]
def classifier_libsvmoneclass_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """Fit a LibSVMOneClass with a Gaussian kernel (no labels) and predict.

    Returns ``(predictions, svm, labels)``: the object returned by ``apply()``,
    the trained machine, and the label vector taken from the predictions.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVMOneClass

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    kernel = GaussianKernel(train_features, train_features, width)

    machine = LibSVMOneClass(C, kernel)
    machine.set_epsilon(epsilon)
    machine.train()

    # Switch the kernel's right-hand side to the test features.
    kernel.init(train_features, test_features)
    # First evaluation is discarded, as in the original example.
    machine.apply().get_labels()
    predictions = machine.apply()
    return predictions, machine, predictions.get_labels()
if __name__=='__main__':
print 'LibSVMOneClass'
classifier_libsvmoneclass_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')
parameter_list = [[traindat,testdat,label_traindat,1,1e-5],[traindat,testdat,label_traindat,0.9,1e-5]]
def classifier_mpdsvm_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_twoclass=label_traindat,
        C=1,
        epsilon=1e-5):
    """Fit an MPDSVM with a Gaussian kernel (width fixed at 2.1) and predict.

    Returns ``(predictions, svm, labels)``: the object returned by ``apply()``,
    the trained machine, and the label vector taken from the predictions.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import MPDSVM

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)

    kernel_width = 2.1
    kernel = GaussianKernel(train_features, train_features, kernel_width)
    train_labels = Labels(label_train_twoclass)

    machine = MPDSVM(C, kernel, train_labels)
    machine.set_epsilon(epsilon)
    machine.train()

    # Switch the kernel's right-hand side to the test features.
    kernel.init(train_features, test_features)
    # First evaluation is discarded, as in the original example.
    machine.apply().get_labels()
    predictions = machine.apply()
    return predictions, machine, predictions.get_labels()
if __name__=='__main__':
print 'MPDSVM'
classifier_mpdsvm_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')
parameter_list = [[traindat,testdat,label_traindat,1.,1000,1],[traindat,testdat,label_traindat,1.,1000,1]]
def classifier_perceptron_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_twoclass=label_traindat,
        learn_rate=1.,
        max_iter=1000,
        num_threads=1):
    """Train a Perceptron and label the test features.

    Returns ``(perceptron, out_labels)``: the trained machine and the labels
    obtained after switching it to ``fm_test_real``.
    ``num_threads`` is accepted but not used by this example.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import Perceptron

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = Labels(label_train_twoclass)

    perceptron = Perceptron(train_features, train_labels)
    perceptron.set_learn_rate(learn_rate)
    perceptron.set_max_iter(max_iter)
    # Convergence is only guaranteed when the data is separable.
    perceptron.train()

    # Swap in the test features before asking for predictions.
    perceptron.set_features(test_features)
    return perceptron, perceptron.apply().get_labels()
if __name__=='__main__':
print 'Perceptron'
classifier_perceptron_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
train=lm.load_numbers('../data/fm_train_real.dat')
test=lm.load_numbers('../data/fm_test_real.dat')
labels=lm.load_labels('../data/label_train_twoclass.dat')
parameter_list=[[train,test,labels,5,1e-3,3.0], [train,test,labels,0.9,1e-2,1.0]]
def classifier_subgradientsvm_modular(fm_train_real, fm_test_real,
                                      label_train_twoclass, C, epsilon,
                                      max_train_time):
    """Train a SubGradientSVM on sparse features and label the test set.

    The dense inputs are wrapped in RealFeatures and converted to
    SparseRealFeatures before use.

    Returns ``(labels, svm)``: the labels predicted for ``fm_test_real`` and
    the trained machine.
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import SubGradientSVM

    dense_train = RealFeatures(fm_train_real)
    sparse_train = SparseRealFeatures()
    sparse_train.obtain_from_simple(dense_train)

    dense_test = RealFeatures(fm_test_real)
    sparse_test = SparseRealFeatures()
    sparse_test.obtain_from_simple(dense_test)

    train_labels = Labels(label_train_twoclass)

    machine = SubGradientSVM(C, sparse_train, train_labels)
    machine.set_epsilon(epsilon)
    machine.set_max_train_time(max_train_time)
    machine.train()

    machine.set_features(sparse_test)
    predicted_labels = machine.apply().get_labels()
    return predicted_labels, machine
if __name__=='__main__':
print 'SubGradientSVM'
classifier_subgradientsvm_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
train_dna=lm.load_dna('../data/fm_train_dna.dat')
test_dna=lm.load_dna('../data/fm_test_dna.dat')
label=lm.load_labels('../data/label_train_dna.dat')
parameter_list=[[train_dna, test_dna, label, 20, 0.9, 1e-3, 1],
[train_dna, test_dna, label, 20, 2.3, 1e-5, 4]]
def classifier_svmlight_batch_linadd_modular(fm_train_dna, fm_test_dna,
        label_train_dna, degree, C, epsilon, num_threads):
    """Train an SVMLight on DNA strings and predict with and without batching.

    A WeightedDegreeStringKernel of the given ``degree`` is used. After
    training, the machine is applied once with batch computation and linadd
    disabled, and once more with batch computation enabled.

    Returns ``(labels, svm)``: the labels from the batch-enabled run and the
    trained machine. Returns ``None`` (after printing a message) when SVMLight
    cannot be imported.
    """
    from shogun.Features import StringCharFeatures, Labels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel, MSG_DEBUG
    try:
        from shogun.Classifier import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return None

    feats_train = StringCharFeatures(DNA)
    #feats_train.io.set_loglevel(MSG_DEBUG)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    # Fix: the original overwrote the `degree` argument with a hard-coded 20,
    # so the caller's value was silently ignored.
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    train_labels = Labels(label_train_dna)

    svm = SVMLight(C, kernel, train_labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    # Switch the kernel's right-hand side to the test features.
    kernel.init(feats_train, feats_test)

    # First pass: batch computation and linadd both off; result discarded.
    svm.set_batch_computation_enabled(False)
    svm.set_linadd_enabled(False)
    svm.apply().get_labels()

    # Second pass: batch computation on; this is the returned result.
    svm.set_batch_computation_enabled(True)
    predicted_labels = svm.apply().get_labels()
    return predicted_labels, svm
if __name__=='__main__':
print 'SVMlight batch'
classifier_svmlight_batch_linadd_modular(*parameter_list[0])
import numpy
traindna=['CGCACGTACGTAGCTCGAT',
'CGACGTAGTCGTAGTCGTA',
'CGACGGGGGGGGGGTCGTA',
'CGACCTAGTCGTAGTCGTA',
'CGACCACAGTTATATAGTA',
'CGACGTAGTCGTAGTCGTA',
'CGACGTAGTTTTTTTCGTA',
'CGACGTAGTCGTAGCCCCA',
'CAAAAAAAAAAAAAAAATA',
'CGACGGGGGGGGGGGCGTA']
label_traindna=numpy.array(5*[-1.0] + 5*[1.0])
testdna=['AGCACGTACGTAGCTCGAT',
'AGACGTAGTCGTAGTCGTA',
'CAACGGGGGGGGGGTCGTA',
'CGACCTAGTCGTAGTCGTA',
'CGAACACAGTTATATAGTA',
'CGACCTAGTCGTAGTCGTA',
'CGACGTGGGGTTTTTCGTA',
'CGACGTAGTCCCAGCCCCA',
'CAAAAAAAAAAAACCAATA',
'CGACGGCCGGGGGGGCGTA']
label_test_dna=numpy.array(5*[-1.0] + 5*[1.0])
parameter_list = [[traindna,testdna,label_traindna,3,10,1e-5,1],[traindna,testdna,label_traindna,3,10,1e-5,1]]
def classifier_svmlight_linear_term_modular(
        fm_train_dna=traindna,
        fm_test_dna=testdna,
        label_train_dna=label_traindna,
        degree=3,
        C=10,
        epsilon=1e-5,
        num_threads=1):
    """Train an SVMLight with a custom linear term on DNA strings.

    The linear term is a fixed vector of ten values, set before training.

    Returns ``(out, kernel)``: the labels predicted for ``fm_test_dna`` and
    the kernel object the machine was built with.
    """
    from shogun.Features import StringCharFeatures, Labels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    from shogun.Classifier import SVMLight

    train_features = StringCharFeatures(DNA)
    train_features.set_features(fm_train_dna)
    test_features = StringCharFeatures(DNA)
    test_features.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(train_features, train_features, degree)
    train_labels = Labels(label_train_dna)

    machine = SVMLight(C, kernel, train_labels)
    machine.set_qpsize(3)
    # Hard-coded ten-element term; presumably one entry per training example.
    linear_term = -numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6], dtype=numpy.double)
    machine.set_linear_term(linear_term)
    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.train()

    # Switch the kernel's right-hand side to the test features.
    kernel.init(train_features, test_features)
    predicted_labels = machine.apply().get_labels()
    return predicted_labels, kernel
if __name__=='__main__':
print 'SVMLight'
classifier_svmlight_linear_term_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
label_traindat = lm.load_labels('../data/label_train_dna.dat')
parameter_list = [[traindat,testdat,label_traindat,1.1,1e-5,1],[traindat,testdat,label_traindat,1.2,1e-5,1]]
def classifier_svmlight_modular(
        fm_train_dna=traindat,
        fm_test_dna=testdat,
        label_train_dna=label_traindat,
        C=1.2,
        epsilon=1e-5,
        num_threads=1):
    """Train an SVMLight with a degree-20 weighted-degree string kernel.

    Returns the kernel object after it has been re-initialised with the test
    features and the machine has been applied once. Returns ``None`` (after
    printing a message) when SVMLight cannot be imported.
    """
    from shogun.Features import StringCharFeatures, Labels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    try:
        from shogun.Classifier import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return

    train_features = StringCharFeatures(DNA)
    train_features.set_features(fm_train_dna)
    test_features = StringCharFeatures(DNA)
    test_features.set_features(fm_test_dna)

    kernel_degree = 20
    kernel = WeightedDegreeStringKernel(train_features, train_features,
                                        kernel_degree)
    train_labels = Labels(label_train_dna)

    machine = SVMLight(C, kernel, train_labels)
    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.train()

    # Switch the kernel's right-hand side to the test features.
    kernel.init(train_features, test_features)
    # Predictions are computed but not returned.
    machine.apply().get_labels()
    return kernel
if __name__=='__main__':
print 'SVMLight'
classifier_svmlight_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')
parameter_list = [[traindat,testdat,label_traindat,0.9,1e-5,1],[traindat,testdat,label_traindat,0.8,1e-5,1]]
def classifier_svmlin_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_twoclass=label_traindat,
        C=0.9,
        epsilon=1e-5,
        num_threads=1):
    """Train an SVMLin (bias enabled) on sparse features and predict.

    Returns ``(predictions, svm, labels)``: the object returned by ``apply()``
    on the test features, the trained machine, and the label vector taken
    from the predictions.
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import SVMLin

    dense_train = RealFeatures(fm_train_real)
    sparse_train = SparseRealFeatures()
    sparse_train.obtain_from_simple(dense_train)

    dense_test = RealFeatures(fm_test_real)
    sparse_test = SparseRealFeatures()
    sparse_test.obtain_from_simple(dense_test)

    train_labels = Labels(label_train_twoclass)

    machine = SVMLin(C, sparse_train, train_labels)
    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.set_bias_enabled(True)
    machine.train()

    machine.set_features(sparse_test)
    # Accessors are exercised but their values are not kept.
    machine.get_bias()
    machine.get_w()
    # First evaluation is discarded, as in the original example.
    machine.apply().get_labels()
    predictions = machine.apply()
    return predictions, machine, predictions.get_labels()
if __name__=='__main__':
print 'SVMLin'
classifier_svmlin_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')
parameter_list = [[traindat,testdat,label_traindat,0.9,1e-5,1],[traindat,testdat,label_traindat,0.8,1e-5,1]]
def classifier_svmocas_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_twoclass=label_traindat,
        C=0.9,
        epsilon=1e-5,
        num_threads=1):
    """Train an SVMOcas (bias disabled) on sparse features and predict.

    Returns ``(predictions, svm, labels)``: the object returned by ``apply()``
    on the test features, the trained machine, and the label vector taken
    from the predictions.
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import SVMOcas

    dense_train = RealFeatures(fm_train_real)
    sparse_train = SparseRealFeatures()
    sparse_train.obtain_from_simple(dense_train)

    dense_test = RealFeatures(fm_test_real)
    sparse_test = SparseRealFeatures()
    sparse_test.obtain_from_simple(dense_test)

    train_labels = Labels(label_train_twoclass)

    machine = SVMOcas(C, sparse_train, train_labels)
    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.set_bias_enabled(False)
    machine.train()

    machine.set_features(sparse_test)
    # First evaluation is discarded, as in the original example.
    machine.apply().get_labels()
    predictions = machine.apply()
    return predictions, machine, predictions.get_labels()
if __name__=='__main__':
print 'SVMOcas'
classifier_svmocas_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')
parameter_list = [[traindat,testdat,label_traindat,0.9,1,6],[traindat,testdat,label_traindat,0.8,1,5]]
def classifier_svmsgd_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_twoclass=label_traindat,
        C=0.9,
        num_threads=1,
        num_iter=5):
    """Train an SVMSGD for ``num_iter`` epochs on sparse features and predict.

    ``num_threads`` is accepted but not used by this example.

    Returns ``(predictions, svm, labels)``: the object returned by ``apply()``
    on the test features, the trained machine, and the label vector taken
    from the predictions.
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import SVMSGD

    dense_train = RealFeatures(fm_train_real)
    sparse_train = SparseRealFeatures()
    sparse_train.obtain_from_simple(dense_train)

    dense_test = RealFeatures(fm_test_real)
    sparse_test = SparseRealFeatures()
    sparse_test.obtain_from_simple(dense_test)

    train_labels = Labels(label_train_twoclass)

    machine = SVMSGD(C, sparse_train, train_labels)
    machine.set_epochs(num_iter)
    machine.train()

    machine.set_features(sparse_test)
    # First evaluation is discarded, as in the original example.
    machine.apply().get_labels()
    predictions = machine.apply()
    return predictions, machine, predictions.get_labels()
if __name__=='__main__':
print 'SVMSGD'
classifier_svmsgd_modular(*parameter_list[0])
##!/usr/bin/env python
#"""
#Explicit examples on how to use clustering
#"""
from numpy import array, append
from shogun.Distribution import GMM
from shogun.Library import Math_init_random
# Initialise Shogun's random state with a fixed value so the sample drawn
# below is repeatable.
Math_init_random(5)

# Reference two-component mixture that the training data is sampled from.
real_gmm = GMM(2, 0)
real_gmm.set_nth_mean(array([1.0, 1.0]), 0)
real_gmm.set_nth_mean(array([-1.0, -1.0]), 1)
real_gmm.set_nth_cov(array([[1.0, 0.2], [0.2, 0.1]]), 0)
real_gmm.set_nth_cov(array([[0.3, 0.1], [0.1, 1.0]]), 1)
real_gmm.set_coef(array([0.3, 0.7]))

# Draw 200 samples and stack them in one step. The original grew the array
# with numpy.append inside a loop, which copies the whole array on every
# iteration. The transpose puts one sample per column.
generated = array([real_gmm.sample() for _ in range(200)]).transpose()

parameter_list = [[generated, 2, 1e-9, 1000, 1e-9, 0]]
def clustering_gmm_modular(fm_train=generated,n=2,min_cov=1e-9,max_iter=1000,min_change=1e-9,cov_type=0):
    """Estimate a Gaussian mixture model from ``fm_train`` with EM.

    ``n`` and ``cov_type`` are passed to the GMM constructor; ``min_cov``,
    ``max_iter`` and ``min_change`` are passed to ``train_em``.

    Returns the estimated GMM object.
    """
    from shogun.Distribution import GMM
    from shogun.Features import RealFeatures
    from shogun.Library import Math_init_random

    # Fixed seed so repeated runs are comparable.
    Math_init_random(5)

    # Fix: the original wrapped the module-level `generated` array here, so
    # any data passed in through `fm_train` was silently ignored.
    feat_train = RealFeatures(fm_train)

    est_gmm = GMM(n, cov_type)
    est_gmm.train(feat_train)
    est_gmm.train_em(min_cov, max_iter, min_change)
    return est_gmm
if __name__=='__main__':
print 'GMM'
clustering_gmm_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
parameter_list = [[traindat,3],[traindat,4]]
def clustering_hierarchical_modular(fm_train=traindat, merges=3):
    """Run Hierarchical clustering on Euclidean distances.

    Returns ``(hierarchical, out_distance, out_cluster)``: the trained
    clustering object, its merge distances and its cluster pairs.
    """
    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Clustering import Hierarchical

    train_features = RealFeatures(fm_train)
    metric = EuclidianDistance(train_features, train_features)

    clusterer = Hierarchical(merges, metric)
    clusterer.train()

    merge_distances = clusterer.get_merge_distances()
    cluster_pairs = clusterer.get_cluster_pairs()
    return clusterer, merge_distances, cluster_pairs
if __name__=='__main__':
print 'Hierarchical'
clustering_hierarchical_modular(*parameter_list[0])
##!/usr/bin/env python
#"""
#Explicit examples on how to use clustering
#"""
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
parameter_list = [[traindat,3],[traindat,4]]
def clustering_kmeans_modular(fm_train=traindat, k=3):
    """Run KMeans with ``k`` clusters on Euclidean distances.

    Returns ``(out_centers, kmeans)``: the cluster centers and the trained
    clustering object.
    """
    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Clustering import KMeans
    from shogun.Mathematics import Math_init_random

    # Fixed seed so repeated runs are comparable.
    Math_init_random(17)

    train_features = RealFeatures(fm_train)
    metric = EuclidianDistance(train_features, train_features)

    clusterer = KMeans(k, metric)
    clusterer.train()

    centers = clusterer.get_cluster_centers()
    # Radii are queried but not returned.
    clusterer.get_radiuses()
    return centers, clusterer
if __name__=='__main__':
print 'KMeans'
clustering_kmeans_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')
parameter_list = [[data,10],[data,20]]
def converter_diffusionmaps_modular(data, t):
    """Configure a DiffusionMaps converter and apply it to ``data``.

    The converter uses target dimension 1, a ``GaussianKernel(10, 10.0)`` and
    the given ``t``.

    Returns the RealFeatures object built from ``data``.
    """
    from shogun.Features import RealFeatures
    from shogun.Converter import DiffusionMaps
    from shogun.Kernel import GaussianKernel

    input_features = RealFeatures(data)

    embedder = DiffusionMaps()
    embedder.set_target_dim(1)
    embedder.set_kernel(GaussianKernel(10, 10.0))
    embedder.set_t(t)

    # NOTE(review): the value returned by apply() is not captured; the input
    # features object is what gets returned. Confirm this is intended.
    embedder.apply(input_features)
    return input_features
if __name__=='__main__':
print 'DiffusionMaps'
converter_diffusionmaps_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')
parameter_list = [[data,10],[data,20]]
def converter_hessianlocallylinearembedding_modular(data, k):
    """Configure a HessianLocallyLinearEmbedding converter and apply it.

    The converter uses target dimension 1 and the given ``k``.

    Returns the RealFeatures object built from ``data``.
    """
    from shogun.Features import RealFeatures
    from shogun.Converter import HessianLocallyLinearEmbedding

    input_features = RealFeatures(data)

    embedder = HessianLocallyLinearEmbedding()
    embedder.set_target_dim(1)
    embedder.set_k(k)

    # NOTE(review): the value returned by apply() is not captured; the input
    # features object is what gets returned. Confirm this is intended.
    embedder.apply(input_features)
    return input_features
if __name__=='__main__':
print 'HessianLocallyLinearEmbedding'
converter_hessianlocallylinearembedding_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')
parameter_list = [[data]]
def converter_isomap_modular(data):
    """Configure a landmark Isomap converter and apply it to ``data``.

    The converter is set up with landmark mode on, 5 landmarks, ``k`` = 6 and
    target dimension 1.

    Returns the RealFeatures object built from ``data``.
    """
    from shogun.Features import RealFeatures
    from shogun.Converter import Isomap

    input_features = RealFeatures(data)

    embedder = Isomap()
    embedder.set_landmark(True)
    embedder.set_landmark_number(5)
    embedder.set_k(6)
    embedder.set_target_dim(1)

    # NOTE(review): the value returned by apply() is not captured; the input
    # features object is what gets returned. Confirm this is intended.
    embedder.apply(input_features)
    return input_features
if __name__=='__main__':
print 'Isomap'
converter_isomap_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')
parameter_list = [[data,10],[data,20]]
def converter_kernellocallylinearembedding_modular(data, k):
    """Configure a KernelLocallyLinearEmbedding converter and apply it.

    The converter is built on a LinearKernel and uses target dimension 1 and
    the given ``k``.

    Returns the RealFeatures object built from ``data``.
    """
    from shogun.Features import RealFeatures
    from shogun.Converter import KernelLocallyLinearEmbedding
    from shogun.Kernel import LinearKernel

    input_features = RealFeatures(data)

    linear_kernel = LinearKernel()
    embedder = KernelLocallyLinearEmbedding(linear_kernel)
    embedder.set_target_dim(1)
    embedder.set_k(k)

    # NOTE(review): the value returned by apply() is not captured; the input
    # features object is what gets returned. Confirm this is intended.
    embedder.apply(input_features)
    return input_features
if __name__=='__main__':
print 'KernelLocallyLinearEmbedding'
converter_kernellocallylinearembedding_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[data, 10], [data, 20]]


def converter_kernellocaltangentspacealignment_modular(data, k):
    """Run Shogun's KernelLocalTangentSpaceAlignment converter on ``data``.

    The converter is default-constructed, configured with target dimension 1
    and ``k`` (forwarded to ``set_k``), and applied to RealFeatures built
    from ``data``.

    Returns the RealFeatures object built from ``data``. The value returned
    by ``converter.apply`` is not captured.
    """
    from shogun.Features import RealFeatures
    from shogun.Converter import KernelLocalTangentSpaceAlignment

    real_feats = RealFeatures(data)

    embedder = KernelLocalTangentSpaceAlignment()
    embedder.set_target_dim(1)
    embedder.set_k(k)
    embedder.apply(real_feats)

    return real_feats


if __name__ == '__main__':
    print('KernelLocalTangentSpaceAlignment')
    converter_kernellocaltangentspacealignment_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[data, 10], [data, 20]]


def converter_laplacianeigenmaps_modular(data, k):
    """Run Shogun's LaplacianEigenmaps converter on ``data``.

    The converter is configured with target dimension 1, ``k`` (forwarded to
    ``set_k``) and tau 2.0, then applied to RealFeatures built from ``data``.

    Returns the RealFeatures object built from ``data``. The value returned
    by ``converter.apply`` is not captured.
    """
    from shogun.Features import RealFeatures
    from shogun.Converter import LaplacianEigenmaps

    real_feats = RealFeatures(data)

    embedder = LaplacianEigenmaps()
    embedder.set_target_dim(1)
    embedder.set_k(k)
    embedder.set_tau(2.0)
    embedder.apply(real_feats)

    return real_feats


if __name__ == '__main__':
    print('LaplacianEigenmaps')
    converter_laplacianeigenmaps_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[data, 10], [data, 20]]


def converter_linearlocaltangentspacealignment_modular(data, k):
    """Run Shogun's LinearLocalTangentSpaceAlignment converter on ``data``.

    The converter is configured with target dimension 1 and ``k`` (forwarded
    to ``set_k``) and applied to RealFeatures built from ``data``.

    Returns the RealFeatures object built from ``data``. The value returned
    by ``converter.apply`` is not captured.
    """
    from shogun.Features import RealFeatures
    from shogun.Converter import LinearLocalTangentSpaceAlignment

    real_feats = RealFeatures(data)

    embedder = LinearLocalTangentSpaceAlignment()
    embedder.set_target_dim(1)
    embedder.set_k(k)
    embedder.apply(real_feats)

    return real_feats


if __name__ == '__main__':
    print('LinearLocalTangentSpaceAlignment')
    converter_linearlocaltangentspacealignment_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[data, 10], [data, 20]]


def converter_localitypreservingprojections_modular(data, k):
    """Run Shogun's LocalityPreservingProjections converter on ``data``.

    The converter is configured with target dimension 1, ``k`` (forwarded to
    ``set_k``) and tau 2.0, then applied to RealFeatures built from ``data``.

    Returns the RealFeatures object built from ``data``. The value returned
    by ``converter.apply`` is not captured.
    """
    from shogun.Features import RealFeatures
    from shogun.Converter import LocalityPreservingProjections

    real_feats = RealFeatures(data)

    embedder = LocalityPreservingProjections()
    embedder.set_target_dim(1)
    embedder.set_k(k)
    embedder.set_tau(2.0)
    embedder.apply(real_feats)

    return real_feats


if __name__ == '__main__':
    print('LocalityPreservingProjections')
    converter_localitypreservingprojections_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[data, 10], [data, 20]]


def converter_locallylinearembedding_modular(data, k):
    """Run Shogun's LocallyLinearEmbedding converter on ``data``.

    The converter is configured with target dimension 1 and ``k`` (forwarded
    to ``set_k``) and applied to RealFeatures built from ``data``.

    Returns the RealFeatures object built from ``data``. The value returned
    by ``converter.apply`` is not captured.
    """
    from shogun.Features import RealFeatures
    from shogun.Converter import LocallyLinearEmbedding

    real_feats = RealFeatures(data)

    embedder = LocallyLinearEmbedding()
    embedder.set_target_dim(1)
    embedder.set_k(k)
    embedder.apply(real_feats)

    return real_feats


if __name__ == '__main__':
    print('LocallyLinearEmbedding')
    converter_locallylinearembedding_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[data, 10], [data, 20]]


def converter_localtangentspacealignment_modular(data, k):
    """Run Shogun's LocalTangentSpaceAlignment converter on ``data``.

    The converter is configured with target dimension 1 and ``k`` (forwarded
    to ``set_k``) and applied to RealFeatures built from ``data``.

    Returns the RealFeatures object built from ``data``. The value returned
    by ``converter.apply`` is not captured.
    """
    from shogun.Features import RealFeatures
    from shogun.Converter import LocalTangentSpaceAlignment

    real_feats = RealFeatures(data)

    embedder = LocalTangentSpaceAlignment()
    embedder.set_target_dim(1)
    embedder.set_k(k)
    embedder.apply(real_feats)

    return real_feats


if __name__ == '__main__':
    print('LocalTangentSpaceAlignment')
    converter_localtangentspacealignment_modular(*parameter_list[0])
from tools.load import LoadMatrix
import numpy

lm = LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

# Single parameter set: just the data matrix.
parameter_list = [[data]]


def converter_multidimensionalscaling_modular(data):
    """Embed ``data`` with MultidimensionalScaling and measure distortion.

    A EuclidianDistance is initialised on the original features and another
    on the embedding produced by the converter (target dimension 2, landmark
    mode off).

    Returns the norm of the difference between the two distance matrices
    divided by the norm of the original distance matrix.
    """
    from shogun.Features import RealFeatures
    from shogun.Converter import MultidimensionalScaling
    from shogun.Distance import EuclidianDistance

    real_feats = RealFeatures(data)

    original_dist = EuclidianDistance()
    original_dist.init(real_feats, real_feats)

    mds = MultidimensionalScaling()
    mds.set_target_dim(2)
    mds.set_landmark(False)
    embedded_feats = mds.apply(real_feats)

    embedded_dist = EuclidianDistance()
    embedded_dist.init(embedded_feats, embedded_feats)

    # Fetch the embedded matrix first, then the original one, as before.
    matrix_embedded = embedded_dist.get_distance_matrix()
    matrix_original = original_dist.get_distance_matrix()

    deviation = numpy.linalg.norm(matrix_embedded - matrix_original)
    return deviation / numpy.linalg.norm(matrix_original)


if __name__ == '__main__':
    print('MultidimensionalScaling')
    converter_multidimensionalscaling_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindat, testdat], [traindat, testdat]]


def distance_braycurtis_modular(fm_train_real=traindat, fm_test_real=testdat):
    """Compute BrayCurtisDistance matrices for train/train and train/test.

    Returns ``(distance, dm_train, dm_test)``: the distance object (last
    initialised on the train/test pair) and the two distance matrices.
    """
    from shogun.Features import RealFeatures
    from shogun.Distance import BrayCurtisDistance

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    # Train-vs-train matrix first, then re-init for train-vs-test.
    metric = BrayCurtisDistance(train_feats, train_feats)
    train_matrix = metric.get_distance_matrix()

    metric.init(train_feats, test_feats)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix


if __name__ == '__main__':
    print('BrayCurtisDistance')
    distance_braycurtis_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindat, testdat], [traindat, testdat]]


def distance_canberra_modular(fm_train_real=traindat, fm_test_real=testdat):
    """Compute CanberraMetric distance matrices for train/train and train/test.

    Returns ``(distance, dm_train, dm_test)``: the distance object (last
    initialised on the train/test pair) and the two distance matrices.
    """
    from shogun.Features import RealFeatures
    from shogun.Distance import CanberraMetric

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    # Train-vs-train matrix first, then re-init for train-vs-test.
    distance = CanberraMetric(feats_train, feats_train)
    dm_train = distance.get_distance_matrix()

    distance.init(feats_train, feats_test)
    dm_test = distance.get_distance_matrix()

    return distance, dm_train, dm_test


if __name__ == '__main__':
    # Banner now matches the class name (was misspelled 'CanberaMetric').
    print('CanberraMetric')
    distance_canberra_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindna, testdna, 3, 0, False],
                  [traindna, testdna, 3, 0, False]]


def distance_canberraword_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                  order=3, gap=0, reverse=False):
    """Compute CanberraWordDistance matrices on word features from DNA strings.

    Train and test strings are loaded into StringCharFeatures, converted to
    StringWordFeatures via ``obtain_from_char(charfeat, order-1, order, gap,
    reverse)`` and run through a SortWordString preprocessor. The
    preprocessor is initialised on the training features only and the same
    instance is attached to the test features.

    Returns ``(distance, dm_train, dm_test)``.
    """
    from shogun.Features import StringCharFeatures, StringWordFeatures, DNA
    from shogun.Preprocessor import SortWordString
    from shogun.Distance import CanberraWordDistance

    # --- training features -------------------------------------------------
    chars = StringCharFeatures(DNA)
    chars.set_features(fm_train_dna)
    train_words = StringWordFeatures(chars.get_alphabet())
    train_words.obtain_from_char(chars, order - 1, order, gap, reverse)

    sorter = SortWordString()
    sorter.init(train_words)
    train_words.add_preprocessor(sorter)
    train_words.apply_preprocessor()

    # --- test features (reuse the sorter initialised on train) -------------
    chars = StringCharFeatures(DNA)
    chars.set_features(fm_test_dna)
    test_words = StringWordFeatures(chars.get_alphabet())
    test_words.obtain_from_char(chars, order - 1, order, gap, reverse)
    test_words.add_preprocessor(sorter)
    test_words.apply_preprocessor()

    # --- distance matrices -------------------------------------------------
    metric = CanberraWordDistance(train_words, train_words)
    train_matrix = metric.get_distance_matrix()

    metric.init(train_words, test_words)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix


if __name__ == '__main__':
    print('CanberraWordDistance')
    distance_canberraword_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindat, testdat], [traindat, testdat]]


def distance_chebyshew_modular(fm_train_real=traindat, fm_test_real=testdat):
    """Compute ChebyshewMetric distance matrices for train/train and train/test.

    Returns ``(distance, dm_train, dm_test)``: the distance object (last
    initialised on the train/test pair) and the two distance matrices.
    """
    from shogun.Features import RealFeatures
    from shogun.Distance import ChebyshewMetric

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    # Train-vs-train matrix first, then re-init for train-vs-test.
    metric = ChebyshewMetric(train_feats, train_feats)
    train_matrix = metric.get_distance_matrix()

    metric.init(train_feats, test_feats)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix


if __name__ == '__main__':
    print('ChebyshewMetric')
    distance_chebyshew_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindat, testdat], [traindat, testdat]]


def distance_chisquare_modular(fm_train_real=traindat, fm_test_real=testdat):
    """Compute ChiSquareDistance matrices for train/train and train/test.

    Returns ``(distance, dm_train, dm_test)``: the distance object (last
    initialised on the train/test pair) and the two distance matrices.
    """
    from shogun.Features import RealFeatures
    from shogun.Distance import ChiSquareDistance

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    # Train-vs-train matrix first, then re-init for train-vs-test.
    metric = ChiSquareDistance(train_feats, train_feats)
    train_matrix = metric.get_distance_matrix()

    metric.init(train_feats, test_feats)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix


if __name__ == '__main__':
    print('ChiSquareDistance')
    distance_chisquare_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindat, testdat], [traindat, testdat]]


def distance_cosine_modular(fm_train_real=traindat, fm_test_real=testdat):
    """Compute CosineDistance matrices for train/train and train/test.

    Returns ``(distance, dm_train, dm_test)``: the distance object (last
    initialised on the train/test pair) and the two distance matrices.
    """
    from shogun.Features import RealFeatures
    from shogun.Distance import CosineDistance

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    # Train-vs-train matrix first, then re-init for train-vs-test.
    metric = CosineDistance(train_feats, train_feats)
    train_matrix = metric.get_distance_matrix()

    metric.init(train_feats, test_feats)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix


if __name__ == '__main__':
    print('CosineDistance')
    distance_cosine_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindat, testdat], [traindat, testdat]]


def distance_euclidian_modular(fm_train_real=traindat, fm_test_real=testdat):
    """Compute EuclidianDistance matrices for train/train and train/test.

    Returns ``(distance, dm_train, dm_test)``: the distance object (last
    initialised on the train/test pair) and the two distance matrices.
    """
    from shogun.Features import RealFeatures
    from shogun.Distance import EuclidianDistance

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    # Train-vs-train matrix first, then re-init for train-vs-test.
    metric = EuclidianDistance(train_feats, train_feats)
    train_matrix = metric.get_distance_matrix()

    metric.init(train_feats, test_feats)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix


if __name__ == '__main__':
    print('EuclidianDistance')
    distance_euclidian_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindat, testdat], [traindat, testdat]]


def distance_geodesic_modular(fm_train_real=traindat, fm_test_real=testdat):
    """Compute GeodesicMetric distance matrices for train/train and train/test.

    Returns ``(distance, dm_train, dm_test)``: the distance object (last
    initialised on the train/test pair) and the two distance matrices.
    """
    from shogun.Features import RealFeatures
    from shogun.Distance import GeodesicMetric

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    # Train-vs-train matrix first, then re-init for train-vs-test.
    metric = GeodesicMetric(train_feats, train_feats)
    train_matrix = metric.get_distance_matrix()

    metric.init(train_feats, test_feats)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix


if __name__ == '__main__':
    print('GeodesicMetric')
    distance_geodesic_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')
# NOTE(review): fm_test_real.dat is read with load_labels here, whereas the
# other examples read it with load_numbers. The value is only passed through
# as fm_test_real, which the function never uses - confirm this is intended.
testdat = lm.load_labels('../data/fm_test_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [
    [traindna, testdna, testdat, 4, 0, False, False],
    [traindna, testdna, testdat, 3, 0, False, False],
]


def distance_hammingword_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                 fm_test_real=testdat, order=3, gap=0,
                                 reverse=False, use_sign=False):
    """Compute HammingWordDistance matrices on word features from DNA strings.

    Train and test strings are loaded into StringCharFeatures, converted to
    StringWordFeatures via ``obtain_from_char(charfeat, order-1, order, gap,
    reverse)`` and run through a SortWordString preprocessor. The
    preprocessor is initialised on the training features only and the same
    instance is attached to the test features. ``use_sign`` is forwarded to
    the HammingWordDistance constructor; ``fm_test_real`` is accepted but
    not used.

    Returns ``(distance, dm_train, dm_test)``.
    """
    from shogun.Features import StringCharFeatures, StringWordFeatures, DNA
    from shogun.Preprocessor import SortWordString
    from shogun.Distance import HammingWordDistance

    # --- training features -------------------------------------------------
    chars = StringCharFeatures(DNA)
    chars.set_features(fm_train_dna)
    train_words = StringWordFeatures(chars.get_alphabet())
    train_words.obtain_from_char(chars, order - 1, order, gap, reverse)

    sorter = SortWordString()
    sorter.init(train_words)
    train_words.add_preprocessor(sorter)
    train_words.apply_preprocessor()

    # --- test features (reuse the sorter initialised on train) -------------
    chars = StringCharFeatures(DNA)
    chars.set_features(fm_test_dna)
    test_words = StringWordFeatures(chars.get_alphabet())
    test_words.obtain_from_char(chars, order - 1, order, gap, reverse)
    test_words.add_preprocessor(sorter)
    test_words.apply_preprocessor()

    # --- distance matrices -------------------------------------------------
    metric = HammingWordDistance(train_words, train_words, use_sign)
    train_matrix = metric.get_distance_matrix()

    metric.init(train_words, test_words)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix


if __name__ == '__main__':
    print('HammingWordDistance')
    distance_hammingword_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindat, testdat], [traindat, testdat]]


def distance_jensen_modular(fm_train_real=traindat, fm_test_real=testdat):
    """Compute JensenMetric distance matrices for train/train and train/test.

    Returns ``(distance, dm_train, dm_test)``: the distance object (last
    initialised on the train/test pair) and the two distance matrices.
    """
    from shogun.Features import RealFeatures
    from shogun.Distance import JensenMetric

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    # Train-vs-train matrix first, then re-init for train-vs-test.
    metric = JensenMetric(train_feats, train_feats)
    train_matrix = metric.get_distance_matrix()

    metric.init(train_feats, test_feats)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix


if __name__ == '__main__':
    print('JensenMetric')
    distance_jensen_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindat, testdat], [traindat, testdat]]


def distance_manhatten_modular(fm_train_real=traindat, fm_test_real=testdat):
    """Compute ManhattanMetric distance matrices for train/train and train/test.

    Returns ``(distance, dm_train, dm_test)``: the distance object (last
    initialised on the train/test pair) and the two distance matrices.
    """
    from shogun.Features import RealFeatures
    from shogun.Distance import ManhattanMetric

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    # Train-vs-train matrix first, then re-init for train-vs-test.
    metric = ManhattanMetric(train_feats, train_feats)
    train_matrix = metric.get_distance_matrix()

    metric.init(train_feats, test_feats)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix


if __name__ == '__main__':
    print('ManhattanMetric')
    distance_manhatten_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindna, testdna, testdat, 3, 0, False],
                  [traindna, testdna, testdat, 4, 0, False]]


def distance_manhattenword_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                   fm_test_real=testdat, order=3, gap=0,
                                   reverse=False):
    """Compute ManhattanWordDistance matrices on word features from DNA strings.

    Train and test strings are loaded into StringCharFeatures, converted to
    StringWordFeatures via ``obtain_from_char(charfeat, order-1, order, gap,
    reverse)`` and run through a SortWordString preprocessor. The
    preprocessor is initialised on the training features only and the same
    instance is attached to the test features. ``fm_test_real`` is accepted
    but not used.

    Returns ``(dm_train, dm_test)`` only; the distance object itself is not
    part of the return value.
    """
    from shogun.Features import StringCharFeatures, StringWordFeatures, DNA
    from shogun.Preprocessor import SortWordString
    from shogun.Distance import ManhattanWordDistance

    # --- training features -------------------------------------------------
    chars = StringCharFeatures(DNA)
    chars.set_features(fm_train_dna)
    train_words = StringWordFeatures(chars.get_alphabet())
    train_words.obtain_from_char(chars, order - 1, order, gap, reverse)

    sorter = SortWordString()
    sorter.init(train_words)
    train_words.add_preprocessor(sorter)
    train_words.apply_preprocessor()

    # --- test features (reuse the sorter initialised on train) -------------
    chars = StringCharFeatures(DNA)
    chars.set_features(fm_test_dna)
    test_words = StringWordFeatures(chars.get_alphabet())
    test_words.obtain_from_char(chars, order - 1, order, gap, reverse)
    test_words.add_preprocessor(sorter)
    test_words.apply_preprocessor()

    # --- distance matrices -------------------------------------------------
    metric = ManhattanWordDistance(train_words, train_words)
    train_matrix = metric.get_distance_matrix()

    metric.init(train_words, test_words)
    test_matrix = metric.get_distance_matrix()

    return train_matrix, test_matrix


if __name__ == '__main__':
    print('ManhattanWordDistance')
    distance_manhattenword_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindat, testdat, 3], [traindat, testdat, 4]]


def distance_minkowski_modular(fm_train_real=traindat, fm_test_real=testdat,
                               k=3):
    """Compute MinkowskiMetric distance matrices for train/train and train/test.

    ``k`` is forwarded as the third constructor argument of MinkowskiMetric.

    Returns ``(distance, dm_train, dm_test)``: the distance object (last
    initialised on the train/test pair) and the two distance matrices.
    """
    from shogun.Features import RealFeatures
    from shogun.Distance import MinkowskiMetric

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    # Train-vs-train matrix first, then re-init for train-vs-test.
    metric = MinkowskiMetric(train_feats, train_feats, k)
    train_matrix = metric.get_distance_matrix()

    metric.init(train_feats, test_feats)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix


if __name__ == '__main__':
    print('MinkowskiMetric')
    distance_minkowski_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindat, testdat], [traindat, testdat]]


def distance_normsquared_modular(fm_train_real=traindat, fm_test_real=testdat):
    """Compute EuclidianDistance matrices with the square root disabled.

    ``set_disable_sqrt(True)`` is called on the distance before either matrix
    is fetched.

    Returns ``(distance, dm_train, dm_test)``: the distance object (last
    initialised on the train/test pair) and the two distance matrices.
    """
    from shogun.Features import RealFeatures
    from shogun.Distance import EuclidianDistance

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    metric = EuclidianDistance(train_feats, train_feats)
    metric.set_disable_sqrt(True)
    train_matrix = metric.get_distance_matrix()

    metric.init(train_feats, test_feats)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix


if __name__ == '__main__':
    print('EuclidianDistance - NormSquared')
    distance_normsquared_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindat, testdat], [traindat, testdat]]


def distance_sparseeuclidean_modular(fm_train_real=traindat,
                                     fm_test_real=testdat):
    """Compute SparseEuclidianDistance matrices for train/train and train/test.

    Both inputs are first wrapped in RealFeatures and then converted to
    SparseRealFeatures with ``obtain_from_simple``.

    Returns ``(distance, dm_train, dm_test)``: the distance object (last
    initialised on the train/test pair) and the two distance matrices.
    """
    from shogun.Features import RealFeatures, SparseRealFeatures
    from shogun.Distance import SparseEuclidianDistance

    dense = RealFeatures(fm_train_real)
    sparse_train = SparseRealFeatures()
    sparse_train.obtain_from_simple(dense)

    dense = RealFeatures(fm_test_real)
    sparse_test = SparseRealFeatures()
    sparse_test.obtain_from_simple(dense)

    metric = SparseEuclidianDistance(sparse_train, sparse_train)
    train_matrix = metric.get_distance_matrix()

    metric.init(sparse_train, sparse_test)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix


if __name__ == '__main__':
    print('SparseEuclidianDistance')
    distance_sparseeuclidean_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindat, testdat], [traindat, testdat]]


def distance_tanimoto_modular(fm_train_real=traindat, fm_test_real=testdat):
    """Compute TanimotoDistance matrices for train/train and train/test.

    Returns ``(distance, dm_train, dm_test)``: the distance object (last
    initialised on the train/test pair) and the two distance matrices.
    """
    from shogun.Features import RealFeatures
    from shogun.Distance import TanimotoDistance

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    # Train-vs-train matrix first, then re-init for train-vs-test.
    metric = TanimotoDistance(train_feats, train_feats)
    train_matrix = metric.get_distance_matrix()

    metric.init(train_feats, test_feats)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix


if __name__ == '__main__':
    print('TanimotoDistance')
    distance_tanimoto_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindna, 3, 0, False], [traindna, 4, 0, False]]


def distribution_histogram_modular(fm_dna=traindna, order=3, gap=0,
                                   reverse=False):
    """Train a Shogun Histogram distribution on word features from DNA strings.

    The strings are loaded into StringCharFeatures and converted to
    StringWordFeatures via ``obtain_from_char(charfeat, order-1, order, gap,
    reverse)`` before the Histogram is trained on them.

    Returns ``(histo, out_sample, out_likelihood)``: the trained object, the
    result of ``get_log_likelihood_sample()`` and the result of
    ``get_log_likelihood()``, in that order.
    """
    from shogun.Features import StringWordFeatures, StringCharFeatures, DNA
    from shogun.Distribution import Histogram

    chars = StringCharFeatures(DNA)
    chars.set_features(fm_dna)
    words = StringWordFeatures(chars.get_alphabet())
    words.obtain_from_char(chars, order - 1, order, gap, reverse)

    model = Histogram(words)
    model.train()
    model.get_histogram()

    # Queried as in the original example; the per-example / per-parameter
    # get_log_derivative(j, i) loop that consumed them is disabled there.
    vector_count = words.get_num_vectors()
    param_count = model.get_num_model_parameters()

    likelihood = model.get_log_likelihood()
    sample_likelihood = model.get_log_likelihood_sample()

    return model, sample_likelihood, likelihood


###########################################################################
# call functions
###########################################################################

if __name__ == '__main__':
    print('Histogram')
    distribution_histogram_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
data = lm.load_cubes('../data/fm_train_cube.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [
    [data, 1, 64, 1e-5, 2, 0, False, 5],
    [data, 3, 6, 1e-1, 1, 0, False, 2],
]


def distribution_hmm_modular(fm_cube, N, M, pseudo, order, gap, reverse,
                             num_examples):
    """Train a Shogun HMM on word features built from CUBE strings.

    The strings are loaded into StringCharFeatures and converted to
    StringWordFeatures via ``obtain_from_char(charfeat, order-1, order, gap,
    reverse)``. The HMM is constructed with ``(feats, N, M, pseudo)``,
    trained with ``train()`` and then ``baum_welch_viterbi_train(BW_NORMAL)``.
    Afterwards every log derivative, best path and best-path state is
    queried; the accumulated sums are not returned.

    The ``num_examples`` argument is ignored: the count is taken from
    ``feats.get_num_vectors()`` instead.

    Returns ``(lik_example, lik_sample, hmm)``.
    """
    from shogun.Features import StringWordFeatures, StringCharFeatures, CUBE
    from shogun.Distribution import HMM, BW_NORMAL

    chars = StringCharFeatures(CUBE)
    chars.set_features(fm_cube)
    words = StringWordFeatures(chars.get_alphabet())
    words.obtain_from_char(chars, order - 1, order, gap, reverse)

    model = HMM(words, N, M, pseudo)
    model.train()
    model.baum_welch_viterbi_train(BW_NORMAL)

    vector_count = words.get_num_vectors()
    param_count = model.get_num_model_parameters()

    # Exercise the derivative accessor for every (parameter, example) pair.
    for example in xrange(vector_count):
        for param in xrange(param_count):
            model.get_log_derivative(param, example)

    # Exercise the Viterbi accessors; totals are accumulated but unused.
    path_total = 0
    state_total = 0
    for example in xrange(vector_count):
        path_total += model.best_path(example)
        for state in xrange(N):
            state_total += model.get_best_path_state(example, state)

    example_likelihood = model.get_log_likelihood()
    sample_likelihood = model.get_log_likelihood_sample()

    return example_likelihood, sample_likelihood, model


###########################################################################
# call functions
###########################################################################

if __name__ == '__main__':
    print('HMM')
    distribution_hmm_modular(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindna, 3, 0, False], [traindna, 4, 0, False]]


def distribution_linearhmm_modular(fm_dna=traindna, order=3, gap=0,
                                   reverse=False):
    """Train a Shogun LinearHMM on word features built from DNA strings.

    The strings are loaded into StringCharFeatures and converted to
    StringWordFeatures via ``obtain_from_char(charfeat, order-1, order, gap,
    reverse)``. After training, the transition probabilities and every log
    derivative are queried; those results are discarded.

    Returns ``(hmm, out_likelihood, out_sample)``: the trained object, the
    result of ``get_log_likelihood()`` and the result of
    ``get_log_likelihood_sample()``.
    """
    from shogun.Features import StringWordFeatures, StringCharFeatures, DNA
    from shogun.Distribution import LinearHMM

    chars = StringCharFeatures(DNA)
    chars.set_features(fm_dna)
    words = StringWordFeatures(chars.get_alphabet())
    words.obtain_from_char(chars, order - 1, order, gap, reverse)

    model = LinearHMM(words)
    model.train()
    model.get_transition_probs()

    vector_count = words.get_num_vectors()
    param_count = model.get_num_model_parameters()

    # Exercise the derivative accessor for every (parameter, example) pair.
    for example in xrange(vector_count):
        for param in xrange(param_count):
            model.get_log_derivative(param, example)

    likelihood = model.get_log_likelihood()
    sample_likelihood = model.get_log_likelihood_sample()

    return model, likelihood, sample_likelihood


###########################################################################
# call functions
###########################################################################

if __name__ == '__main__':
    # The example runs before its banner is printed, as in the original.
    distribution_linearhmm_modular(*parameter_list[0])
    print('LinearHMM')
from tools.load import LoadMatrix

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')

# Each entry is one positional-argument list for the example function.
parameter_list = [[traindna, 3], [traindna, 4]]


def distribution_ppwm_modular(fm_dna=traindna, order=3):
    """Compute the ``w`` matrix and a scoring matrix of a PositionalPWM.

    A PositionalPWM is configured with sigma 1, mean 4 and the element-wise
    log of a fixed 4x3 weight matrix. ``compute_w(20)`` and
    ``compute_scoring(1)`` are then called.

    ``fm_dna`` and ``order`` are only used to build StringByteFeatures, which
    are not passed to the PositionalPWM: the ``PositionalPWM(feats)`` /
    ``train()`` path was already disabled in the original example.

    Returns ``(w, u)``: the results of ``get_w()`` and ``get_scoring(0)``.
    """
    from shogun.Features import StringByteFeatures, StringCharFeatures, DNA
    from shogun.Distribution import PositionalPWM
    from numpy import array, log

    # Built exactly as in the original example, although nothing below
    # consumes these features.
    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_dna)
    feats = StringByteFeatures(charfeat.get_alphabet())
    feats.obtain_from_char(charfeat, order - 1, order, 0, False)

    L = 20
    sigma = 1
    mu = 4

    ppwm = PositionalPWM()
    ppwm.set_sigma(sigma)
    ppwm.set_mean(mu)

    # The original assigned a 4x4 matrix first and immediately overwrote it;
    # only this 4x3 matrix ever reached set_pwm().
    pwm = array([[0.01, 0.09, 0.1],
                 [0.09, 0.01, 0.1],
                 [0.85, 0.4, 0.1],
                 [0.05, 0.5, 0.7]])
    ppwm.set_pwm(log(pwm))

    ppwm.compute_w(L)
    w = ppwm.get_w()

    ppwm.compute_scoring(1)
    u = ppwm.get_scoring(0)

    return w, u


###########################################################################
# call functions
###########################################################################

if __name__ == '__main__':
    print('PositionalPWM')
    distribution_ppwm_modular(*parameter_list[0])
from tools.load import LoadMatrix
from numpy import random

lm = LoadMatrix()
ground_truth = lm.load_labels('../data/label_train_twoclass.dat')

# Seeded so the random "predictions" are reproducible.
random.seed(17)
predicted = random.randn(len(ground_truth))

parameter_list = [[ground_truth, predicted]]


def evaluation_contingencytableevaluation_modular(ground_truth, predicted):
    """Evaluate predictions with each contingency-table measure in turn.

    Both arrays are wrapped in Shogun Labels. A plain
    ContingencyTableEvaluation is run first (result discarded), then one
    instance of every specialised measure is evaluated on the same pair.

    Returns a 9-tuple ``(accuracy, errorrate, bal, wracc, f1,
    crosscorrelation, recall, precision, specificity)``.
    """
    from shogun.Features import Labels
    from shogun.Evaluation import ContingencyTableEvaluation
    from shogun.Evaluation import AccuracyMeasure,ErrorRateMeasure,BALMeasure
    from shogun.Evaluation import WRACCMeasure,F1Measure,CrossCorrelationMeasure
    from shogun.Evaluation import RecallMeasure,PrecisionMeasure,SpecificityMeasure

    truth = Labels(ground_truth)
    guess = Labels(predicted)

    ContingencyTableEvaluation().evaluate(guess, truth)

    # Order here defines the order of the returned tuple.
    measure_classes = (
        AccuracyMeasure,
        ErrorRateMeasure,
        BALMeasure,
        WRACCMeasure,
        F1Measure,
        CrossCorrelationMeasure,
        RecallMeasure,
        PrecisionMeasure,
        SpecificityMeasure,
    )

    scores = []
    for measure_class in measure_classes:
        scores.append(measure_class().evaluate(guess, truth))

    return tuple(scores)


if __name__ == '__main__':
    print('ContingencyTableEvaluation')
    evaluation_contingencytableevaluation_modular(*parameter_list[0])
from tools.load import LoadMatrix
from numpy import random

lm = LoadMatrix()

# Two independent, reproducible random vectors of length N.
N = 100
random.seed(17)
ground_truth = random.randn(N)
predicted = random.randn(N)

parameter_list = [[ground_truth, predicted]]


def evaluation_meansquarederror_modular(ground_truth, predicted):
    """Evaluate ``predicted`` against ``ground_truth`` with MeanSquaredError.

    Both arrays are wrapped in Shogun Labels before evaluation.

    Returns the value produced by ``MeanSquaredError().evaluate``.
    """
    from shogun.Features import Labels
    from shogun.Evaluation import MeanSquaredError

    truth = Labels(ground_truth)
    guess = Labels(predicted)

    scorer = MeanSquaredError()
    return scorer.evaluate(guess, truth)


if __name__ == '__main__':
    print('MeanSquaredError')
    evaluation_meansquarederror_modular(*parameter_list[0])
from tools.load import LoadMatrix
from numpy import random

lm = LoadMatrix()
random.seed(17)

ground_truth = lm.load_labels('../data/label_train_multiclass.dat')
# "Predictions" are the same labels multiplied by two.
predicted = lm.load_labels('../data/label_train_multiclass.dat') * 2

parameter_list = [[ground_truth, predicted]]


def evaluation_multiclassaccuracy_modular(ground_truth, predicted):
    """Evaluate ``predicted`` against ``ground_truth`` with MulticlassAccuracy.

    Both arrays are wrapped in Shogun Labels before evaluation.

    Returns the value produced by ``MulticlassAccuracy().evaluate``.
    """
    from shogun.Features import Labels
    from shogun.Evaluation import MulticlassAccuracy

    truth = Labels(ground_truth)
    guess = Labels(predicted)

    scorer = MulticlassAccuracy()
    return scorer.evaluate(guess, truth)


if __name__ == '__main__':
    print('MulticlassAccuracy')
    evaluation_multiclassaccuracy_modular(*parameter_list[0])
from tools.load import LoadMatrix
from numpy import random

lm = LoadMatrix()
ground_truth = lm.load_labels('../data/label_train_twoclass.dat')

# Seeded so the random "predictions" are reproducible.
random.seed(17)
predicted = random.randn(len(ground_truth))

parameter_list = [[ground_truth, predicted]]


def evaluation_prcevaluation_modular(ground_truth, predicted):
    """Run PRCEvaluation on ``predicted`` against ``ground_truth``.

    Both arrays are wrapped in Shogun Labels before evaluation.

    Returns ``(evaluator.get_PRC(), evaluator.get_auPRC())``.
    """
    from shogun.Features import Labels
    from shogun.Evaluation import PRCEvaluation

    truth = Labels(ground_truth)
    guess = Labels(predicted)

    prc = PRCEvaluation()
    prc.evaluate(guess, truth)

    curve = prc.get_PRC()
    area = prc.get_auPRC()
    return curve, area


if __name__ == '__main__':
    print('PRCEvaluation')
    evaluation_prcevaluation_modular(*parameter_list[0])
from tools.load import LoadMatrix
from numpy import random

lm = LoadMatrix()
ground_truth = lm.load_labels('../data/label_train_twoclass.dat')

# Seeded so the random "predictions" are reproducible.
random.seed(17)
predicted = random.randn(len(ground_truth))

parameter_list = [[ground_truth, predicted]]


def evaluation_rocevaluation_modular(ground_truth, predicted):
    """Run ROCEvaluation on ``predicted`` against ``ground_truth``.

    Both arrays are wrapped in Shogun Labels before evaluation.

    Returns ``(evaluator.get_ROC(), evaluator.get_auROC())``.
    """
    from shogun.Features import Labels
    from shogun.Evaluation import ROCEvaluation

    truth = Labels(ground_truth)
    guess = Labels(predicted)

    roc = ROCEvaluation()
    roc.evaluate(guess, truth)

    curve = roc.get_ROC()
    area = roc.get_auROC()
    return curve, area


if __name__ == '__main__':
    print('ROCEvaluation')
    evaluation_rocevaluation_modular(*parameter_list[0])
parameter_list = [[1000]]


def evaluation_thresholds_modular(index):
    """Compare ROC points at one threshold with rates computed by hand.

    Noisy scores are generated from a seeded random generator; the first
    half of the examples is labelled -1.0 and the second half +1.0. A
    ROCEvaluation is run and the threshold at position ``index`` is used to
    recompute the fraction of positive and of negative examples whose score
    exceeds it.

    Returns ``(tp[index], fp[index], tpr_by_hand, fpr_by_hand)``.
    """
    from modshogun import Labels, ROCEvaluation
    import numpy

    numpy.random.seed(17)
    output = numpy.arange(-1, 1, 0.001)
    output = (0.3 * output + 0.7 * (numpy.random.rand(len(output)) - 0.5))

    # Floor division keeps the repetition count an integer on every Python
    # version (plain "/" only yields an int on Python 2).
    half = len(output) // 2
    label = [-1.0] * half
    label.extend([1.0] * half)
    label = numpy.array(label)

    pred = Labels(output)
    truth = Labels(label)

    evaluator = ROCEvaluation()
    evaluator.evaluate(pred, truth)

    fp, tp = evaluator.get_ROC()

    thresh = evaluator.get_thresholds()
    b = thresh[index]

    # Debug output disabled in the original example:
    #print "tpr", numpy.mean(output[label>0]>b), tp[index]
    #print "fpr", numpy.mean(output[label<0]>b), fp[index]

    return (tp[index], fp[index],
            numpy.mean(output[label > 0] > b),
            numpy.mean(output[label < 0] > b))


if __name__ == '__main__':
    print('Evaluation with Thresholds')
    evaluation_thresholds_modular(*parameter_list[0])
# Example: save Shogun features and labels to binary, ASCII and HDF5 files,
# load them back into fresh objects, then delete the files again.
from tools.load import LoadMatrix
lm=LoadMatrix()
data=lm.load_numbers('../data/fm_train_real.dat')
label=lm.load_numbers('../data/label_train_twoclass.dat')
parameter_list=[[data,label]]
# features_io_modular(fm_train_real, label_train_twoclass)
#   fm_train_real: matrix used to build both SparseRealFeatures and
#     RealFeatures.
#   label_train_twoclass: accepted but never read; the Labels object below is
#     built from a literal array instead.
#   Returns (feats, feats2, lab, lab2): the RealFeatures that were saved, the
#   RealFeatures last loaded (from HDF5), the Labels that were saved and the
#   Labels last loaded (from HDF5).
#   Side effect: creates eight files in the current working directory and
#   unlinks all of them before returning.
def features_io_modular(fm_train_real, label_train_twoclass):
import numpy
from shogun.Features import SparseRealFeatures, RealFeatures, Labels
# NOTE(review): GaussianKernel is imported but not used in this function.
from shogun.Kernel import GaussianKernel
from shogun.IO import AsciiFile, BinaryFile, HDF5File
# --- sparse features: write binary + ASCII, then read both back ---
# NOTE(review): every file object is bound to the same name `f`, so each new
# assignment drops the reference to the previous one. Presumably that is what
# flushes/closes a file before it is reopened for reading - confirm before
# renaming the handle or keeping extra references to it.
feats=SparseRealFeatures(fm_train_real)
feats2=SparseRealFeatures()
f=BinaryFile("fm_train_sparsereal.bin","w")
feats.save(f)
f=AsciiFile("fm_train_sparsereal.ascii","w")
feats.save(f)
f=BinaryFile("fm_train_sparsereal.bin")
feats2.load(f)
f=AsciiFile("fm_train_sparsereal.ascii")
feats2.load(f)
# --- dense features: write binary + HDF5 + ASCII, read binary and ASCII ---
# feats and feats2 are rebound here; the sparse objects are not returned.
feats=RealFeatures(fm_train_real)
feats2=RealFeatures()
f=BinaryFile("fm_train_real.bin","w")
feats.save(f)
f=HDF5File("fm_train_real.h5","w", "/data/doubles")
feats.save(f)
f=AsciiFile("fm_train_real.ascii","w")
feats.save(f)
f=BinaryFile("fm_train_real.bin")
feats2.load(f)
#print "diff binary", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten()))
f=AsciiFile("fm_train_real.ascii")
feats2.load(f)
#print "diff ascii", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten()))
# --- labels: write ASCII + binary + HDF5, read ASCII and binary ---
# The labels are a fixed three-element array, not label_train_twoclass.
lab=Labels(numpy.array([1.0,2.0,3.0]))
lab2=Labels()
f=AsciiFile("label_train_twoclass.ascii","w")
lab.save(f)
f=BinaryFile("label_train_twoclass.bin","w")
lab.save(f)
f=HDF5File("label_train_real.h5","w", "/data/labels")
lab.save(f)
f=AsciiFile("label_train_twoclass.ascii")
lab2.load(f)
f=BinaryFile("label_train_twoclass.bin")
lab2.load(f)
# --- HDF5 round trip for the dense features and the labels ---
f=HDF5File("fm_train_real.h5","r", "/data/doubles")
feats2.load(f)
#print feats2.get_feature_matrix()
f=HDF5File("label_train_real.h5","r", "/data/labels")
lab2.load(f)
#print lab2.get_labels()
#clean up
# The loop variable reuses the name `f`, so the last HDF5 file object is
# released when the loop starts and `f` ends up bound to a file name string.
import os
for f in ['fm_train_sparsereal.bin','fm_train_sparsereal.ascii',
'fm_train_real.bin','fm_train_real.h5','fm_train_real.ascii',
'label_train_real.h5', 'label_train_twoclass.ascii','label_train_twoclass.bin']:
os.unlink(f)
return feats, feats2, lab, lab2
# Run the example with the first parameter set when executed as a script.
if __name__=='__main__':
print 'Features IO'
features_io_modular(*parameter_list[0])
parameter_list = [['../data/train_sparsereal.light']]


def features_read_svmlight_format_modular(fname):
    """Load an SVMlight file into SparseRealFeatures and write it back out.

    The features and the labels returned by ``load_svmlight_file`` are
    written to ``testwrite.light`` in the current directory, which is then
    deleted again. Nothing is returned.
    """
    import os
    from shogun.Features import SparseRealFeatures

    scratch_file = 'testwrite.light'

    sparse_feats = SparseRealFeatures()
    loaded_labels = sparse_feats.load_svmlight_file(fname)

    sparse_feats.write_svmlight_file(scratch_file, loaded_labels)
    os.unlink(scratch_file)


if __name__ == '__main__':
    print('Reading SVMLIGHT format')
    features_read_svmlight_format_modular(*parameter_list[0])
import numpy

# dense uint8 example matrix (6 rows, 3 columns)
A = numpy.array(
    [[1, 2, 3], [4, 0, 0], [0, 0, 0], [0, 5, 0], [0, 0, 6], [9, 9, 9]],
    dtype=numpy.uint8)

parameter_list = [[A]]


def features_simple_byte_modular(A):
    """Wrap a dense uint8 matrix in ByteFeatures and read it back.

    Feature vector 0 is overwritten with [1, 4, 0, 0, 0, 9], which are
    the values of the example matrix's first column, and the feature
    matrix read back is asserted equal to ``A``.

    Returns the tuple (feature matrix read back, ByteFeatures object).
    """
    from shogun.Features import ByteFeatures

    byte_feats = ByteFeatures(A)

    first_vector = numpy.array([1, 4, 0, 0, 0, 9], dtype=numpy.uint8)
    byte_feats.set_feature_vector(first_vector, 0)

    matrix_out = byte_feats.get_feature_matrix()
    assert(numpy.all(matrix_out == A))
    return matrix_out, byte_feats


if __name__ == '__main__':
    print('ByteFeatures')
    features_simple_byte_modular(*parameter_list[0])
from shogun.Features import LongIntFeatures
from numpy import array, int64, all
# dense int64 example matrix (6 rows, 3 columns)
matrix = array(
    [[1, 2, 3], [4, 0, 0], [0, 0, 0], [0, 5, 0], [0, 0, 6], [9, 9, 9]],
    dtype=int64)

parameter_list = [[matrix]]


def features_simple_longint_modular(A=matrix):
    """Wrap a dense int64 matrix in LongIntFeatures and read it back.

    Feature vector 0 is overwritten with [1, 4, 0, 0, 0, 9] (the values
    of the example matrix's first column); the feature matrix read back
    is asserted equal to ``A`` and returned.
    """
    longint_feats = LongIntFeatures(A)

    first_vector = array([1, 4, 0, 0, 0, 9], dtype=int64)
    longint_feats.set_feature_vector(first_vector, 0)

    matrix_out = longint_feats.get_feature_matrix()
    assert(all(matrix_out == A))
    return matrix_out


if __name__ == '__main__':
    print('simple_longint')
    features_simple_longint_modular(*parameter_list[0])
from shogun.Features import RealFeatures, LongIntFeatures, ByteFeatures
from numpy import array, float64, int64, uint8, all
# the same 6x3 values as float64, int64 and uint8 matrices
_values = [[1, 2, 3], [4, 0, 0], [0, 0, 0], [0, 5, 0], [0, 0, 6], [9, 9, 9]]
matrixA = array(_values, dtype=float64)
matrixB = array(_values, dtype=int64)
matrixC = array(_values, dtype=uint8)

parameter_list = [[matrixA, matrixB, matrixC]]


def features_simple_modular(A=matrixA, B=matrixB, C=matrixC):
    """Wrap three dense matrices as Real, LongInt and Byte features.

    Vector 0 of the real features is overwritten with
    [1, 4, 0, 0, 0, 9] (the values of matrixA's first column). All three
    feature matrices are then read back and asserted equal to their
    inputs.

    Returns (a_out, b_out, c_out, a, b, c): the three matrices read back
    followed by the three feature objects.
    """
    real_feats = RealFeatures(A)
    longint_feats = LongIntFeatures(B)
    byte_feats = ByteFeatures(C)

    # or 16bit wide ...
    #feat1 = f.ShortFeatures(N.zeros((10,5),N.short))
    #feat2 = f.WordFeatures(N.zeros((10,5),N.uint16))

    first_vector = array([1, 4, 0, 0, 0, 9], dtype=float64)
    real_feats.set_feature_vector(first_vector, 0)

    real_out = real_feats.get_feature_matrix()
    longint_out = longint_feats.get_feature_matrix()
    byte_out = byte_feats.get_feature_matrix()

    assert(all(real_out == A))
    assert(all(longint_out == B))
    assert(all(byte_out == C))
    return real_out, longint_out, byte_out, real_feats, longint_feats, byte_feats


if __name__ == '__main__':
    print('simple')
    features_simple_modular(*parameter_list[0])
from shogun.Features import RealFeatures
from numpy import array, float64, all
# dense float64 example matrix (6 rows, 3 columns)
matrix = array(
    [[1, 2, 3], [4, 0, 0], [0, 0, 0], [0, 5, 0], [0, 0, 6], [9, 9, 9]],
    dtype=float64)

parameter_list = [[matrix]]


def features_simple_real_modular(A=matrix):
    """Wrap a dense float64 matrix in RealFeatures and read it back.

    Feature vector 0 is overwritten with [1, 4, 0, 0, 0, 9] (the values
    of the example matrix's first column); the feature matrix read back
    is asserted equal to ``A`` and returned.
    """
    real_feats = RealFeatures(A)

    # statistics / first vector can be inspected with:
    #print a.get_num_vectors()
    #print a.get_num_features()
    #print a.get_feature_vector(0)
    first_vector = array([1, 4, 0, 0, 0, 9], dtype=float64)
    real_feats.set_feature_vector(first_vector, 0)

    matrix_out = real_feats.get_feature_matrix()
    assert(all(matrix_out == A))
    return matrix_out


if __name__ == '__main__':
    print('simple_real')
    features_simple_real_modular(*parameter_list[0])
parameter_list = [['../data/snps.dat']]


def features_snp_modular(fname):
    """Load an ASCII SNP file and build SNPFeatures from it.

    The file ``fname`` is read into StringByteFeatures created with the
    SNP alphabet, and an SNPFeatures object is constructed on top of
    them. Nothing is returned.
    """
    from shogun.Features import StringByteFeatures, SNPFeatures, SNP

    byte_strings = StringByteFeatures(SNP)
    byte_strings.load_ascii_file(fname, False, SNP, SNP)
    #print sf.get_features()

    snp_feats = SNPFeatures(byte_strings)
    #print snps.get_feature_matrix()
    #print snps.get_minor_base_string()
    #print snps.get_major_base_string()


if __name__ == '__main__':
    print('SNP Features')
    features_snp_modular(*parameter_list[0])
import numpy

# dense float64 example matrix (6 rows, 3 columns)
A = numpy.array(
    [[1, 2, 3], [4, 0, 0], [0, 0, 0], [0, 5, 0], [0, 0, 6], [9, 9, 9]],
    dtype=numpy.float64)

parameter_list = [[A]]


def features_sparse_modular(A):
    """Build SparseRealFeatures from dense and scipy-sparse input.

    The features are created or filled three ways: from the dense
    matrix, via ``set_sparse_feature_matrix`` with a csc_matrix, and
    directly from a csc_matrix. After each step the full feature matrix
    is asserted equal to ``A``. Finally the sparse feature matrix is
    converted back through csc_matrix and checked as well. Nothing is
    returned.
    """
    from scipy.sparse import csc_matrix
    from shogun.Features import SparseRealFeatures
    from numpy import array, float64, all

    # sparse representation of the dense matrix
    # note, will work with types other than float64 too,
    # but requires recent scipy.sparse
    sparse_input = csc_matrix(A)

    # sparse shogun features from the dense matrix
    sparse_feats = SparseRealFeatures(A)
    dense_out = sparse_feats.get_full_feature_matrix()
    assert(all(dense_out == A))

    # replace the contents with the scipy sparse matrix
    sparse_feats.set_sparse_feature_matrix(sparse_input)
    dense_out = sparse_feats.get_full_feature_matrix()
    assert(all(dense_out == A))

    # construct directly from the scipy sparse matrix
    sparse_feats = SparseRealFeatures(sparse_input)
    dense_out = sparse_feats.get_full_feature_matrix()
    assert(all(dense_out == A))

    # convert the sparse shogun features back to a csc matrix
    round_trip = csc_matrix(sparse_feats.get_sparse_feature_matrix())
    round_trip_dense = round_trip.todense()
    assert(all(round_trip_dense == A))


if __name__ == '__main__':
    print('Sparse Features')
    features_sparse_modular(*parameter_list[0])
parameter_list = [['features_string_char_compressed_modular.py']]


def features_string_char_compressed_modular(fname):
    """Save and reload string features with several compression codecs.

    ``fname`` is read as RAWBYTE strings. The strings are saved to
    scratch files in the current directory using the UNCOMPRESSED, LZO,
    GZIP, BZIP2 and LZMA codecs and each file is loaded back with
    decompression on load. The LZO file is then loaded twice more
    without decompression and decoded through a DecompressCharString
    preprocessor, once applied eagerly and once on the fly.

    All scratch files are removed before returning, also when one of
    the steps raises. Nothing is returned.
    """
    import os
    from shogun.Features import StringCharFeatures, StringFileCharFeatures, RAWBYTE
    from shogun.Library import UNCOMPRESSED,SNAPPY,LZO,GZIP,BZIP2,LZMA, MSG_DEBUG
    from shogun.Preprocessor import DecompressCharString

    # (scratch file, codec, level) for every save / load-with-decompress
    # round trip.
    # snappy - not stable yet?! -- deliberately left out:
    #   ("foo_snappy.str", SNAPPY, 9)
    round_trips = [
        ("foo_uncompressed.str", UNCOMPRESSED, 1),
        ("foo_lzo.str", LZO, 9),
        ("foo_gzip.str", GZIP, 9),
        ("foo_bzip2.str", BZIP2, 9),
        ("foo_lzma.str", LZMA, 9),
    ]
    # foo_snappy.str is still cleaned up in case the snappy round trip
    # gets re-enabled.
    scratch_files = ['foo_uncompressed.str', 'foo_snappy.str', 'foo_lzo.str',
                     'foo_gzip.str', 'foo_bzip2.str', 'foo_lzma.str']

    f = StringFileCharFeatures(fname, RAWBYTE)
    #print "original strings", f.get_features()

    try:
        for scratch_name, codec, level in round_trips:
            f.save_compressed(scratch_name, codec, level)
            f2 = StringCharFeatures(RAWBYTE)
            f2.load_compressed(scratch_name, True)
            #print "strings", f2.get_features()

        # load compressed data and uncompress via preprocessor
        f2 = StringCharFeatures(RAWBYTE)
        f2.load_compressed("foo_lzo.str", False)
        f2.add_preprocessor(DecompressCharString(LZO))
        f2.apply_preprocessor()
        #print "lzo strings", f2.get_features()

        # load compressed data and uncompress on-the-fly via preprocessor
        f2 = StringCharFeatures(RAWBYTE)
        f2.load_compressed("foo_lzo.str", False)
        #f2.io.set_loglevel(MSG_DEBUG)
        f2.add_preprocessor(DecompressCharString(LZO))
        f2.enable_on_the_fly_preprocessing()
        #print "lzo strings", f2.get_features()
    finally:
        # clean up; a dedicated loop variable keeps `f` bound to the features
        for scratch_name in scratch_files:
            if os.path.exists(scratch_name):
                os.unlink(scratch_name)

##########################################################################################
# some perfectly compressible stuff follows
##########################################################################################
##########################################################################################
##########################################################################################
##########################################################################################
##########################################################################################
##########################################################################################
##########################################################################################
##########################################################################################
##########################################################################################
##########################################################################################
if __name__ == '__main__':
    print('Compressing StringCharFileFeatures')
    features_string_char_compressed_modular(*parameter_list[0])
strings = ['hey', 'guys', 'i', 'am', 'a', 'string']

parameter_list = [[strings]]


def features_string_char_modular(strings):
    """Create RAWBYTE StringCharFeatures and replace the first string.

    String 0 is overwritten with the characters 't', 'e', 's', 't'.

    Returns the tuple (result of ``get_features()``, feature object).
    """
    from shogun.Features import StringCharFeatures, RAWBYTE
    from numpy import array

    char_feats = StringCharFeatures(strings, RAWBYTE)

    # several stats can be printed with:
    #print "max string length", f.get_max_vector_length()
    #print "number of strings", f.get_num_vectors()
    #print "length of first string", f.get_vector_length(0)
    #print "string[5]", ''.join(f.get_feature_vector(5))
    #print "strings", f.get_features()

    # replace string 0
    replacement = array(['t', 'e', 's', 't'])
    char_feats.set_feature_vector(replacement, 0)

    return char_feats.get_features(), char_feats


if __name__ == '__main__':
    print('StringCharFeatures')
    features_string_char_modular(*parameter_list[0])
parameter_list = [['features_string_file_char_modular.py']]


def features_string_file_char_modular(fname):
    """Create RAWBYTE StringFileCharFeatures backed by the file ``fname``.

    Returns the feature object.
    """
    from shogun.Features import StringFileCharFeatures, RAWBYTE

    f = StringFileCharFeatures(fname, RAWBYTE)
    #print "strings", f.get_features()
    return f


if __name__ == '__main__':
    # The label used to read 'Compressing StringCharFileFeatures', copied
    # from the compression example; this example does no compression.
    print('StringFileCharFeatures')
    features_string_file_char_modular(*parameter_list[0])
parameter_list = [[".", "features_string_char_modular.py"]]


def features_string_file_modular(directory, fname):
    """Load RAWBYTE string features from a directory, then from a file.

    The features are first filled via ``load_from_directory(directory)``
    and afterwards loaded again from ``fname`` through an AsciiFile
    (one string per line).

    Returns the tuple (result of ``get_features()``, feature object).
    """
    from shogun.Features import StringCharFeatures, RAWBYTE
    from shogun.IO import AsciiFile

    # load features from directory
    f = StringCharFeatures(RAWBYTE)
    f.load_from_directory(directory)

    # several stats can be printed with:
    #print "max string length", f.get_max_vector_length()
    #print "number of strings", f.get_num_vectors()
    #print "length of first string", f.get_vector_length(0)
    #print "str[0,0:3]", f.get_feature(0,0), f.get_feature(0,1), f.get_feature(0,2)
    #print "len(str[0])", f.get_vector_length(0)
    #print "str[0]", f.get_feature_vector(0)

    # or load features from file (one string per line)
    fil = AsciiFile(fname)
    f.load(fil)
    #print f.get_features()

    # or load fasta file
    #f.load_fasta('fasta.fa')
    #print f.get_features()
    return f.get_features(), f


if __name__ == '__main__':
    # The label used to read 'StringWordFeatures' (copy-paste); this
    # example loads StringCharFeatures from a directory and a file.
    print('StringFileFeatures')
    features_string_file_modular(*parameter_list[0])
from shogun.Features import LongIntFeatures
from numpy import array, int64, all
# dense int64 example matrix (6 rows, 3 columns)
matrix = array(
    [[1, 2, 3], [4, 0, 0], [0, 0, 0], [0, 5, 0], [0, 0, 6], [9, 9, 9]],
    dtype=int64)

parameter_list = [[matrix, 3, 1, 2], [matrix, 3, 1, 2]]


def features_string_hashed_wd_modular(A=matrix, order=3, start_order=1, hash_bits=2):
    """Compute the dot-feature matrix of HashedWDFeatures for one string.

    A single hard-coded uint8 sequence is loaded into RAWDNA
    StringByteFeatures and wrapped in HashedWDFeatures built with
    ``start_order``, ``order``, ``from_order`` (set equal to ``order``)
    and ``hash_bits``.

    ``A`` is only used to construct a LongIntFeatures object whose
    result is not used afterwards.

    Returns the matrix from ``get_computed_dot_feature_matrix()``.
    """
    longint_feats = LongIntFeatures(A)

    from numpy import array, uint8
    from shogun.Features import HashedWDFeatures, StringByteFeatures, RAWDNA
    from shogun.IO import MSG_DEBUG

    sequences = [array([0, 1, 2, 3, 0, 1, 2, 3, 3, 2, 2, 1, 1], dtype=uint8)]
    from_order = order

    byte_strings = StringByteFeatures(RAWDNA)
    #f.io.set_loglevel(MSG_DEBUG)
    byte_strings.set_features(sequences)

    hashed = HashedWDFeatures(byte_strings, start_order, order, from_order, hash_bits)
    return hashed.get_computed_dot_feature_matrix()


if __name__ == '__main__':
    print('string_hashed_wd')
    features_string_hashed_wd_modular(*parameter_list[0])
# create string features with a single string
s = 10*'A' + 10*'C' + 10*'G' + 10*'T'

parameter_list = [[s]]


def features_string_sliding_window_modular(strings):
    """Extract windows from a single DNA string in several ways.

    ``strings`` is one string; it is wrapped in a one-element list to
    build DNA StringCharFeatures. The features are windowed with
    sliding windows of length 5 and then 4 (step 1), reset to the input
    string, and finally windowed at the positions 0, 6, 16 and 25 with
    window sizes 4 and then 8.

    Returns the feature object in its final state.
    """
    from shogun.Features import StringCharFeatures, DNA
    from shogun.Library import DynamicIntArray

    f = StringCharFeatures([strings], DNA)

    # slide a window of length 5 over features
    # (memory efficient, does not copy strings)
    f.obtain_by_sliding_window(5, 1)
    #print f.get_num_vectors()
    #print f.get_vector_length(0)
    #print f.get_vector_length(1)
    #print f.get_features()

    # slide a window of length 4 over features
    # (memory efficient, does not copy strings)
    f.obtain_by_sliding_window(4, 1)
    #print f.get_features()

    # extract string-windows at position 0,6,16,25 of window size 4
    # (memory efficient, does not copy strings)
    # Reset from the argument: the module-level `s` was used here before,
    # which silently ignored whatever string the caller passed in.
    f.set_features([strings])
    positions = DynamicIntArray()
    for start in (0, 6, 16, 25):
        positions.append_element(start)
    f.obtain_by_position_list(4, positions)
    #print f.get_features()

    # now extract windows of size 8 from the same position list
    f.obtain_by_position_list(8, positions)
    #print f.get_features()
    return f


if __name__ == '__main__':
    print('Sliding Window')
    features_string_sliding_window_modular(*parameter_list[0])
parameter_list = [[0, 2, 0, False], [0, 3, 0, False]]


def features_string_ulong_modular(start=0, order=2, gap=0, rev=False):
    """Derive StringUlongFeatures from three hard-coded char strings.

    RAWBYTE char features for 'hey', 'guys' and 'string' are converted
    with ``obtain_from_char(cf, start, order, gap, rev)``; string 0 of
    the result is then replaced by the uint64 vector [1, 2, 3, 4, 5].

    Returns (``get_features()``, feature vector 2, number of vectors).
    """
    from shogun.Features import StringCharFeatures, StringUlongFeatures, RAWBYTE
    from numpy import array, uint64

    # create string features
    cf = StringCharFeatures(['hey', 'guys', 'string'], RAWBYTE)
    uf = StringUlongFeatures(RAWBYTE)
    uf.obtain_from_char(cf, start, order, gap, rev)

    # replace string 0
    uf.set_feature_vector(array([1, 2, 3, 4, 5], dtype=uint64), 0)

    return uf.get_features(), uf.get_feature_vector(2), uf.get_num_vectors()


if __name__ == '__main__':
    # The label used to read 'simple_longint', copied from another example.
    print('StringUlongFeatures')
    features_string_ulong_modular(*parameter_list[0])
strings = ['hey', 'guys', 'string']

parameter_list = [[strings, 0, 2, 0, False]]


def features_string_word_modular(strings, start, order, gap, rev):
    """Derive StringWordFeatures from RAWBYTE char strings.

    The char features built from ``strings`` are converted with
    ``obtain_from_char(cf, start, order, gap, rev)``; string 0 of the
    result is then replaced by the uint16 vector [1, 2, 3, 4, 5].

    Returns the tuple (result of ``get_features()``, feature object).
    """
    from shogun.Features import StringCharFeatures, StringWordFeatures, RAWBYTE
    from numpy import array, uint16

    char_feats = StringCharFeatures(strings, RAWBYTE)
    word_feats = StringWordFeatures(RAWBYTE)
    word_feats.obtain_from_char(char_feats, start, order, gap, rev)

    # several stats can be printed with:
    #print "max string length", wf.get_max_vector_length()
    #print "number of strings", wf.get_num_vectors()
    #print "length of first string", wf.get_vector_length(0)
    #print "string[2]", wf.get_feature_vector(2)
    #print "strings", wf.get_features()

    # replace string 0
    replacement = array([1, 2, 3, 4, 5], dtype=uint16)
    word_feats.set_feature_vector(replacement, 0)

    return word_feats.get_features(), word_feats


if __name__ == '__main__':
    print('StringWordFeatures')
    features_string_word_modular(*parameter_list[0])
###########################################################################
# anova kernel
###########################################################################
from tools.load import LoadMatrix
from numpy import double
lm = LoadMatrix()
traindat = double(lm.load_numbers('../data/fm_train_real.dat'))
testdat = double(lm.load_numbers('../data/fm_test_real.dat'))

parameter_list = [[traindat, testdat, 2, 10], [traindat, testdat, 5, 10]]


def kernel_anova_modular (fm_train_real=traindat, fm_test_real=testdat, cardinality=2, size_cache=10):
    """Compute ANOVA kernel matrices on real-valued features.

    The kernel is built on (train, train). For every pair of training
    vectors both ``compute_rec1`` and ``compute_rec2`` are called; their
    results are not compared or stored. The kernel is then
    re-initialised on (train, test).

    Returns (km_train, km_test, kernel).
    """
    from shogun.Kernel import ANOVAKernel
    from shogun.Features import RealFeatures

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    kernel = ANOVAKernel(train_feats, train_feats, cardinality, size_cache)

    for row in range(0, train_feats.get_num_vectors()):
        for col in range(0, train_feats.get_num_vectors()):
            rec1 = kernel.compute_rec1(row, col)
            rec2 = kernel.compute_rec2(row, col)
            #if abs(k1-k2) > 1e-10:
            #    print "|%s|%s|" % (k1, k2)

    train_matrix = kernel.get_kernel_matrix()
    kernel.init(train_feats, test_feats)
    test_matrix = kernel.get_kernel_matrix()
    return train_matrix, test_matrix, kernel


if __name__ == '__main__':
    print('ANOVA')
    kernel_anova_modular(*parameter_list[0])
###########################################################################
# kernel can be used to maximize AUC instead of margin in SVMs
###########################################################################
from tools.load import LoadMatrix
from numpy import double
lm = LoadMatrix()
traindat = double(lm.load_numbers('../data/fm_train_real.dat'))
# note: despite its name, testdat holds the two-class training labels
testdat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat, testdat, 1.7], [traindat, testdat, 1.6]]


def kernel_auc_modular(fm_train_real=traindat, label_train_real=testdat, width=1.7):
    """Set up an AUCKernel on top of a Gaussian sub-kernel.

    A GaussianKernel of the given ``width`` on (train, train) is wrapped
    in ``AUCKernel(0, subkernel)``, AUC maximization is set up with the
    given labels, and the kernel matrix is computed once (the matrix is
    not returned).

    Returns the AUCKernel object.
    """
    from shogun.Kernel import GaussianKernel, AUCKernel
    from shogun.Features import RealFeatures, Labels

    train_feats = RealFeatures(fm_train_real)
    gaussian = GaussianKernel(train_feats, train_feats, width)

    kernel = AUCKernel(0, gaussian)
    kernel.setup_auc_maximization( Labels(label_train_real) )
    train_matrix = kernel.get_kernel_matrix()
    return kernel


if __name__ == '__main__':
    print('AUC')
    kernel_auc_modular(*parameter_list[0])
from tools.load import LoadMatrix
from numpy import where
lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat, testdat, 1.0], [traindat, testdat, 10.0]]


def kernel_cauchy_modular (fm_train_real=traindat, fm_test_real=testdat, sigma=1.0):
    """Compute Cauchy kernel matrices based on a Euclidean distance.

    The kernel is built on (train, train) with parameter ``sigma`` and
    an EuclidianDistance object, then re-initialised on (train, test).

    Returns (km_train, km_test, kernel).
    """
    from shogun.Features import RealFeatures
    from shogun.Kernel import CauchyKernel
    from shogun.Distance import EuclidianDistance

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    euclid = EuclidianDistance(train_feats, train_feats)
    kernel = CauchyKernel(train_feats, train_feats, sigma, euclid)
    train_matrix = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    test_matrix = kernel.get_kernel_matrix()
    return train_matrix, test_matrix, kernel


if __name__ == '__main__':
    print('Cauchy')
    kernel_cauchy_modular(*parameter_list[0])
###########################################################################
# chi2 kernel
###########################################################################
from tools.load import LoadMatrix
from numpy import double
lm = LoadMatrix()
traindat = double(lm.load_numbers('../data/fm_train_real.dat'))
testdat = double(lm.load_numbers('../data/fm_test_real.dat'))

parameter_list = [[traindat, testdat, 1.4, 10], [traindat, testdat, 1.5, 10]]


def kernel_chi2_modular (fm_train_real=traindat, fm_test_real=testdat, width=1.4, size_cache=10):
    """Compute Chi2 kernel matrices on real-valued features.

    The kernel is built on (train, train) with the given ``width`` and
    ``size_cache``, then re-initialised on (train, test).

    Returns (km_train, km_test, kernel).
    """
    from shogun.Kernel import Chi2Kernel
    from shogun.Features import RealFeatures

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    kernel = Chi2Kernel(train_feats, train_feats, width, size_cache)
    train_matrix = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    test_matrix = kernel.get_kernel_matrix()
    return train_matrix, test_matrix, kernel


if __name__ == '__main__':
    print('Chi2')
    kernel_chi2_modular(*parameter_list[0])
from tools.load import LoadMatrix
from numpy import where
lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat, testdat, 1.0], [traindat, testdat, 5.0]]


def kernel_circular_modular (fm_train_real=traindat, fm_test_real=testdat, sigma=1.0):
    """Compute Circular kernel matrices based on a Euclidean distance.

    The kernel is built on (train, train) with parameter ``sigma`` and
    an EuclidianDistance object, then re-initialised on (train, test).

    Returns (km_train, km_test, kernel).
    """
    from shogun.Features import RealFeatures
    from shogun.Kernel import CircularKernel
    from shogun.Distance import EuclidianDistance

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    euclid = EuclidianDistance(train_feats, train_feats)
    kernel = CircularKernel(train_feats, train_feats, sigma, euclid)
    train_matrix = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    test_matrix = kernel.get_kernel_matrix()
    return train_matrix, test_matrix, kernel


if __name__ == '__main__':
    print('Circular')
    kernel_circular_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat, testdat, label_traindat], [traindat, testdat, label_traindat]]


def kernel_combined_custom_poly_modular(fm_train_real = traindat,fm_test_real = testdat,fm_label_twoclass=label_traindat):
    """Train and apply a LibSVM on a combined custom + polynomial kernel.

    Training: a CombinedKernel is assembled from a CustomKernel holding
    the precomputed PolyKernel(10, 3) matrix on (train, train) and a
    PolyKernel(10, 2) sub-kernel fed through CombinedFeatures; a LibSVM
    with C=1.0 is trained on it.

    Prediction: a second CombinedKernel is assembled the same way on
    (train, test), set on the SVM, and ``svm.apply()`` is called (its
    result is discarded).

    Returns (kernel matrix of the second combined kernel, that kernel).
    """
    from shogun.Features import CombinedFeatures, RealFeatures, Labels
    from shogun.Kernel import CombinedKernel, PolyKernel, CustomKernel
    from shogun.Classifier import LibSVM

    # ---- training kernel -------------------------------------------------
    train_kernel = CombinedKernel()
    feats_train = CombinedFeatures()

    raw_train = RealFeatures(fm_train_real)
    cubic_train = PolyKernel(10,3)
    cubic_train.init(raw_train, raw_train)
    cubic_train_matrix = cubic_train.get_kernel_matrix()
    train_kernel.append_kernel(CustomKernel(cubic_train_matrix))

    quad_train_feats = RealFeatures(fm_train_real)
    feats_train.append_feature_obj(quad_train_feats)
    quad_train = PolyKernel(10,2)
    train_kernel.append_kernel(quad_train)
    train_kernel.init(feats_train, feats_train)

    labels = Labels(fm_label_twoclass)
    svm = LibSVM(1.0, train_kernel, labels)
    svm.train()

    # ---- prediction kernel -----------------------------------------------
    pred_kernel = CombinedKernel()
    feats_pred = CombinedFeatures()

    raw_test = RealFeatures(fm_test_real)
    cubic_pred = PolyKernel(10,3)
    cubic_pred.init(raw_train, raw_test)
    cubic_pred_matrix = cubic_pred.get_kernel_matrix()
    pred_kernel.append_kernel(CustomKernel(cubic_pred_matrix))

    quad_test_feats = RealFeatures(fm_test_real)
    feats_pred.append_feature_obj(quad_test_feats)
    quad_pred = PolyKernel(10, 2)
    pred_kernel.append_kernel(quad_pred)
    pred_kernel.init(feats_train, feats_pred)

    svm.set_kernel(pred_kernel)
    svm.apply()

    # the returned matrix comes from the (train, test) kernel
    result_matrix = pred_kernel.get_kernel_matrix()
    return result_matrix, pred_kernel


if __name__ == '__main__':
    kernel_combined_custom_poly_modular(*parameter_list[0])
from tools.load import LoadMatrix
from numpy import double
lm = LoadMatrix()
traindat = double(lm.load_numbers('../data/fm_train_real.dat'))
testdat = double(lm.load_numbers('../data/fm_test_real.dat'))
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')

parameter_list = [[traindat, testdat, traindna, testdna], [traindat, testdat, traindna, testdna]]


def kernel_combined_modular(fm_train_real=traindat,fm_test_real=testdat,fm_train_dna=traindna,fm_test_dna=testdna ):
    """Compute a CombinedKernel over real and DNA string features.

    Three sub-kernels are appended in this order, each with matching
    train/test feature objects: GaussianKernel(10, 1.1) on the real
    data, FixedDegreeStringKernel(10, 3) on the DNA strings and
    LocalAlignmentStringKernel(10) on the DNA strings. The kernel is
    initialised on (train, train) and then on (train, test).

    Returns (km_train, km_test, kernel).
    """
    from shogun.Kernel import CombinedKernel, GaussianKernel, FixedDegreeStringKernel, LocalAlignmentStringKernel
    from shogun.Features import RealFeatures, StringCharFeatures, CombinedFeatures, DNA

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    def _attach(train_part, test_part, sub_kernel):
        # register one sub-kernel together with its train/test features
        feats_train.append_feature_obj(train_part)
        feats_test.append_feature_obj(test_part)
        kernel.append_kernel(sub_kernel)

    # Gaussian kernel on the real-valued data
    _attach(RealFeatures(fm_train_real),
            RealFeatures(fm_test_real),
            GaussianKernel(10, 1.1))

    # fixed-degree string kernel on the DNA data
    degree = 3
    _attach(StringCharFeatures(fm_train_dna, DNA),
            StringCharFeatures(fm_test_dna, DNA),
            FixedDegreeStringKernel(10, degree))

    # local-alignment string kernel on the DNA data
    _attach(StringCharFeatures(fm_train_dna, DNA),
            StringCharFeatures(fm_test_dna, DNA),
            LocalAlignmentStringKernel(10))

    kernel.init(feats_train, feats_train)
    train_matrix = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    test_matrix = kernel.get_kernel_matrix()
    return train_matrix, test_matrix, kernel


if __name__ == '__main__':
    print('Combined')
    kernel_combined_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

parameter_list = [[traindat, testdat, 3, 0, False], [traindat, testdat, 4, 0, False]]


def kernel_comm_ulong_string_modular (fm_train_dna=traindat,fm_test_dna=testdat, order=3, gap=0, reverse = False):
    """Compute CommUlongStringKernel matrices on DNA strings.

    Train and test strings are converted to StringUlongFeatures via
    ``obtain_from_char(charfeat, order-1, order, gap, reverse)``. One
    SortUlongString preprocessor, initialised on the training features,
    is added to and applied on both feature objects. The kernel is
    created with ``use_sign=False`` on (train, train) and then
    re-initialised on (train, test).

    Returns (km_train, km_test, kernel).
    """
    from shogun.Kernel import CommUlongStringKernel
    from shogun.Features import StringUlongFeatures, StringCharFeatures, DNA
    from shogun.Preprocessor import SortUlongString

    def _to_ulong(dna_strings):
        # char strings -> ulong string features over the same alphabet
        chars = StringCharFeatures(DNA)
        chars.set_features(dna_strings)
        ulongs = StringUlongFeatures(chars.get_alphabet())
        ulongs.obtain_from_char(chars, order-1, order, gap, reverse)
        return ulongs

    train_feats = _to_ulong(fm_train_dna)
    sorter = SortUlongString()
    sorter.init(train_feats)
    train_feats.add_preprocessor(sorter)
    train_feats.apply_preprocessor()

    test_feats = _to_ulong(fm_test_dna)
    test_feats.add_preprocessor(sorter)
    test_feats.apply_preprocessor()

    use_sign = False
    kernel = CommUlongStringKernel(train_feats, train_feats, use_sign)
    train_matrix = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    test_matrix = kernel.get_kernel_matrix()
    return train_matrix, test_matrix, kernel


if __name__ == '__main__':
    print('CommUlongString')
    kernel_comm_ulong_string_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

parameter_list = [[traindat, testdat, 4, 0, False, False], [traindat, testdat, 4, 0, False, False]]


def kernel_comm_word_string_modular (fm_train_dna=traindat, fm_test_dna=testdat, order=3, gap=0, reverse = False, use_sign = False):
    """Compute CommWordStringKernel matrices on DNA strings.

    Train and test strings are converted to StringWordFeatures via
    ``obtain_from_char(charfeat, order-1, order, gap, reverse)``. One
    SortWordString preprocessor, initialised on the training features,
    is added to and applied on both feature objects. The kernel is
    created on (train, train) with ``use_sign`` and then re-initialised
    on (train, test).

    Returns (km_train, km_test, kernel).
    """
    from shogun.Kernel import CommWordStringKernel
    from shogun.Features import StringWordFeatures, StringCharFeatures, DNA
    from shogun.Preprocessor import SortWordString

    def _to_words(dna_strings):
        # char strings -> word string features over the same alphabet
        chars = StringCharFeatures(DNA)
        chars.set_features(dna_strings)
        words = StringWordFeatures(chars.get_alphabet())
        words.obtain_from_char(chars, order-1, order, gap, reverse)
        return words

    train_feats = _to_words(fm_train_dna)
    sorter = SortWordString()
    sorter.init(train_feats)
    train_feats.add_preprocessor(sorter)
    train_feats.apply_preprocessor()

    test_feats = _to_words(fm_test_dna)
    test_feats.add_preprocessor(sorter)
    test_feats.apply_preprocessor()

    kernel = CommWordStringKernel(train_feats, train_feats, use_sign)
    train_matrix = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    test_matrix = kernel.get_kernel_matrix()
    return train_matrix, test_matrix, kernel


if __name__ == '__main__':
    print('CommWordString')
    kernel_comm_word_string_modular(*parameter_list[0])
parameter_list = [[23], [24]]


def kernel_const_modular (c=23):
    """Compute ConstKernel matrices on dummy features.

    Uses DummyFeatures(10) as training and DummyFeatures(17) as test
    features; the kernel is built on (train, train) with the constant
    ``c`` and then re-initialised on (train, test).

    Returns (km_train, km_test, kernel).
    """
    from shogun.Features import DummyFeatures
    from shogun.Kernel import ConstKernel

    train_feats = DummyFeatures(10)
    test_feats = DummyFeatures(17)

    kernel = ConstKernel(train_feats, train_feats, c)
    train_matrix = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    test_matrix = kernel.get_kernel_matrix()
    return train_matrix, test_matrix, kernel


if __name__ == '__main__':
    print('Const')
    kernel_const_modular(*parameter_list[0])
from numpy.random import seed

seed(42)

parameter_list = [[7], [8]]


def kernel_custom_modular (dim=7):
    """Exercise the three CustomKernel matrix setters on random data.

    With the random seed fixed to 17, a ``dim`` x ``dim`` random matrix
    ``data`` is drawn, ``symdata = data + data.T`` is formed, and the
    entries with y <= x are collected into a 1-d array. The setters
    triangle-from-triangle, triangle-from-full and full-from-full are
    each followed by ``get_kernel_matrix()``; the whole sequence runs
    twice, the second time after converting ``data`` to float32.

    Returns (kernel matrix after the last full-from-full call, kernel).
    """
    from numpy.random import rand, seed
    from numpy import array, float32
    from shogun.Features import RealFeatures
    from shogun.Kernel import CustomKernel

    seed(17)
    data = rand(dim, dim)
    feats = RealFeatures(data)
    symdata = data + data.T

    triangle_entries = []
    for x in xrange(symdata.shape[1]):
        for y in xrange(symdata.shape[0]):
            if y <= x:
                triangle_entries.append(symdata[(x, y)])
    lowertriangle = array(triangle_entries)

    kernel = CustomKernel()

    def _run_setters(full_matrix):
        # call each setter, fetching the kernel matrix after every one
        kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle)
        kernel.get_kernel_matrix()
        kernel.set_triangle_kernel_matrix_from_full(symdata)
        kernel.get_kernel_matrix()
        kernel.set_full_kernel_matrix_from_full(full_matrix)
        return kernel.get_kernel_matrix()

    # once with float64's
    _run_setters(data)

    # now once with float32's
    # NOTE(review): only `data` is converted; lowertriangle and symdata
    # are passed unchanged in this second pass -- confirm this is intended.
    data = array(data, dtype=float32)
    km_fullfull = _run_setters(data)

    return km_fullfull, kernel


if __name__ == '__main__':
    print('Custom')
    kernel_custom_modular(*parameter_list[0])
parameter_list = [[23], [24]]


def kernel_diag_modular (diag=23):
    """Compute DiagKernel matrices on dummy features.

    Uses DummyFeatures(10) as training and DummyFeatures(17) as test
    features; the kernel is built on (train, train) with the value
    ``diag`` and then re-initialised on (train, test).

    Returns (km_train, km_test, kernel).
    """
    from shogun.Features import DummyFeatures
    from shogun.Kernel import DiagKernel

    train_feats = DummyFeatures(10)
    test_feats = DummyFeatures(17)

    kernel = DiagKernel(train_feats, train_feats, diag)
    train_matrix = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    test_matrix = kernel.get_kernel_matrix()
    return train_matrix, test_matrix, kernel


if __name__ == '__main__':
    print('Diag')
    kernel_diag_modular(*parameter_list[0])
from tools.load import LoadMatrix
from numpy import double
lm = LoadMatrix()
# The two file names used to be swapped (traindat was read from the
# test file and vice versa), unlike every other example in this file.
traindat = double(lm.load_numbers('../data/fm_train_real.dat'))
testdat = double(lm.load_numbers('../data/fm_test_real.dat'))

parameter_list = [[traindat, testdat, 1.7], [traindat, testdat, 1.8]]


def kernel_distance_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.7):
    """Compute DistanceKernel matrices based on a Euclidean distance.

    The kernel is built on (train, train) with the given ``width`` and
    an EuclidianDistance object, then re-initialised on (train, test).

    Returns (km_train, km_test, kernel).
    """
    from shogun.Kernel import DistanceKernel
    from shogun.Features import RealFeatures
    from shogun.Distance import EuclidianDistance

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    distance = EuclidianDistance()

    # Build the training matrix on (train, train). The kernel used to be
    # constructed on (train, test), which made km_train a duplicate of
    # km_test.
    kernel = DistanceKernel(feats_train, feats_train, width, distance)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel


if __name__ == '__main__':
    print('Distance')
    kernel_distance_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

parameter_list = [[traindat, testdat, 5, 5], [traindat, testdat, 6, 6]]


def kernel_distantsegments_modular (fm_train_dna=traindat, fm_test_dna=testdat, delta=5, theta=5):
    """Compute DistantSegmentsKernel matrices on DNA strings.

    The kernel is built on (train, train) with the arguments
    ``10, delta, theta`` and then re-initialised on (train, test).

    Returns (km_train, km_test, kernel).
    """
    from shogun.Features import StringCharFeatures, DNA
    from shogun.Kernel import DistantSegmentsKernel

    train_feats = StringCharFeatures(fm_train_dna, DNA)
    test_feats = StringCharFeatures(fm_test_dna, DNA)

    kernel = DistantSegmentsKernel(train_feats, train_feats, 10, delta, theta)
    train_matrix = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    test_matrix = kernel.get_kernel_matrix()
    return train_matrix, test_matrix, kernel


if __name__ == '__main__':
    print('DistantSegments')
    kernel_distantsegments_modular(*parameter_list[0])
from tools.load import LoadMatrix
from numpy import where
lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat, testdat, 1.0], [traindat, testdat, 5.0]]


def kernel_exponential_modular (fm_train_real=traindat, fm_test_real=testdat, tau_coef=1.0):
    """Compute Exponential kernel matrices based on a Euclidean distance.

    The kernel is built on (train, train) with ``tau_coef``, an
    EuclidianDistance object and a trailing argument of 10, then
    re-initialised on (train, test).

    Returns (km_train, km_test, kernel).
    """
    from shogun.Features import RealFeatures
    from shogun.Kernel import ExponentialKernel
    from shogun.Distance import EuclidianDistance

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    euclid = EuclidianDistance(train_feats, train_feats)
    kernel = ExponentialKernel(train_feats, train_feats, tau_coef, euclid, 10)
    train_matrix = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    test_matrix = kernel.get_kernel_matrix()
    return train_matrix, test_matrix, kernel


if __name__ == '__main__':
    print('Exponential')
    kernel_exponential_modular(*parameter_list[0])
from tools.load import LoadMatrix
from numpy import where
lm=LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
label_traindat = lm.load_labels('../data/label_train_dna.dat')
parameter_list = [[traindat,testdat,label_traindat,1,4,1e-1,1,0,False,[1,False,True]],[traindat,testdat,label_traindat,3,4,1e-1,1,0,False,[1,False,True]]]
fm_hmm_pos=[ traindat[i] for i in where([label_traindat==1])[1] ]
fm_hmm_neg=[ traindat[i] for i in where([label_traindat==-1])[1] ]
def kernel_fisher_modular(fm_train_dna=traindat, fm_test_dna=testdat,
        label_train_dna=label_traindat,
        N=1,M=4,pseudo=1e-1,order=1,gap=0,reverse=False,
        kargs=[1,False,True]):
    """Compute Fisher-kernel matrices from two class-specific HMMs.

    One HMM is trained on the training strings labelled 1 and one on those
    labelled -1. FKFeatures built from both HMMs are then fed to a
    PolyKernel, which is evaluated on train/train and on train/test.

    The per-class training strings are selected from ``fm_train_dna`` using
    ``label_train_dna`` (previously the function silently used module-level
    lists and ignored the labels argument). For the default arguments the
    selection is the same as before.

    Parameters:
        fm_train_dna, fm_test_dna: training / test strings.
        label_train_dna: one label per training string; entries equal to 1
            and -1 pick the data for the positive and negative HMM.
        N, M, pseudo: forwarded to the HMM constructor.
        order, gap, reverse: forwarded to ``obtain_from_char``.
        kargs: extra positional arguments for PolyKernel. Only unpacked,
            never mutated, so the list default is safe.

    Returns:
        (km_train, km_test, kernel); the kernel is left initialised on
        (train, test).
    """
    from shogun.Features import StringCharFeatures, StringWordFeatures, FKFeatures, DNA
    from shogun.Kernel import PolyKernel
    from shogun.Distribution import HMM, BW_NORMAL

    # Split the training strings by label, keeping their original order.
    hmm_pos_strings = [seq for seq, lab in zip(fm_train_dna, label_train_dna) if lab == 1]
    hmm_neg_strings = [seq for seq, lab in zip(fm_train_dna, label_train_dna) if lab == -1]

    # train HMM for positive class
    charfeat = StringCharFeatures(hmm_pos_strings, DNA)
    hmm_pos_train = StringWordFeatures(charfeat.get_alphabet())
    hmm_pos_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
    pos = HMM(hmm_pos_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)

    # train HMM for negative class
    charfeat = StringCharFeatures(hmm_neg_strings, DNA)
    hmm_neg_train = StringWordFeatures(charfeat.get_alphabet())
    hmm_neg_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
    neg = HMM(hmm_neg_train, N, M, pseudo)
    neg.baum_welch_viterbi_train(BW_NORMAL)

    # word features of the kernel training data
    charfeat = StringCharFeatures(fm_train_dna, DNA)
    wordfeats_train = StringWordFeatures(charfeat.get_alphabet())
    wordfeats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)

    # word features of the kernel testing data
    charfeat = StringCharFeatures(fm_test_dna, DNA)
    wordfeats_test = StringWordFeatures(charfeat.get_alphabet())
    wordfeats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)

    # kernel on training data
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = FKFeatures(10, pos, neg)
    feats_train.set_opt_a(-1)  # estimate prior
    kernel = PolyKernel(feats_train, feats_train, *kargs)
    km_train = kernel.get_kernel_matrix()

    # kernel on testing data: cloned HMMs observe the test words, so the
    # HMMs referenced by feats_train keep observing the training words
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = FKFeatures(10, pos_clone, neg_clone)
    feats_test.set_a(feats_train.get_a())  # use prior from training data
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
# Entry point of the Fisher kernel example: run it with the first parameter set.
if __name__=='__main__':
print "Fisher Kernel"
kernel_fisher_modular(*parameter_list[0])
# --- FixedDegreeString kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# DNA train/test strings, used as default arguments by the example function.
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
# One positional-argument list per run: [train, test, degree].
parameter_list=[[traindat, testdat,3],[traindat,testdat,4]]
def kernel_fixed_degree_string_modular (fm_train_dna=traindat, fm_test_dna=testdat,degree=3):
    """Evaluate a FixedDegreeStringKernel on DNA string features.

    The kernel is built over (train, train), then re-initialised with
    (train, test); a kernel matrix is fetched after each step.

    Returns:
        (km_train, km_test, kernel).
    """
    from shogun.Kernel import FixedDegreeStringKernel
    from shogun.Features import DNA, StringCharFeatures

    dna_train = StringCharFeatures(fm_train_dna, DNA)
    dna_test = StringCharFeatures(fm_test_dna, DNA)

    kernel = FixedDegreeStringKernel(dna_train, dna_train, degree)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(dna_train, dna_test)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the FixedDegreeString example: run it with the first parameter set.
if __name__=='__main__':
print 'FixedDegreeString'
kernel_fixed_degree_string_modular(*parameter_list[0])
# --- Gaussian kernel example: module-level setup ---
from tools.load import LoadMatrix
# NOTE(review): `where` is not referenced by the Gaussian example code.
from numpy import where
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run: [train, test, width].
parameter_list=[[traindat,testdat, 1.3],[traindat,testdat, 1.4]]
def kernel_gaussian_modular (fm_train_real=traindat,fm_test_real=testdat, width=1.3):
    """Evaluate a GaussianKernel of the given width on real-valued features.

    Returns:
        (km_train, km_test, kernel): the train/train matrix, the train/test
        matrix, and the kernel left initialised on (train, test).
    """
    from shogun.Kernel import GaussianKernel
    from shogun.Features import RealFeatures

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    kernel = GaussianKernel(train_feats, train_feats, width)
    gram_train = kernel.get_kernel_matrix()

    # Re-point the same kernel at (train, test) before the second evaluation.
    kernel.init(train_feats, test_feats)
    return gram_train, kernel.get_kernel_matrix(), kernel
# Entry point of the Gaussian example: run it with the first parameter set.
if __name__=='__main__':
print 'Gaussian'
kernel_gaussian_modular(*parameter_list[0])
# --- GaussianShift kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run: [train, test, width, max_shift, shift_step].
parameter_list=[[traindat,testdat,1.8,2,1],[traindat,testdat,1.9,2,1]]
def kernel_gaussian_shift_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.8,max_shift=2,shift_step=1):
    """Evaluate a GaussianShiftKernel on real-valued features.

    ``width``, ``max_shift`` and ``shift_step`` are forwarded to the kernel
    constructor in that order.

    Returns:
        (km_train, km_test, kernel).
    """
    from shogun.Kernel import GaussianShiftKernel
    from shogun.Features import RealFeatures

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    kernel = GaussianShiftKernel(
        train_feats, train_feats, width, max_shift, shift_step)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the GaussianShift example: run it with the first parameter set.
if __name__=='__main__':
print 'GaussianShift'
kernel_gaussian_shift_modular(*parameter_list[0])
# --- HistogramWordString kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# DNA train/test strings and training labels, used as default arguments below.
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
label_traindat = lm.load_labels('../data/label_train_dna.dat')
# One positional-argument list per run: [train, test, labels, order, gap, reverse].
parameter_list=[[traindat,testdat,label_traindat,3,0,False],[traindat,testdat,label_traindat,3,0,False]]
def kernel_histogram_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,order=3,gap=0,reverse=False):
    """Evaluate a HistogramWordStringKernel backed by a trained PluginEstimate.

    DNA strings are converted to word features, a PluginEstimate is trained
    on the training words and labels, and the kernel is evaluated on
    train/train and train/test. The estimator is also applied to the test
    features; its predicted labels are computed but discarded.

    Returns:
        (km_train, km_test, kernel).
    """
    from shogun.Features import StringCharFeatures, StringWordFeatures, DNA, Labels
    from shogun.Kernel import HistogramWordStringKernel
    from shogun.Classifier import PluginEstimate

    # Training strings -> word features.
    chars = StringCharFeatures(DNA)
    chars.set_features(fm_train_dna)
    words_train = StringWordFeatures(chars.get_alphabet())
    words_train.obtain_from_char(chars, order-1, order, gap, reverse)

    # Test strings -> word features.
    chars = StringCharFeatures(DNA)
    chars.set_features(fm_test_dna)
    words_test = StringWordFeatures(chars.get_alphabet())
    words_test.obtain_from_char(chars, order-1, order, gap, reverse)

    # Train the estimator the kernel relies on.
    estimator = PluginEstimate()
    train_labels = Labels(label_train_dna)
    estimator.set_labels(train_labels)
    estimator.set_features(words_train)
    estimator.train()

    kernel = HistogramWordStringKernel(words_train, words_train, estimator)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(words_train, words_test)
    estimator.set_features(words_test)
    estimator.apply().get_labels()  # result intentionally unused, as in the original
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the HistogramWord example: run it with the first parameter set.
if __name__=='__main__':
print 'PluginEstimate w/ HistogramWord'
kernel_histogram_word_string_modular(*parameter_list[0])
# --- InverseMultiQuadric kernel example: module-level setup ---
from tools.load import LoadMatrix
# NOTE(review): `where` is not referenced by the inverse-multiquadric example code.
from numpy import where
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run: [train, test, shift_coef].
parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 5.0]]
def kernel_inversemultiquadric_modular (fm_train_real=traindat,fm_test_real=testdat, shift_coef=1.0):
    """Evaluate an InverseMultiQuadricKernel over a Euclidean distance.

    Returns:
        (km_train, km_test, kernel): the train/train matrix, the train/test
        matrix, and the kernel left initialised on (train, test).
    """
    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Kernel import InverseMultiQuadricKernel

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    euclid = EuclidianDistance(train_feats, train_feats)
    kernel = InverseMultiQuadricKernel(train_feats, train_feats, shift_coef, euclid)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the InverseMultiquadric example: run it with the first parameter set.
if __name__=='__main__':
print 'InverseMultiquadric'
kernel_inversemultiquadric_modular(*parameter_list[0])
# --- Kernel I/O example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run: [train, test, width].
parameter_list=[[traindat,testdat,1.9],[traindat,testdat,1.7]]
def kernel_io_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.9):
    """Evaluate a GaussianKernel and save it to ASCII files after each step.

    The kernel is saved once after the train/train evaluation and once after
    the train/test evaluation. Both files are created in the current working
    directory and removed again before returning.

    Returns:
        (km_train, km_test, kernel).
    """
    import os
    from shogun.Features import RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.IO import AsciiFile, BinaryFile

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    kernel = GaussianKernel(train_feats, train_feats, width)
    gram_train = kernel.get_kernel_matrix()
    out_file = AsciiFile("gaussian_train.ascii","w")
    kernel.save(out_file)
    # Drop the only reference to the file object; presumably this flushes
    # and closes it -- TODO confirm.
    del out_file

    kernel.init(train_feats, test_feats)
    gram_test = kernel.get_kernel_matrix()
    out_file = AsciiFile("gaussian_test.ascii","w")
    kernel.save(out_file)
    del out_file

    # Clean up the files written above (same removal order as before).
    for leftover in ("gaussian_test.ascii", "gaussian_train.ascii"):
        os.unlink(leftover)

    return gram_train, gram_test, kernel
# Entry point of the kernel I/O example: run it with the first parameter set.
# NOTE(review): the banner printed here is 'Gaussian', the same as the plain
# Gaussian example.
if __name__=='__main__':
print 'Gaussian'
kernel_io_modular(*parameter_list[0])
###########################################################################
# linear kernel on byte features
###########################################################################
from tools.load import LoadMatrix
from numpy import ubyte
lm=LoadMatrix()
# Train/test data loaded as numbers and converted with numpy's ubyte.
traindat = ubyte(lm.load_numbers('../data/fm_train_byte.dat'))
testdat = ubyte(lm.load_numbers('../data/fm_test_byte.dat'))
# One positional-argument list per run: [train, test].
parameter_list=[[traindat,testdat],[traindat,testdat]]
def kernel_linear_byte_modular(fm_train_byte=traindat,fm_test_byte=testdat):
    """Evaluate a LinearKernel on byte features.

    Both kernel matrices (train/train and train/test) are computed, but
    only the kernel object is returned.

    Returns:
        The kernel, left initialised on (train, test).
    """
    from shogun.Features import ByteFeatures
    from shogun.Kernel import LinearKernel

    train_feats = ByteFeatures(fm_train_byte)
    test_feats = ByteFeatures(fm_test_byte)

    kernel = LinearKernel(train_feats, train_feats)
    gram_train = kernel.get_kernel_matrix()  # computed, not returned

    kernel.init(train_feats, test_feats)
    gram_test = kernel.get_kernel_matrix()  # computed, not returned
    return kernel
# Entry point of the LinearByte example: run it with the first parameter set.
if __name__=='__main__':
print 'LinearByte'
kernel_linear_byte_modular(*parameter_list[0])
# --- Linear kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run: [train, test, scale].
parameter_list=[[traindat,testdat,1.2],[traindat,testdat,1.4]]
def kernel_linear_modular (fm_train_real=traindat,fm_test_real=testdat,scale=1.2):
    """Evaluate a LinearKernel with an AvgDiagKernelNormalizer attached.

    The normalizer is set on the empty kernel before it is first
    initialised with features.

    Returns:
        (km_train, km_test, kernel).
    """
    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer
    from shogun.Features import RealFeatures

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    kernel = LinearKernel()
    normalizer = AvgDiagKernelNormalizer(scale)
    kernel.set_normalizer(normalizer)

    kernel.init(train_feats, train_feats)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the Linear example: run it with the first parameter set.
if __name__=='__main__':
print 'Linear'
kernel_linear_modular(*parameter_list[0])
# --- LinearString kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# DNA train/test strings, used as default arguments by the example function.
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
# One positional-argument list per run: [train, test].
parameter_list=[[traindat,testdat],[traindat,testdat]]
def kernel_linear_string_modular (fm_train_dna=traindat,fm_test_dna=testdat):
    """Evaluate a LinearStringKernel on DNA string features.

    Returns:
        (km_train, km_test, kernel): the train/train matrix, the train/test
        matrix, and the kernel left initialised on (train, test).
    """
    from shogun.Kernel import LinearStringKernel
    from shogun.Features import DNA, StringCharFeatures

    dna_train = StringCharFeatures(fm_train_dna, DNA)
    dna_test = StringCharFeatures(fm_test_dna, DNA)

    kernel = LinearStringKernel(dna_train, dna_train)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(dna_train, dna_test)
    return gram_train, kernel.get_kernel_matrix(), kernel
# Entry point of the LinearString example: run it with the first parameter set.
# NOTE(review): the LoadMatrix import inside the guard repeats the
# module-level import of this example.
if __name__=='__main__':
from tools.load import LoadMatrix
print 'LinearString'
kernel_linear_string_modular(*parameter_list[0])
# --- LinearWord kernel example: module-level setup ---
from tools.load import LoadMatrix
from numpy import ushort
lm=LoadMatrix()
# Train/test data loaded as numbers and converted with numpy's ushort.
traindat = ushort(lm.load_numbers('../data/fm_train_word.dat'))
testdat = ushort(lm.load_numbers('../data/fm_test_word.dat'))
# One positional-argument list per run: [train, test, scale].
parameter_list=[[traindat,testdat,1.2],[traindat,testdat,1.2]]
def kernel_linear_word_modular (fm_train_word=traindat,fm_test_word=testdat,scale=1.2):
    """Evaluate a normalised LinearKernel on word features.

    The kernel is constructed on (train, train), given an
    AvgDiagKernelNormalizer, and then initialised again on (train, train)
    before the first matrix is fetched. Both matrices are computed, but
    only the kernel object is returned.

    Returns:
        The kernel, left initialised on (train, test).
    """
    from shogun.Features import WordFeatures
    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer

    train_feats = WordFeatures(fm_train_word)
    test_feats = WordFeatures(fm_test_word)

    kernel = LinearKernel(train_feats, train_feats)
    normalizer = AvgDiagKernelNormalizer(scale)
    kernel.set_normalizer(normalizer)

    # Re-initialise after the normalizer has been attached.
    kernel.init(train_feats, train_feats)
    gram_train = kernel.get_kernel_matrix()  # computed, not returned

    kernel.init(train_feats, test_feats)
    gram_test = kernel.get_kernel_matrix()  # computed, not returned
    return kernel
# Entry point of the LinearWord example: run it with the first parameter set.
if __name__=='__main__':
print 'LinearWord'
kernel_linear_word_modular(*parameter_list[0])
# --- LocalAlignmentString kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# DNA train/test strings, used as default arguments by the example function.
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
# One positional-argument list per run: [train, test].
parameter_list=[[traindat,testdat],[traindat,testdat]]
def kernel_local_alignment_string_modular(fm_train_dna=traindat,fm_test_dna=testdat):
    """Evaluate a LocalAlignmentStringKernel on DNA string features.

    Returns:
        (km_train, km_test, kernel).
    """
    from shogun.Kernel import LocalAlignmentStringKernel
    from shogun.Features import DNA, StringCharFeatures

    dna_train = StringCharFeatures(fm_train_dna, DNA)
    dna_test = StringCharFeatures(fm_test_dna, DNA)

    kernel = LocalAlignmentStringKernel(dna_train, dna_train)
    gram_train = kernel.get_kernel_matrix()

    # Same kernel object, now comparing training against test strings.
    kernel.init(dna_train, dna_test)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the LocalAlignmentString example: run it with the first parameter set.
if __name__=='__main__':
print 'LocalAlignmentString'
kernel_local_alignment_string_modular(*parameter_list[0])
# --- LocalityImprovedString kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# DNA train/test strings, used as default arguments by the example function.
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
# One positional-argument list per run: [train, test, length, inner_degree, outer_degree].
parameter_list=[[traindat,testdat,5,5,7],[traindat,testdat,5,5,7]]
def kernel_locality_improved_string_modular(fm_train_dna=traindat,fm_test_dna=testdat,length=5,inner_degree=5,outer_degree=7):
    """Evaluate a LocalityImprovedStringKernel on DNA string features.

    ``length``, ``inner_degree`` and ``outer_degree`` are forwarded to the
    kernel constructor in that order.

    Returns:
        (km_train, km_test, kernel).
    """
    from shogun.Kernel import LocalityImprovedStringKernel
    from shogun.Features import DNA, StringCharFeatures

    dna_train = StringCharFeatures(fm_train_dna, DNA)
    dna_test = StringCharFeatures(fm_test_dna, DNA)

    kernel = LocalityImprovedStringKernel(dna_train, dna_train, length, inner_degree, outer_degree)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(dna_train, dna_test)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the LocalityImprovedString example: run it with the first parameter set.
if __name__=='__main__':
print 'LocalityImprovedString'
kernel_locality_improved_string_modular(*parameter_list[0])
# --- Log kernel example: module-level setup ---
from tools.load import LoadMatrix
# NOTE(review): `where` is not referenced by the log-kernel example code.
from numpy import where
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run: [train, test, degree].
parameter_list=[[traindat,testdat, 2.0],[traindat,testdat, 3.0]]
def kernel_log_modular (fm_train_real=traindat,fm_test_real=testdat, degree=2.0):
    """Evaluate a LogKernel built on a Euclidean distance.

    Returns:
        (km_train, km_test, kernel): the train/train matrix, the train/test
        matrix, and the kernel left initialised on (train, test).
    """
    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Kernel import LogKernel

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    euclid = EuclidianDistance(train_feats, train_feats)
    kernel = LogKernel(train_feats, train_feats, degree, euclid)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the Log example: run it with the first parameter set.
if __name__=='__main__':
print 'Log'
kernel_log_modular(*parameter_list[0])
# --- MatchWordString kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# DNA train/test strings, used as default arguments by the example function.
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
# One positional-argument list per run:
# [train, test, degree, scale, size_cache, order, gap, reverse].
parameter_list = [[traindat,testdat, 3,1.4,10,3,0,False],[
traindat,testdat, 3,1.4,10,3,0,False]]
def kernel_match_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,
        degree=3,scale=1.4,size_cache=10,order=3,gap=0,reverse=False):
    """Evaluate a normalised MatchWordStringKernel on DNA word features.

    Character features are converted to word features via
    ``obtain_from_char``. The kernel is created from ``size_cache`` and
    ``degree`` only and gets an AvgDiagKernelNormalizer before it is first
    initialised with features.

    Returns:
        (km_train, km_test, kernel).
    """
    from shogun.Features import StringWordFeatures, StringCharFeatures, DNA
    from shogun.Kernel import MatchWordStringKernel, AvgDiagKernelNormalizer

    # Training strings -> word features.
    chars = StringCharFeatures(fm_train_dna, DNA)
    words_train = StringWordFeatures(DNA)
    words_train.obtain_from_char(chars, order-1, order, gap, reverse)

    # Test strings -> word features.
    chars = StringCharFeatures(fm_test_dna, DNA)
    words_test = StringWordFeatures(DNA)
    words_test.obtain_from_char(chars, order-1, order, gap, reverse)

    kernel = MatchWordStringKernel(size_cache, degree)
    normalizer = AvgDiagKernelNormalizer(scale)
    kernel.set_normalizer(normalizer)

    kernel.init(words_train, words_train)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(words_train, words_test)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the MatchWordString example: run it with the first parameter set.
if __name__=='__main__':
print 'MatchWordString'
kernel_match_word_string_modular(*parameter_list[0])
# --- Multiquadric kernel example: module-level setup ---
from tools.load import LoadMatrix
# NOTE(review): `where` is not referenced by the multiquadric example code.
from numpy import where
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run: [train, test, shift_coef].
parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 5.0]]
def kernel_multiquadric_modular (fm_train_real=traindat,fm_test_real=testdat, shift_coef=1.0):
    """Evaluate a MultiquadricKernel built on a Euclidean distance.

    Returns:
        (km_train, km_test, kernel): the train/train matrix, the train/test
        matrix, and the kernel left initialised on (train, test).
    """
    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Kernel import MultiquadricKernel

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    euclid = EuclidianDistance(train_feats, train_feats)
    kernel = MultiquadricKernel(train_feats, train_feats, shift_coef, euclid)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    return gram_train, kernel.get_kernel_matrix(), kernel
# Entry point of the Multiquadric example: run it with the first parameter set.
if __name__=='__main__':
print 'Multiquadric'
kernel_multiquadric_modular(*parameter_list[0])
# --- OligoString kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# DNA train/test strings, used as default arguments by the example function.
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
# One positional-argument list per run: [train, test, k, width, size_cache].
parameter_list = [[traindat,testdat,3,1.2,10],[traindat,testdat,4,1.3,10]]
def kernel_oligo_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,k=3,width=1.2,size_cache=10):
    """Evaluate an OligoStringKernel on DNA string features.

    The kernel is created from ``size_cache``, ``k`` and ``width`` alone
    and only afterwards initialised with features.

    Returns:
        (km_train, km_test, kernel).
    """
    from shogun.Kernel import OligoStringKernel
    from shogun.Features import DNA, StringCharFeatures

    dna_train = StringCharFeatures(fm_train_dna, DNA)
    dna_test = StringCharFeatures(fm_test_dna, DNA)

    kernel = OligoStringKernel(size_cache, k, width)

    kernel.init(dna_train, dna_train)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(dna_train, dna_test)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the OligoString example: run it with the first parameter set.
if __name__=='__main__':
print 'OligoString'
kernel_oligo_string_modular(*parameter_list[0])
# --- PolyMatchString kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# DNA train/test strings, used as default arguments by the example function.
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
# One positional-argument list per run: [train, test, degree, inhomogene].
parameter_list = [[traindat,testdat,3,False],[traindat,testdat,4,False]]
def kernel_poly_match_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,degree=3,inhomogene=False):
    """Evaluate a PolyMatchStringKernel on DNA string features.

    Parameters:
        fm_train_dna: training strings.
        fm_test_dna: test strings.
        degree, inhomogene: forwarded to the kernel constructor.

    Returns:
        (km_train, km_test, kernel): the train/train matrix, the train/test
        matrix, and the kernel left initialised on (train, test).
    """
    from shogun.Kernel import PolyMatchStringKernel
    from shogun.Features import StringCharFeatures, DNA

    feats_train = StringCharFeatures(fm_train_dna, DNA)
    # Fix: the test features were previously built from fm_train_dna, so
    # fm_test_dna was ignored and km_test merely repeated the training data.
    feats_test = StringCharFeatures(fm_test_dna, DNA)

    kernel = PolyMatchStringKernel(feats_train, feats_train, degree, inhomogene)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
# Entry point of the PolyMatchString example: run it with the first parameter set.
if __name__=='__main__':
print 'PolyMatchString'
kernel_poly_match_string_modular(*parameter_list[0])
# --- PolyMatchWordString kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# DNA train/test strings, used as default arguments by the example function.
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
# One positional-argument list per run:
# [train, test, degree, inhomogene, order, gap, reverse].
parameter_list = [[traindat,testdat,2,True,3,0,False],[traindat,testdat,2,True,3,0,False]]
def kernel_poly_match_word_string_modular(fm_train_dna=traindat,fm_test_dna=testdat,
        degree=2,inhomogene=True,order=3,gap=0,reverse=False):
    """Evaluate a PolyMatchWordStringKernel on DNA word features.

    Character features are converted to word features via
    ``obtain_from_char`` using ``order``, ``gap`` and ``reverse``.

    Returns:
        (km_train, km_test, kernel).
    """
    from shogun.Features import StringWordFeatures, StringCharFeatures, DNA
    from shogun.Kernel import PolyMatchWordStringKernel

    # Training strings -> word features.
    chars = StringCharFeatures(fm_train_dna, DNA)
    words_train = StringWordFeatures(DNA)
    words_train.obtain_from_char(chars, order-1, order, gap, reverse)

    # Test strings -> word features.
    chars = StringCharFeatures(fm_test_dna, DNA)
    words_test = StringWordFeatures(DNA)
    words_test.obtain_from_char(chars, order-1, order, gap, reverse)

    kernel = PolyMatchWordStringKernel(words_train, words_train, degree, inhomogene)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(words_train, words_test)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the PolyMatchWordString example: run it with the first parameter set.
if __name__=='__main__':
print 'PolyMatchWordString'
kernel_poly_match_word_string_modular(*parameter_list[0])
# --- Poly kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run:
# [train, test, degree, inhomogene, use_normalization].
parameter_list = [[traindat,testdat,4,False,True],[traindat,testdat,5,False,True]]
def kernel_poly_modular (fm_train_real=traindat,fm_test_real=testdat,degree=4,inhomogene=False,
        use_normalization=True):
    """Evaluate a PolyKernel on real-valued features.

    ``degree``, ``inhomogene`` and ``use_normalization`` are forwarded
    positionally to the kernel constructor after the two feature objects.

    Returns:
        (km_train, km_test, kernel).
    """
    from shogun.Kernel import PolyKernel
    from shogun.Features import RealFeatures

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    kernel = PolyKernel(train_feats, train_feats, degree, inhomogene, use_normalization)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the Poly example: run it with the first parameter set.
if __name__=='__main__':
print 'Poly'
kernel_poly_modular (*parameter_list[0])
# --- Power kernel example: module-level setup ---
from tools.load import LoadMatrix
# NOTE(review): `where` is not referenced by the power-kernel example code.
from numpy import where
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run: [train, test, degree].
parameter_list=[[traindat,testdat, 2.0],[traindat,testdat, 3.0]]
def kernel_power_modular (fm_train_real=traindat,fm_test_real=testdat, degree=2.0):
    """Evaluate a PowerKernel built on a Euclidean distance.

    Returns:
        (km_train, km_test, kernel): the train/train matrix, the train/test
        matrix, and the kernel left initialised on (train, test).
    """
    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Kernel import PowerKernel

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    euclid = EuclidianDistance(train_feats, train_feats)
    kernel = PowerKernel(train_feats, train_feats, degree, euclid)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the Power example: run it with the first parameter set.
if __name__=='__main__':
print 'Power'
kernel_power_modular(*parameter_list[0])
# --- RationalQuadratic kernel example: module-level setup ---
from tools.load import LoadMatrix
# NOTE(review): `where` is not referenced by the rational-quadratic example code.
from numpy import where
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run: [train, test, shift_coef].
parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 5.0]]
def kernel_rationalquadratic_modular (fm_train_real=traindat,fm_test_real=testdat, shift_coef=1.0):
    """Evaluate a RationalQuadraticKernel built on a Euclidean distance.

    Returns:
        (km_train, km_test, kernel).
    """
    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Kernel import RationalQuadraticKernel

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    euclid = EuclidianDistance(train_feats, train_feats)
    kernel = RationalQuadraticKernel(train_feats, train_feats, shift_coef, euclid)
    gram_train = kernel.get_kernel_matrix()

    # Switch the right-hand side to the test features.
    kernel.init(train_feats, test_feats)
    return gram_train, kernel.get_kernel_matrix(), kernel
# Entry point of the RationalQuadratic example: run it with the first parameter set.
if __name__=='__main__':
print 'RationalQuadratic'
kernel_rationalquadratic_modular(*parameter_list[0])
# --- SalzbergWordString kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# DNA train/test strings and training labels, used as default arguments below.
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
label_traindat = lm.load_labels('../data/label_train_dna.dat')
# One positional-argument list per run: [train, test, labels, order, gap, reverse].
parameter_list = [[traindat,testdat,label_traindat,3,0,False],[traindat,testdat,label_traindat,3,0,False]]
def kernel_salzberg_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,
        order=3,gap=0,reverse=False):
    """Evaluate a SalzbergWordStringKernel backed by a trained PluginEstimate.

    DNA strings are converted to word features, a PluginEstimate is trained
    on the training words and labels, and the kernel (which also receives
    the labels) is evaluated on train/train and train/test. The estimator
    is also applied to the test features; its predicted labels are computed
    but discarded.

    Returns:
        (km_train, km_test, kernel).
    """
    from shogun.Classifier import PluginEstimate
    from shogun.Features import StringCharFeatures, StringWordFeatures, DNA, Labels
    from shogun.Kernel import SalzbergWordStringKernel

    # Training strings -> word features.
    chars = StringCharFeatures(fm_train_dna, DNA)
    words_train = StringWordFeatures(chars.get_alphabet())
    words_train.obtain_from_char(chars, order-1, order, gap, reverse)

    # Test strings -> word features.
    chars = StringCharFeatures(fm_test_dna, DNA)
    words_test = StringWordFeatures(chars.get_alphabet())
    words_test.obtain_from_char(chars, order-1, order, gap, reverse)

    # Train the estimator the kernel relies on.
    estimator = PluginEstimate()
    train_labels = Labels(label_train_dna)
    estimator.set_labels(train_labels)
    estimator.set_features(words_train)
    estimator.train()

    kernel = SalzbergWordStringKernel(words_train, words_train, estimator, train_labels)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(words_train, words_test)
    estimator.set_features(words_test)
    estimator.apply().get_labels()  # result intentionally unused, as in the original
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the SalzbergWord example: run it with the first parameter set.
if __name__=='__main__':
print 'PluginEstimate w/ SalzbergWord'
kernel_salzberg_word_string_modular(*parameter_list[0])
# --- Sigmoid kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run: [train, test, size_cache, gamma, coef0].
parameter_list = [[traindat,testdat,10,1.2,1.3],[traindat,testdat,10,1.2,1.3]]
def kernel_sigmoid_modular(fm_train_real=traindat,fm_test_real=testdat,size_cache=10,gamma=1.2,coef0=1.3):
    """Evaluate a SigmoidKernel on real-valued features.

    ``size_cache``, ``gamma`` and ``coef0`` are forwarded to the kernel
    constructor in that order.

    Returns:
        (km_train, km_test, kernel).
    """
    from shogun.Kernel import SigmoidKernel
    from shogun.Features import RealFeatures

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    kernel = SigmoidKernel(train_feats, train_feats, size_cache, gamma, coef0)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the Sigmoid example: run it with the first parameter set.
if __name__=='__main__':
print 'Sigmoid'
kernel_sigmoid_modular(*parameter_list[0])
# --- SimpleLocalityImprovedString kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# DNA train/test strings, used as default arguments by the example function.
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
# One positional-argument list per run: [train, test, length, inner_degree, outer_degree].
parameter_list = [[traindat,testdat,5,5,1],[traindat,testdat,5,3,2]]
def kernel_simple_locality_improved_string_modular(fm_train_dna=traindat,fm_test_dna=testdat,
        length=5,inner_degree=5,outer_degree=1 ):
    """Evaluate a SimpleLocalityImprovedStringKernel on DNA string features.

    ``length``, ``inner_degree`` and ``outer_degree`` are forwarded to the
    kernel constructor in that order.

    Returns:
        (km_train, km_test, kernel).
    """
    # MSG_DEBUG is imported only for optional debug logging, e.g.
    # dna_train.io.set_loglevel(MSG_DEBUG); it is otherwise unused.
    from shogun.Kernel import SimpleLocalityImprovedStringKernel, MSG_DEBUG
    from shogun.Features import StringCharFeatures, DNA

    dna_train = StringCharFeatures(fm_train_dna, DNA)
    dna_test = StringCharFeatures(fm_test_dna, DNA)

    kernel = SimpleLocalityImprovedStringKernel(dna_train, dna_train, length, inner_degree, outer_degree)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(dna_train, dna_test)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the SimpleLocalityImprovedString example: run it with the first parameter set.
if __name__=='__main__':
print 'SimpleLocalityImprovedString'
kernel_simple_locality_improved_string_modular(*parameter_list[0])
# --- SparseGaussian kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run: [train, test, width].
parameter_list = [[traindat,testdat,1.1],[traindat,testdat,1.2]]
def kernel_sparse_gaussian_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.1 ):
    """Evaluate a GaussianKernel on sparse real-valued features.

    The input matrices are wrapped in SparseRealFeatures before use.

    Returns:
        (km_train, km_test, kernel).
    """
    from shogun.Kernel import GaussianKernel
    from shogun.Features import SparseRealFeatures

    sparse_train = SparseRealFeatures(fm_train_real)
    sparse_test = SparseRealFeatures(fm_test_real)

    kernel = GaussianKernel(sparse_train, sparse_train, width)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(sparse_train, sparse_test)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the SparseGaussian example: run it with the first parameter set.
if __name__=='__main__':
print 'SparseGaussian'
kernel_sparse_gaussian_modular (*parameter_list[0])
# --- SparseLinear kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run: [train, test, scale].
parameter_list = [[traindat,testdat,1.1],[traindat,testdat,1.2]]
def kernel_sparse_linear_modular (fm_train_real=traindat,fm_test_real=testdat,scale=1.1):
    """Evaluate a normalised LinearKernel on sparse real-valued features.

    An AvgDiagKernelNormalizer is attached to the empty kernel before it
    is first initialised with features.

    Returns:
        (km_train, km_test, kernel).
    """
    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer
    from shogun.Features import SparseRealFeatures

    sparse_train = SparseRealFeatures(fm_train_real)
    sparse_test = SparseRealFeatures(fm_test_real)

    kernel = LinearKernel()
    normalizer = AvgDiagKernelNormalizer(scale)
    kernel.set_normalizer(normalizer)

    kernel.init(sparse_train, sparse_train)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(sparse_train, sparse_test)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the SparseLinear example: run it with the first parameter set.
if __name__=='__main__':
print 'SparseLinear'
kernel_sparse_linear_modular(*parameter_list[0])
# --- SparsePoly kernel example: module-level setup ---
from tools.load import LoadMatrix
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run: [train, test, size_cache, degree, inhomogene].
parameter_list = [[traindat,testdat,10,3,True],[traindat,testdat,10,4,True]]
def kernel_sparse_poly_modular (fm_train_real=traindat,fm_test_real=testdat,
        size_cache=10,degree=3,inhomogene=True ):
    """Evaluate a PolyKernel on sparse real-valued features.

    Parameters:
        fm_train_real, fm_test_real: training / test matrices; wrapped in
            SparseRealFeatures before use.
        size_cache: passed as the last constructor argument (the slot after
            ``inhomogene``).
        degree: polynomial degree.
        inhomogene: inhomogeneity flag, forwarded to the kernel.

    Returns:
        (km_train, km_test, kernel); the kernel is left initialised on
        (train, test).
    """
    from shogun.Features import SparseRealFeatures
    from shogun.Kernel import PolyKernel

    feats_train = SparseRealFeatures(fm_train_real)
    feats_test = SparseRealFeatures(fm_test_real)

    # Fix: the feature-based constructor takes the degree right after the
    # two feature objects (as the dense poly example in this file does).
    # The old order (size_cache, degree, inhomogene) put the cache size in
    # the degree slot.
    kernel = PolyKernel(feats_train, feats_train, degree, inhomogene,
                        size_cache)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
# Entry point of the SparsePoly example: run it with the first parameter set.
if __name__=='__main__':
print 'SparsePoly'
kernel_sparse_poly_modular(*parameter_list[0])
# --- Spherical kernel example: module-level setup ---
from tools.load import LoadMatrix
# NOTE(review): `where` is not referenced by the spherical example code.
from numpy import where
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run: [train, test, sigma].
parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 5.0]]
def kernel_spherical_modular (fm_train_real=traindat,fm_test_real=testdat, sigma=1.0):
    """Evaluate a SphericalKernel built on a Euclidean distance.

    Parameters:
        fm_train_real, fm_test_real: training / test matrices.
        sigma: kernel parameter forwarded to the SphericalKernel constructor.

    Returns:
        (km_train, km_test, kernel); the kernel is left initialised on
        (train, test).
    """
    from shogun.Features import RealFeatures
    # Fix: this example previously built a MultiquadricKernel (copy-paste
    # from the multiquadric example), so the spherical kernel it is named
    # after was never exercised.
    # NOTE(review): assumes SphericalKernel is exported by shogun.Kernel with
    # the same (lhs, rhs, sigma, distance) argument order -- confirm.
    from shogun.Kernel import SphericalKernel
    from shogun.Distance import EuclidianDistance

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    distance = EuclidianDistance(feats_train, feats_train)
    kernel = SphericalKernel(feats_train, feats_train, sigma, distance)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
# Entry point of the Spherical example: run it with the first parameter set.
if __name__=='__main__':
print 'Spherical'
kernel_spherical_modular(*parameter_list[0])
# --- Spline kernel example: module-level setup ---
from tools.load import LoadMatrix
# NOTE(review): `where` is not referenced by the spline example code.
from numpy import where
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run: [train, test].
parameter_list=[[traindat,testdat],[traindat,testdat]]
def kernel_spline_modular (fm_train_real=traindat,fm_test_real=testdat):
    """Evaluate a SplineKernel on real-valued features.

    Returns:
        (km_train, km_test, kernel): the train/train matrix, the train/test
        matrix, and the kernel left initialised on (train, test).
    """
    from shogun.Kernel import SplineKernel
    from shogun.Features import RealFeatures

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    kernel = SplineKernel(train_feats, train_feats)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
# Entry point of the Spline example: run it with the first parameter set.
if __name__=='__main__':
print 'Spline'
kernel_spline_modular(*parameter_list[0])
# --- TOP kernel example: module-level setup ---
from tools.load import LoadMatrix
from numpy import where
lm=LoadMatrix()
# DNA train/test strings and training labels, used as default arguments below.
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
label_traindat = lm.load_labels('../data/label_train_dna.dat')
# Training strings split by label. The mask is wrapped in a list, so `where`
# sees a 2-D input and index [1] holds the positions along the label axis
# (assumes label_traindat is a 1-D numpy array -- TODO confirm).
fm_hmm_pos=[traindat[i] for i in where([label_traindat==1])[1] ]
fm_hmm_neg=[traindat[i] for i in where([label_traindat==-1])[1] ]
# One positional-argument list per run:
# [train, test, labels, pseudo, order, gap, reverse, kargs].
parameter_list = [[traindat,testdat,label_traindat,1e-1,1,0,False,[1, False, True]], \
[traindat,testdat,label_traindat,1e-1,1,0,False,[1, False, True] ]]
def kernel_top_modular(fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,pseudo=1e-1,
        order=1,gap=0,reverse=False,kargs=[1, False, True]):
    """Compute TOP-kernel matrices from two class-specific HMMs.

    One HMM is trained on the training strings labelled 1 and one on those
    labelled -1. TOPFeatures built from both HMMs are then fed to a
    PolyKernel, which is evaluated on train/train and on train/test.

    The per-class training strings are selected from ``fm_train_dna`` using
    ``label_train_dna`` (previously the function silently used module-level
    lists and ignored the labels argument). For the default arguments the
    selection is the same as before.

    Parameters:
        fm_train_dna, fm_test_dna: training / test strings.
        label_train_dna: one label per training string; entries equal to 1
            and -1 pick the data for the positive and negative HMM.
        pseudo: forwarded to the HMM constructor.
        order, gap, reverse: forwarded to ``obtain_from_char``.
        kargs: extra positional arguments for PolyKernel. Only unpacked,
            never mutated, so the list default is safe.

    Returns:
        (km_train, km_test, kernel); the kernel is left initialised on
        (train, test).
    """
    from shogun.Features import StringCharFeatures, StringWordFeatures, TOPFeatures, DNA
    from shogun.Kernel import PolyKernel
    from shogun.Distribution import HMM, BW_NORMAL

    N = 1  # toy HMM with 1 state
    M = 4  # 4 observations -> DNA

    # Split the training strings by label, keeping their original order.
    hmm_pos_strings = [seq for seq, lab in zip(fm_train_dna, label_train_dna) if lab == 1]
    hmm_neg_strings = [seq for seq, lab in zip(fm_train_dna, label_train_dna) if lab == -1]

    # train HMM for positive class
    charfeat = StringCharFeatures(hmm_pos_strings, DNA)
    hmm_pos_train = StringWordFeatures(charfeat.get_alphabet())
    hmm_pos_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
    pos = HMM(hmm_pos_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)

    # train HMM for negative class
    charfeat = StringCharFeatures(hmm_neg_strings, DNA)
    hmm_neg_train = StringWordFeatures(charfeat.get_alphabet())
    hmm_neg_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
    neg = HMM(hmm_neg_train, N, M, pseudo)
    neg.baum_welch_viterbi_train(BW_NORMAL)

    # word features of the kernel training data
    charfeat = StringCharFeatures(fm_train_dna, DNA)
    wordfeats_train = StringWordFeatures(charfeat.get_alphabet())
    wordfeats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)

    # word features of the kernel testing data
    charfeat = StringCharFeatures(fm_test_dna, DNA)
    wordfeats_test = StringWordFeatures(charfeat.get_alphabet())
    wordfeats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)

    # kernel on training data
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = TOPFeatures(10, pos, neg, False, False)
    kernel = PolyKernel(feats_train, feats_train, *kargs)
    km_train = kernel.get_kernel_matrix()

    # kernel on testing data: cloned HMMs observe the test words, so the
    # HMMs referenced by feats_train keep observing the training words
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = TOPFeatures(10, pos_clone, neg_clone, False, False)
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
# Entry point of the TOP kernel example: run it with the first parameter set.
if __name__=='__main__':
print "TOP Kernel"
kernel_top_modular(*parameter_list[0])
# --- TStudent kernel example: module-level setup ---
from tools.load import LoadMatrix
# NOTE(review): `where` is not referenced by the t-Student example code.
from numpy import where
lm=LoadMatrix()
# Numeric train/test data, used as default arguments by the example function.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# One positional-argument list per run: [train, test, degree].
parameter_list=[[traindat,testdat, 2.0],[traindat,testdat, 3.0]]
def kernel_tstudent_modular (fm_train_real=traindat,fm_test_real=testdat, degree=2.0):
    """Evaluate a TStudentKernel built on a Euclidean distance.

    Returns:
        (km_train, km_test, kernel): the train/train matrix, the train/test
        matrix, and the kernel left initialised on (train, test).
    """
    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Kernel import TStudentKernel

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    euclid = EuclidianDistance(train_feats, train_feats)
    kernel = TStudentKernel(train_feats, train_feats, degree, euclid)
    gram_train = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    gram_test = kernel.get_kernel_matrix()
    return gram_train, gram_test, kernel
if __name__=='__main__':
print 'TStudent'
kernel_tstudent_modular(*parameter_list[0])
from tools.load import LoadMatrix
from numpy import where
lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# each entry: [training matrix, test matrix, theta]
parameter_list = [[traindat, testdat, 1.0], [traindat, testdat, 10.0]]


def kernel_wave_modular(fm_train_real=traindat, fm_test_real=testdat, theta=1.0):
    """Evaluate a WaveKernel on real-valued train and test data.

    The kernel is constructed on the training features with parameter
    ``theta`` and a EuclidianDistance; the train/train matrix is read,
    the kernel is re-initialised on (train, test), and the second matrix
    is read.

    Returns the tuple (km_train, km_test, kernel).
    """
    from shogun.Features import RealFeatures
    from shogun.Kernel import WaveKernel
    from shogun.Distance import EuclidianDistance

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    euclid = EuclidianDistance(train_feats, train_feats)
    wave = WaveKernel(train_feats, train_feats, theta, euclid)
    train_matrix = wave.get_kernel_matrix()

    wave.init(train_feats, test_feats)
    test_matrix = wave.get_kernel_matrix()

    return train_matrix, test_matrix, wave


if __name__ == '__main__':
    print('Wave')
    kernel_wave_modular(*parameter_list[0])
from tools.load import LoadMatrix
from numpy import where
lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# each entry: [training matrix, test matrix, dilation, translation]
parameter_list = [[traindat, testdat, 1.5, 1.0], [traindat, testdat, 1.0, 1.5]]


def kernel_wavelet_modular(fm_train_real=traindat, fm_test_real=testdat, dilation=1.5, translation=1.0):
    """Evaluate a WaveletKernel on real-valued train and test data.

    Returns the tuple (km_train, km_test, kernel), where km_train is read
    after constructing the kernel on (train, train) and km_test after
    re-initialising it on (train, test).
    """
    from shogun.Features import RealFeatures
    from shogun.Kernel import WaveletKernel

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    # NOTE(review): the literal 10 is presumably the kernel cache size - confirm
    wavelet = WaveletKernel(train_feats, train_feats, 10, dilation, translation)
    train_matrix = wavelet.get_kernel_matrix()

    wavelet.init(train_feats, test_feats)
    test_matrix = wavelet.get_kernel_matrix()

    return train_matrix, test_matrix, wavelet


if __name__ == '__main__':
    print('Wavelet')
    kernel_wavelet_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

# each entry: [training DNA strings, test DNA strings]
parameter_list = [[traindat, testdat], [traindat, testdat]]


def kernel_weighted_comm_word_string_modular(fm_train_dna=traindat, fm_test_dna=testdat, order=3, gap=0, reverse=True):
    """Evaluate a WeightedCommWordStringKernel on DNA strings.

    Both data sets are wrapped as StringCharFeatures, converted to
    StringWordFeatures via ``obtain_from_char`` and run through one shared
    SortWordString preprocessor that is initialised on the training words.

    Returns the tuple (km_train, km_test, kernel).
    """
    from shogun.Kernel import WeightedCommWordStringKernel
    from shogun.Features import StringWordFeatures, StringCharFeatures, DNA
    from shogun.Preprocessor import SortWordString

    # training words
    train_chars = StringCharFeatures(fm_train_dna, DNA)
    train_words = StringWordFeatures(train_chars.get_alphabet())
    train_words.obtain_from_char(train_chars, order - 1, order, gap, reverse)

    sorter = SortWordString()
    sorter.init(train_words)
    train_words.add_preprocessor(sorter)
    train_words.apply_preprocessor()

    # test words, reusing the preprocessor initialised above
    test_chars = StringCharFeatures(fm_test_dna, DNA)
    test_words = StringWordFeatures(test_chars.get_alphabet())
    test_words.obtain_from_char(test_chars, order - 1, order, gap, reverse)
    test_words.add_preprocessor(sorter)
    test_words.apply_preprocessor()

    use_sign = False
    wcws = WeightedCommWordStringKernel(train_words, train_words, use_sign)
    train_matrix = wcws.get_kernel_matrix()

    wcws.init(train_words, test_words)
    test_matrix = wcws.get_kernel_matrix()

    return train_matrix, test_matrix, wcws


if __name__ == '__main__':
    print('WeightedCommWordString')
    kernel_weighted_comm_word_string_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

# each entry: [training DNA strings, test DNA strings, degree]
parameter_list = [[traindat, testdat, 20], [traindat, testdat, 22]]


def kernel_weighted_degree_position_string_modular(fm_train_dna=traindat, fm_test_dna=testdat, degree=20):
    """Evaluate a WeightedDegreePositionStringKernel on DNA strings.

    One shift of 10 and one position weight of 1.0 is set per character of
    the first training string before any kernel matrix is read.

    Returns the tuple (km_train, km_test, kernel).
    """
    from shogun.Features import StringCharFeatures, DNA
    from shogun.Kernel import WeightedDegreePositionStringKernel, MSG_DEBUG
    from numpy import zeros, ones, float64, int32

    train_feats = StringCharFeatures(fm_train_dna, DNA)
    # for verbose output: train_feats.io.set_loglevel(MSG_DEBUG)
    test_feats = StringCharFeatures(fm_test_dna, DNA)

    wdp = WeightedDegreePositionStringKernel(train_feats, train_feats, degree)

    # length of the first training string determines both vectors
    seq_len = len(fm_train_dna[0])
    wdp.set_shifts(10 * ones(seq_len, dtype=int32))
    wdp.set_position_weights(ones(seq_len, dtype=float64))

    train_matrix = wdp.get_kernel_matrix()
    wdp.init(train_feats, test_feats)
    test_matrix = wdp.get_kernel_matrix()

    return train_matrix, test_matrix, wdp


if __name__ == '__main__':
    print('WeightedDegreePositionString')
    kernel_weighted_degree_position_string_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

# each entry: [training DNA strings, test DNA strings, degree]
parameter_list = [[traindat, testdat, 3], [traindat, testdat, 20]]


def kernel_weighted_degree_string_modular(fm_train_dna=traindat, fm_test_dna=testdat, degree=20):
    """Evaluate a WeightedDegreeStringKernel on DNA strings.

    The weights passed to ``set_wd_weights`` are degree, degree-1, ..., 1,
    each divided by the sum 1 + 2 + ... + degree.

    Returns the tuple (km_train, km_test, kernel).
    """
    from shogun.Features import StringCharFeatures, DNA
    from shogun.Kernel import WeightedDegreeStringKernel, MSG_DEBUG
    from numpy import arange, double

    train_feats = StringCharFeatures(fm_train_dna, DNA)
    # for verbose output: train_feats.io.set_loglevel(MSG_DEBUG)
    test_feats = StringCharFeatures(fm_test_dna, DNA)

    wd = WeightedDegreeStringKernel(train_feats, train_feats, degree)

    ramp = arange(1, degree + 1, dtype=double)
    weights = ramp[::-1] / sum(ramp)
    wd.set_wd_weights(weights)
    # position weights could be set as well:
    #   from numpy import ones, float64
    #   wd.set_position_weights(ones(len(fm_train_dna[0]), dtype=float64))

    train_matrix = wd.get_kernel_matrix()
    wd.init(train_feats, test_feats)
    test_matrix = wd.get_kernel_matrix()

    # this is how to serialize the kernel:
    #   import pickle
    #   pickle.dump(wd, file('kernel_obj.dump', 'w'), protocol=2)
    #   k = pickle.load(file('kernel_obj.dump', 'r'))

    return train_matrix, test_matrix, wd


if __name__ == '__main__':
    print('WeightedDegreeString')
    kernel_weighted_degree_string_modular(*parameter_list[0])
from numpy import *
from shogun.Mathematics import *

# three 2x3 contingency tables
x = array([[20.0, 15, 15], [10, 20, 20]])
y = array([[21.0, 21, 18], [19, 19, 22]])
z = array([[15.0, 27, 18], [32, 5, 23]])

# one entry: [single table, the three tables joined along axis 1]
parameter_list = [[x, concatenate((x, y, z), 1)]]


def library_fisher2x3_modular(table, tables):
    """Run the 2x3 Fisher's exact test helpers of shogun.Mathematics.

    ``table`` is passed to ``Math_fishers_exact_test_for_2x3_table`` and
    ``tables`` to ``Math_fishers_exact_test_for_multiple_2x3_tables``.

    Returns the tuple (pval, pvals) of the two results.
    """
    single_pval = Math_fishers_exact_test_for_2x3_table(table)
    multi_pvals = Math_fishers_exact_test_for_multiple_2x3_tables(tables)
    return (single_pval, multi_pvals)


if __name__ == '__main__':
    print('Fisher 2x3')
    library_fisher2x3_modular(*parameter_list[0])
import time
from modshogun import Time

# one entry: [seconds to sleep]
parameter_list = [[1]]


def library_time(sleep_secs):
    """Measure elapsed wall-clock and CPU time around a sleep.

    A shogun ``Time`` object is created, the process sleeps for
    ``sleep_secs`` seconds, and both ``cur_time_diff()`` and
    ``cur_runtime_diff_sec()`` are then read from it.

    The original always slept for one second and ignored ``sleep_secs``;
    the argument is now honoured (the shipped parameter list passes 1, so
    the example's result is unchanged).

    Returns the tuple (round(wall_diff), round(cpu_diff)).
    """
    # measure wall clock time difference
    t = Time()
    time.sleep(sleep_secs)
    diff = t.cur_time_diff()

    # measure CPU time required
    cpu_diff = t.cur_runtime_diff_sec()

    # return results as integers to enable testing
    return round(diff), round(cpu_diff)


if __name__ == '__main__':
    print('Time')
    library_time(*parameter_list[0])
from shogun.Features import CombinedFeatures, RealFeatures, Labels
from shogun.Kernel import CombinedKernel, PolyKernel, CustomKernel
from shogun.Classifier import MKLClassification
from tools.load import LoadMatrix
lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

# each entry: [training matrix, test matrix, two-class training labels]
parameter_list = [[traindat, testdat, label_traindat], [traindat, testdat, label_traindat]]


def mkl_binclass_modular(fm_train_real=traindat, fm_test_real=testdat, fm_label_twoclass=label_traindat):
    """Train and apply a two-kernel MKLClassification machine.

    A degree-3 PolyKernel is evaluated up front to obtain precomputed
    train/train and train/test matrices. These are wrapped in CustomKernel
    objects and combined with a degree-2 PolyKernel inside a CombinedKernel,
    once for training and once for prediction.

    The original ran ``mkl.apply()`` twice and threw the first result away;
    the classifier is now applied once.

    Returns the tuple (result of mkl.apply(), prediction CombinedKernel).
    """
    ##################################
    # set up and train

    # create some poly train/test matrix
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K_train = tkernel.get_kernel_matrix()

    pfeats = RealFeatures(fm_test_real)
    tkernel.init(tfeats, pfeats)
    K_test = tkernel.get_kernel_matrix()

    # create combined train features
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(RealFeatures(fm_train_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_train))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_train)

    # train mkl
    labels = Labels(fm_label_twoclass)
    mkl = MKLClassification()

    # which norm to use for MKL (2 and 3 were noted as alternatives)
    mkl.set_mkl_norm(1)

    # set cost (neg, pos)
    mkl.set_C(1, 1)

    # set kernel and labels
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)

    # train
    mkl.train()

    ##################################
    # test

    # create combined test features
    feats_pred = CombinedFeatures()
    feats_pred.append_feature_obj(RealFeatures(fm_test_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_test))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    # and classify (once)
    mkl.set_kernel(kernel)
    return mkl.apply(), kernel


if __name__ == '__main__':
    mkl_binclass_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
label_train_multiclass = lm.load_labels('../data/label_train_multiclass.dat')

# each entry: [train, test, labels, width, C, epsilon, num_threads,
#              mkl_epsilon, mkl_norm]
parameter_list = [
    [fm_train_real, fm_test_real, label_train_multiclass, 1.2, 1.2, 1e-5, 1, 0.001, 1.5],
    [fm_train_real, fm_test_real, label_train_multiclass, 5, 1.2, 1e-2, 1, 0.001, 2]]


def mkl_multiclass_modular(fm_train_real, fm_test_real, label_train_multiclass,
                           width, C, epsilon, num_threads, mkl_epsilon, mkl_norm):
    """Train an MKLMultiClass machine on three sub-kernels and classify.

    A Gaussian, a linear and a degree-2 polynomial kernel are appended to
    one CombinedKernel, in that order; each gets its own RealFeatures copy
    of the train and test data inside the matching CombinedFeatures.

    Returns the labels obtained from ``mkl.apply().get_labels()`` after the
    kernel was re-initialised on (train, test).
    """
    from shogun.Features import CombinedFeatures, RealFeatures, Labels
    from shogun.Kernel import CombinedKernel, GaussianKernel, LinearKernel, PolyKernel
    from shogun.Classifier import MKLMultiClass

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    # factories keep the original construction order: features first,
    # then the sub-kernel, then the three append calls
    subkernel_factories = (
        lambda: GaussianKernel(10, width),
        lambda: LinearKernel(),
        lambda: PolyKernel(10, 2),
    )
    for make_subkernel in subkernel_factories:
        sub_train = RealFeatures(fm_train_real)
        sub_test = RealFeatures(fm_test_real)
        subkernel = make_subkernel()
        feats_train.append_feature_obj(sub_train)
        feats_test.append_feature_obj(sub_test)
        kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    labels = Labels(label_train_multiclass)
    mkl = MKLMultiClass(C, kernel, labels)
    mkl.set_epsilon(epsilon)
    mkl.parallel.set_num_threads(num_threads)
    mkl.set_mkl_epsilon(mkl_epsilon)
    mkl.set_mkl_norm(mkl_norm)
    mkl.train()

    kernel.init(feats_train, feats_test)
    predicted = mkl.apply().get_labels()
    return predicted


if __name__ == '__main__':
    print('mkl_multiclass')
    mkl_multiclass_modular(*parameter_list[0])
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Written (W) 2011 Heiko Strathmann
# Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
#
from numpy.random import randn
from numpy import *
# generate some overlapping training vectors: two clouds of num_vectors
# random 2-d points each, shifted by -vec_distance and +vec_distance
num_vectors = 100
vec_distance = 1
traindat = concatenate((randn(2, num_vectors) - vec_distance,
                        randn(2, num_vectors) + vec_distance), axis=1)
label_traindat = concatenate((-ones(num_vectors), ones(num_vectors)))

# one entry: [training matrix, training labels]
parameter_list = [[traindat, label_traindat]]


def modelselection_grid_search_linear_modular(traindat=traindat, label_traindat=label_traindat):
    """Grid-search C1 and C2 of a LibLinear classifier by cross-validation.

    A parameter tree with the two nodes "C1" and "C2" (both built with
    ``build_values(-2.0, 2.0, R_EXP)``) is searched using 10-subset
    stratified cross-validation scored by accuracy. The best combination
    is applied to the classifier and cross-validation is evaluated once
    more. Nothing is returned.
    """
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.ModelSelection import GridSearchModelSelection
    from shogun.ModelSelection import ModelSelectionParameters, R_EXP
    from shogun.ModelSelection import ParameterCombination
    from shogun.Features import Labels
    from shogun.Features import RealFeatures
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC

    # build parameter tree to select C1 and C2
    tree_root = ModelSelectionParameters()

    node_c1 = ModelSelectionParameters("C1")
    tree_root.append_child(node_c1)
    node_c1.build_values(-2.0, 2.0, R_EXP)

    node_c2 = ModelSelectionParameters("C2")
    tree_root.append_child(node_c2)
    node_c2.build_values(-2.0, 2.0, R_EXP)

    # training data
    features = RealFeatures(traindat)
    labels = Labels(label_traindat)

    # classifier
    machine = LibLinear(L2R_L2LOSS_SVC)

    # splitting strategy and evaluation method for cross-validation
    splitter = StratifiedCrossValidationSplitting(labels, 10)
    criterion = ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_val = CrossValidation(machine, features, labels, splitter, criterion)

    # model selection instance
    grid_search = GridSearchModelSelection(tree_root, cross_val)

    # perform model selection; tree_root.print_tree() shows the search space
    best = grid_search.select_model()

    # apply the best parameters (best.print_tree() shows them) and evaluate;
    # result.print_result() would display the outcome
    best.apply_to_machine(machine)
    result = cross_val.evaluate()


if __name__ == '__main__':
    print('ModelSelection GridSearchLinear')
    modelselection_grid_search_linear_modular(*parameter_list[0])
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Written (W) 2011 Heiko Strathmann
# Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
#
# the example takes no real input; a single placeholder argument is passed
parameter_list = [[None]]


def modelselection_parameter_tree_modular(dummy):
    """Build a model-selection parameter tree and enumerate its combinations.

    The tree has a root with a 'C' node and three 'kernel' nodes
    (PowerKernel with 'degree' and a MinkowskiMetric 'distance' holding
    'k'; GaussianKernel with 'width'; DistantSegmentsKernel with 'delta'
    and 'theta'). ``get_combinations()`` is called on the empty root and
    again on the finished tree. ``dummy`` is ignored; returns None.
    """
    from shogun.ModelSelection import ParameterCombination
    from shogun.ModelSelection import ModelSelectionParameters, R_EXP, R_LINEAR
    from shogun.ModelSelection import DynamicParameterCombinationArray
    from shogun.Kernel import PowerKernel
    from shogun.Kernel import GaussianKernel
    from shogun.Kernel import DistantSegmentsKernel
    from shogun.Distance import MinkowskiMetric

    tree = ModelSelectionParameters()

    # combinations of the still-empty tree
    combos = tree.get_combinations()
    combos.get_num_elements()

    # C
    node_c = ModelSelectionParameters('C')
    tree.append_child(node_c)
    node_c.build_values(1, 11, R_EXP)

    # power kernel: degree plus a Minkowski metric with parameter k
    node_power = ModelSelectionParameters('kernel', PowerKernel())
    tree.append_child(node_power)

    node_power_degree = ModelSelectionParameters('degree')
    node_power_degree.build_values(1, 1, R_EXP)
    node_power.append_child(node_power_degree)

    node_metric = ModelSelectionParameters('distance', MinkowskiMetric(10))
    node_power.append_child(node_metric)

    node_metric_k = ModelSelectionParameters('k')
    node_metric_k.build_values(1, 12, R_LINEAR)
    node_metric.append_child(node_metric_k)

    # gaussian kernel: width
    node_gauss = ModelSelectionParameters('kernel', GaussianKernel())
    tree.append_child(node_gauss)

    node_gauss_width = ModelSelectionParameters('width')
    node_gauss_width.build_values(1, 2, R_EXP)
    node_gauss.append_child(node_gauss_width)

    # distant segments kernel: delta and theta
    node_ds = ModelSelectionParameters('kernel', DistantSegmentsKernel())
    tree.append_child(node_ds)

    node_ds_delta = ModelSelectionParameters('delta')
    node_ds_delta.build_values(1, 2, R_EXP)
    node_ds.append_child(node_ds_delta)

    node_ds_theta = ModelSelectionParameters('theta')
    node_ds_theta.build_values(1, 2, R_EXP)
    node_ds.append_child(node_ds_theta)

    # tree.print_tree() would show the structure; each element of the
    # result below can be shown with combos.get_element(i).print_tree()
    combos = tree.get_combinations()


if __name__ == '__main__':
    print('ModelSelection ParameterTree')
    modelselection_parameter_tree_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

# each entry: [data matrix, k]
parameter_list = [[data, 3], [data, 10]]


def preprocessor_dimensionreductionpreprocessor_modular(data, k):
    """Apply a LocallyLinearEmbedding converter as a preprocessor.

    The converter's ``k`` is set, it is wrapped in a
    DimensionReductionPreprocessor, and that preprocessor is initialised on
    and applied to the feature matrix of a RealFeatures object built from
    ``data``.

    Returns the RealFeatures object.
    """
    from shogun.Features import RealFeatures
    from shogun.Preprocessor import DimensionReductionPreprocessor
    from shogun.Converter import LocallyLinearEmbedding

    feats = RealFeatures(data)

    lle = LocallyLinearEmbedding()
    lle.set_k(k)

    reducer = DimensionReductionPreprocessor(lle)
    reducer.init(feats)
    reducer.apply_to_feature_matrix(feats)

    return feats


if __name__ == '__main__':
    print('DimensionReductionPreprocessor')
    preprocessor_dimensionreductionpreprocessor_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

# each entry: [data matrix, threshold, width]
parameter_list = [[data, 0.01, 1.0], [data, 0.05, 2.0]]


def preprocessor_kernelpca_modular(data, threshold, width):
    """Apply KernelPCA with a Gaussian kernel to real-valued features.

    ``width`` parameterises the GaussianKernel. ``threshold`` is accepted
    but not used anywhere in this function.

    Returns the RealFeatures object the preprocessor was applied to.
    """
    from shogun.Features import RealFeatures
    from shogun.Preprocessor import KernelPCA
    from shogun.Kernel import GaussianKernel

    feats = RealFeatures(data)
    gauss = GaussianKernel(feats, feats, width)

    kpca = KernelPCA(gauss)
    kpca.init(feats)
    kpca.apply_to_feature_matrix(feats)

    return feats


if __name__ == '__main__':
    print('KernelPCA')
    preprocessor_kernelpca_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# each entry: [train matrix + 10, test matrix + 10, width, size_cache]
parameter_list = [[traindat + 10, testdat + 10, 1.4, 10], [traindat + 10, testdat + 10, 1.5, 10]]


def preprocessor_logplusone_modular(fm_train_real=traindat, fm_test_real=testdat, width=1.4, size_cache=10):
    """Apply the LogPlusOne preprocessor, then evaluate a Chi2Kernel.

    One LogPlusOne instance, initialised on the training features, is
    added to and applied on both the training and the test features.

    Returns the tuple (km_train, km_test, kernel).
    """
    from shogun.Kernel import Chi2Kernel
    from shogun.Features import RealFeatures
    from shogun.Preprocessor import LogPlusOne

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    log_plus_one = LogPlusOne()
    log_plus_one.init(train_feats)
    train_feats.add_preprocessor(log_plus_one)
    train_feats.apply_preprocessor()
    test_feats.add_preprocessor(log_plus_one)
    test_feats.apply_preprocessor()

    chi2 = Chi2Kernel(train_feats, train_feats, width, size_cache)
    train_matrix = chi2.get_kernel_matrix()

    chi2.init(train_feats, test_feats)
    test_matrix = chi2.get_kernel_matrix()

    return train_matrix, test_matrix, chi2


if __name__ == '__main__':
    print('LogPlusOne')
    preprocessor_logplusone_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# each entry: [training matrix, test matrix, width, size_cache]
parameter_list = [[traindat, testdat, 1.4, 10], [traindat, testdat, 1.5, 10]]


def preprocessor_normone_modular(fm_train_real=traindat, fm_test_real=testdat, width=1.4, size_cache=10):
    """Apply the NormOne preprocessor, then evaluate a Chi2Kernel.

    One NormOne instance, initialised on the training features, is added
    to and applied on both the training and the test features.

    Returns the tuple (km_train, km_test, kernel).
    """
    from shogun.Kernel import Chi2Kernel
    from shogun.Features import RealFeatures
    from shogun.Preprocessor import NormOne

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    norm_one = NormOne()
    norm_one.init(train_feats)
    train_feats.add_preprocessor(norm_one)
    train_feats.apply_preprocessor()
    test_feats.add_preprocessor(norm_one)
    test_feats.apply_preprocessor()

    chi2 = Chi2Kernel(train_feats, train_feats, width, size_cache)
    train_matrix = chi2.get_kernel_matrix()

    chi2.init(train_feats, test_feats)
    test_matrix = chi2.get_kernel_matrix()

    return train_matrix, test_matrix, chi2


if __name__ == '__main__':
    print('NormOne')
    preprocessor_normone_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

# one entry: [data matrix]
parameter_list = [[data]]


def preprocessor_pca_modular(data):
    """Initialise a default PCA preprocessor on ``data`` and apply it.

    Returns the RealFeatures object whose feature matrix the preprocessor
    was applied to.
    """
    from shogun.Features import RealFeatures
    from shogun.Preprocessor import PCA

    feats = RealFeatures(data)

    pca = PCA()
    pca.init(feats)
    pca.apply_to_feature_matrix(feats)

    return feats


if __name__ == '__main__':
    print('PCA')
    preprocessor_pca_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# each entry: [training matrix, test matrix, width, size_cache]
parameter_list = [[traindat, testdat, 1.5, 10], [traindat, testdat, 1.5, 10]]


def preprocessor_prunevarsubmean_modular(fm_train_real=traindat, fm_test_real=testdat, width=1.4, size_cache=10):
    """Apply the PruneVarSubMean preprocessor, then evaluate a Chi2Kernel.

    One PruneVarSubMean instance, initialised on the training features, is
    added to and applied on both the training and the test features.

    Returns the tuple (km_train, km_test, kernel).
    """
    from shogun.Kernel import Chi2Kernel
    from shogun.Features import RealFeatures
    from shogun.Preprocessor import PruneVarSubMean

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    pruner = PruneVarSubMean()
    pruner.init(train_feats)
    train_feats.add_preprocessor(pruner)
    train_feats.apply_preprocessor()
    test_feats.add_preprocessor(pruner)
    test_feats.apply_preprocessor()

    chi2 = Chi2Kernel(train_feats, train_feats, width, size_cache)
    train_matrix = chi2.get_kernel_matrix()

    chi2.init(train_feats, test_feats)
    test_matrix = chi2.get_kernel_matrix()

    return train_matrix, test_matrix, chi2


if __name__ == '__main__':
    print('PruneVarSubMean')
    preprocessor_prunevarsubmean_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

# each entry: [training matrix, test matrix, width, size_cache]
parameter_list = [[traindat, testdat, 1.5, 10], [traindat, testdat, 1.5, 10]]


def preprocessor_randomfouriergausspreproc_modular(fm_train_real=traindat, fm_test_real=testdat, width=1.4, size_cache=10):
    """Apply RandomFourierGaussPreproc, then evaluate a Chi2Kernel.

    One default-constructed RandomFourierGaussPreproc, initialised on the
    training features, is added to and applied on both feature objects.

    Returns the tuple (km_train, km_test, kernel).
    """
    from shogun.Kernel import Chi2Kernel
    from shogun.Features import RealFeatures
    from shogun.Preprocessor import RandomFourierGaussPreproc

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    rf_gauss = RandomFourierGaussPreproc()
    rf_gauss.init(train_feats)
    train_feats.add_preprocessor(rf_gauss)
    train_feats.apply_preprocessor()
    test_feats.add_preprocessor(rf_gauss)
    test_feats.apply_preprocessor()

    chi2 = Chi2Kernel(train_feats, train_feats, width, size_cache)
    train_matrix = chi2.get_kernel_matrix()

    chi2.init(train_feats, test_feats)
    test_matrix = chi2.get_kernel_matrix()

    return train_matrix, test_matrix, chi2


if __name__ == '__main__':
    print('RandomFourierGaussPreproc')
    preprocessor_randomfouriergausspreproc_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')

# each entry: [train DNA, test DNA, order, gap, reverse, use_sign]
parameter_list = [[traindna, testdna, 4, 0, False, False], [traindna, testdna, 3, 0, False, False]]


def preprocessor_sortulongstring_modular(fm_train_dna=traindna, fm_test_dna=testdna, order=3, gap=0, reverse=False, use_sign=False):
    """Sort ulong string features and evaluate a CommUlongStringKernel.

    Both DNA sets are loaded into StringCharFeatures, converted to
    StringUlongFeatures via ``obtain_from_char`` and then processed by one
    SortUlongString preprocessor that is initialised on the training set.

    Returns the tuple (km_train, km_test, kernel).
    """
    from shogun.Kernel import CommUlongStringKernel
    from shogun.Features import StringCharFeatures, StringUlongFeatures, DNA
    from shogun.Preprocessor import SortUlongString

    train_chars = StringCharFeatures(DNA)
    train_chars.set_features(fm_train_dna)
    train_ulongs = StringUlongFeatures(train_chars.get_alphabet())
    train_ulongs.obtain_from_char(train_chars, order - 1, order, gap, reverse)

    test_chars = StringCharFeatures(DNA)
    test_chars.set_features(fm_test_dna)
    test_ulongs = StringUlongFeatures(test_chars.get_alphabet())
    test_ulongs.obtain_from_char(test_chars, order - 1, order, gap, reverse)

    sorter = SortUlongString()
    sorter.init(train_ulongs)
    train_ulongs.add_preprocessor(sorter)
    train_ulongs.apply_preprocessor()
    test_ulongs.add_preprocessor(sorter)
    test_ulongs.apply_preprocessor()

    comm_ulong = CommUlongStringKernel(train_ulongs, train_ulongs, use_sign)
    train_matrix = comm_ulong.get_kernel_matrix()

    comm_ulong.init(train_ulongs, test_ulongs)
    test_matrix = comm_ulong.get_kernel_matrix()

    return train_matrix, test_matrix, comm_ulong


if __name__ == '__main__':
    print('CommUlongString')
    preprocessor_sortulongstring_modular(*parameter_list[0])
from tools.load import LoadMatrix
lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')

# each entry: [train DNA, test DNA, order, gap, reverse, use_sign]
parameter_list = [[traindna, testdna, 3, 0, False, False], [traindna, testdna, 3, 0, False, False]]


def preprocessor_sortwordstring_modular(fm_train_dna=traindna, fm_test_dna=testdna, order=3, gap=0, reverse=False, use_sign=False):
    """Sort word string features and evaluate a CommWordStringKernel.

    The training words are built and sorted first; the same SortWordString
    instance (initialised on the training words) is then added to and
    applied on the test words.

    Returns the tuple (km_train, km_test, kernel).
    """
    from shogun.Kernel import CommWordStringKernel
    from shogun.Features import StringCharFeatures, StringWordFeatures, DNA
    from shogun.Preprocessor import SortWordString

    train_chars = StringCharFeatures(fm_train_dna, DNA)
    train_words = StringWordFeatures(train_chars.get_alphabet())
    train_words.obtain_from_char(train_chars, order - 1, order, gap, reverse)

    sorter = SortWordString()
    sorter.init(train_words)
    train_words.add_preprocessor(sorter)
    train_words.apply_preprocessor()

    test_chars = StringCharFeatures(fm_test_dna, DNA)
    test_words = StringWordFeatures(test_chars.get_alphabet())
    test_words.obtain_from_char(test_chars, order - 1, order, gap, reverse)
    test_words.add_preprocessor(sorter)
    test_words.apply_preprocessor()

    comm_word = CommWordStringKernel(train_words, train_words, use_sign)
    train_matrix = comm_word.get_kernel_matrix()

    comm_word.init(train_words, test_words)
    test_matrix = comm_word.get_kernel_matrix()

    return train_matrix, test_matrix, comm_word


if __name__ == '__main__':
    print('CommWordString')
    preprocessor_sortwordstring_modular(*parameter_list[0])
###########################################################################
# kernel ridge regression
###########################################################################
from numpy import array
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

# each entry: [training matrix, test matrix, training labels, width, tau]
parameter_list = [[traindat, testdat, label_traindat, 0.8, 1e-6], [traindat, testdat, label_traindat, 0.9, 1e-7]]


def regression_krr_modular(fm_train=traindat, fm_test=testdat, label_train=label_traindat, width=0.8, tau=1e-6):
    """Train kernel ridge regression with a Gaussian kernel and predict.

    KRR is constructed from ``tau``, a GaussianKernel of the given
    ``width`` on the training features, and the training labels. After
    training, the kernel is re-initialised on (train, test) and the
    machine is applied.

    Returns the tuple (predicted labels, kernel, krr machine).
    """
    from shogun.Features import Labels, RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import KRR

    train_feats = RealFeatures(fm_train)
    test_feats = RealFeatures(fm_test)

    gauss = GaussianKernel(train_feats, train_feats, width)
    targets = Labels(label_train)

    machine = KRR(tau, gauss, targets)
    machine.train(train_feats)

    gauss.init(train_feats, test_feats)
    predicted = machine.apply().get_labels()

    return predicted, gauss, machine
# equivalent shorter version
def krr_short(fm_train=traindat, fm_test=testdat, label_train=label_traindat):
    """Compact variant of the kernel ridge regression example.

    Uses the fixed values width=0.8 and tau=1e-6, trains KRR on
    ``fm_train`` / ``label_train`` and applies it to ``fm_test``.

    The original body read ``fm_train``, ``fm_test`` and ``label_train``
    although no such names exist at module level, so every call raised
    NameError. They are now parameters that default to the module's
    loaded data, which keeps the zero-argument call working.

    Returns the tuple (krr machine, predicted labels).
    """
    print('KRR_short')
    from shogun.Features import Labels, RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import KRR

    width = 0.8
    tau = 1e-6
    krr = KRR(tau, GaussianKernel(0, width), Labels(label_train))
    krr.train(RealFeatures(fm_train))
    out = krr.apply(RealFeatures(fm_test)).get_labels()
    return krr, out


if __name__ == '__main__':
    print('KRR')
    regression_krr_modular(*parameter_list[0])
from numpy import array
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

# each entry: [train, test, labels, width, C, epsilon, tube_epsilon]
parameter_list = [
    [traindat, testdat, label_traindat, 2.1, 1, 1e-5, 1e-2],
    [traindat, testdat, label_traindat, 2.1, 1, 1e-5, 1e-2]]


def regression_libsvr_modular(fm_train=traindat, fm_test=testdat, label_train=label_traindat,
                              width=2.1, C=1, epsilon=1e-5, tube_epsilon=1e-2):
    """Train a LibSVR machine with a Gaussian kernel and predict twice.

    The first prediction is made after re-initialising the kernel on
    (train, test) and calling ``apply()`` without arguments; the second
    passes the test features directly to ``apply``.

    Returns the tuple (out1, out2, kernel).
    """
    from shogun.Features import Labels, RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import LibSVR

    train_feats = RealFeatures(fm_train)
    test_feats = RealFeatures(fm_test)

    gauss = GaussianKernel(train_feats, train_feats, width)
    targets = Labels(label_train)

    machine = LibSVR(C, tube_epsilon, gauss, targets)
    machine.set_epsilon(epsilon)
    machine.train()

    gauss.init(train_feats, test_feats)
    via_kernel = machine.apply().get_labels()
    via_features = machine.apply(test_feats).get_labels()

    return via_kernel, via_features, gauss


if __name__ == '__main__':
    print('LibSVR')
    regression_libsvr_modular(*parameter_list[0])
###########################################################################
# svm light based support vector regression
###########################################################################
from numpy import array
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

# each entry: [train, test, labels, width, C, epsilon, tube_epsilon,
#              num_threads]
parameter_list = [
    [traindat, testdat, label_traindat, 1.2, 1, 1e-5, 1e-2, 1],
    [traindat, testdat, label_traindat, 2.3, 0.5, 1e-5, 1e-6, 1]]


def regression_svrlight_modular(fm_train=traindat, fm_test=testdat, label_train=label_traindat,
                                width=1.2, C=1, epsilon=1e-5, tube_epsilon=1e-2, num_threads=3):
    """Train an SVRLight machine with a Gaussian kernel and predict.

    If ``SVRLight`` cannot be imported from shogun.Regression, a message
    is printed and None is returned. Otherwise the machine is trained with
    the given thread count, the kernel is re-initialised on (train, test)
    and the machine is applied.

    Returns the tuple (predicted labels, kernel), or None without SVRLight.
    """
    from shogun.Features import Labels, RealFeatures
    from shogun.Kernel import GaussianKernel
    try:
        from shogun.Regression import SVRLight
    except ImportError:
        print('No support for SVRLight available.')
        return

    train_feats = RealFeatures(fm_train)
    test_feats = RealFeatures(fm_test)

    gauss = GaussianKernel(train_feats, train_feats, width)
    targets = Labels(label_train)

    machine = SVRLight(C, epsilon, gauss, targets)
    machine.set_tube_epsilon(tube_epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.train()

    gauss.init(train_feats, test_feats)
    predicted = machine.apply().get_labels()

    return predicted, gauss


if __name__ == '__main__':
    print('SVRLight')
    regression_svrlight_modular(*parameter_list[0])
# each entry: [num, dist, dim, C, width]
parameter_list = [[5, 1, 10, 2.0, 10], [10, 0.3, 2, 1.0, 0.1]]


def check_status(status):
    """Silently verify the status of a serialization call.

    Returns None when ``status`` is truthy and raises AssertionError
    otherwise. The check is an explicit raise rather than an ``assert``
    statement so that it is not stripped when Python runs with ``-O``.
    """
    if not status:
        raise AssertionError("ERROR reading/writing serialized file")
def serialization_complex_example(num=5, dist=1, dim=10, C=2.0, width=10):
    """Serialize a trained GMNPSVM to four file formats and load it back.

    Four blocks of ``num`` random ``dim``-dimensional points (shifted by
    0, 1, 2 and 3 times ``dist``) with labels 0..3 are generated with a
    fixed seed. A GMNPSVM with a Gaussian kernel is trained and saved to
    HDF5, ASCII, JSON and XML files. Each file is then loaded into a fresh
    GMNPSVM, which is trained again. The files are deleted at the end.

    Returns the tuple (svm, new_svm), where new_svm is the machine loaded
    from the XML file (the last one read).
    """
    import os
    from numpy import concatenate, zeros, ones
    from numpy.random import randn, seed
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import GMNPSVM
    from shogun.Kernel import GaussianKernel
    from shogun.IO import SerializableHdf5File, SerializableAsciiFile, \
        SerializableJsonFile, SerializableXmlFile, MSG_DEBUG
    from shogun.Preprocessor import NormOne, LogPlusOne

    seed(17)

    data = concatenate((randn(dim, num), randn(dim, num) + dist,
                        randn(dim, num) + 2*dist,
                        randn(dim, num) + 3*dist), axis=1)
    lab = concatenate((zeros(num), ones(num), 2*ones(num), 3*ones(num)))

    feats = RealFeatures(data)
    # for verbose output: feats.io.set_loglevel(MSG_DEBUG)
    kernel = GaussianKernel(feats, feats, width)

    labels = Labels(lab)
    svm = GMNPSVM(C, kernel, labels)

    feats.add_preprocessor(NormOne())
    feats.add_preprocessor(LogPlusOne())
    feats.set_preprocessed(1)
    svm.train(feats)

    # svm.print_serializable() would dump the object

    # (file class, file name) for every supported format, in save order
    formats = (
        (SerializableHdf5File, "blaah.h5"),
        (SerializableAsciiFile, "blaah.asc"),
        (SerializableJsonFile, "blaah.json"),
        (SerializableXmlFile, "blaah.xml"),
    )

    # write the trained machine in every format
    for file_class, file_name in formats:
        fstream = file_class(file_name, "w")
        status = svm.save_serializable(fstream)
        check_status(status)

    # read every file back into a fresh machine and retrain it
    for file_class, file_name in formats:
        fstream = file_class(file_name, "r")
        new_svm = GMNPSVM()
        status = new_svm.load_serializable(fstream)
        check_status(status)
        new_svm.train()

    # remove the temporary files
    for _, file_name in formats:
        os.unlink(file_name)

    return svm, new_svm


if __name__ == '__main__':
    print('Serialization SVMLight')
    serialization_complex_example(*parameter_list[0])
from modshogun import *
from numpy import array
# one entry: [nested list holding a 2x3 matrix]
parameter_list = [[[[1.0, 2, 3], [4, 5, 6]]]]


def serialization_matrix_modular(m):
    """Save a feature matrix and a label vector as ASCII, then clean up.

    ``m`` is converted to a numpy array and wrapped in RealFeatures, which
    is written to "foo.asc"; a Labels object holding [1.0, 2, 3] is
    written to "foo2.asc". Both files are removed afterwards. Returns None.

    The script's imports do not bring in ``os`` although ``os.unlink`` is
    used, so it is imported locally here.
    """
    import os

    feats = RealFeatures(array(m))
    # to silence logging: feats.io.set_loglevel(0)
    fstream = SerializableAsciiFile("foo.asc", "w")
    feats.save_serializable(fstream)

    l = Labels(array([1.0, 2, 3]))
    fstream = SerializableAsciiFile("foo2.asc", "w")
    l.save_serializable(fstream)

    os.unlink("foo.asc")
    os.unlink("foo2.asc")


if __name__ == '__main__':
    print('Serialization Matrix Modular')
    serialization_matrix_modular(*parameter_list[0])
parameter_list=[[10, 1, 2.1, 2.0]]
def serialization_svmlight_modular(num, dist, width, C):
    """Train an SVMLight machine, pickle it through bz2 and retrain the copy.

    The training matrix is ``randn(2, num) - dist`` next to
    ``randn(2, num) + dist`` (random seed fixed to 17); the labels are
    ``num`` times -1 followed by ``num`` times +1.  An ``SVMLight`` with a
    ``GaussianKernel`` of the given ``width`` and the constant ``C`` is
    trained on it, pickled into "serialized_svm.bz2", loaded back and
    trained a second time.  The archive is removed afterwards.

    Returns ``(svm, svm.get_objective(), svm2, svm2.get_objective())`` where
    ``svm2`` is the unpickled machine.

    Raises IOError when the archive cannot be opened for writing or reading.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import SVMLight
    from numpy import concatenate, ones
    from numpy.random import randn, seed
    import bz2
    import os
    import sys
    # Same fallback pattern the file already uses for cStringIO.
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    def save(filename, myobj):
        """
        save object to file using pickle
        @param filename: name of destination file
        @type filename: str
        @param myobj: object to save (has to be pickleable)
        @type myobj: obj
        """
        try:
            f = bz2.BZ2File(filename, 'wb')
        except IOError as details:
            sys.stderr.write('File ' + filename + ' cannot be written\n')
            # write() needs text; handing it the exception object itself
            # raises TypeError and hides the real problem.
            sys.stderr.write(str(details))
            # Nothing useful can happen without the archive, so report the
            # real error instead of failing later on a missing object.
            raise
        try:
            pickle.dump(myobj, f, protocol=2)
        finally:
            # Close the archive even when pickling fails.
            f.close()

    def load(filename):
        """
        Load from filename using pickle
        @param filename: name of file to load from
        @type filename: str
        """
        try:
            f = bz2.BZ2File(filename, 'rb')
        except IOError as details:
            sys.stderr.write('File ' + filename + ' cannot be read\n')
            sys.stderr.write(str(details))
            raise
        try:
            # The archive is the one written by save() above; unpickling is
            # only safe for such trusted data.
            return pickle.load(f)
        finally:
            f.close()

    ##################################################
    seed(17)
    traindata_real = concatenate(
        (randn(2, num) - dist, randn(2, num) + dist), axis=1)
    trainlab = concatenate((-ones(num), ones(num)))

    feats_train = RealFeatures(traindata_real)
    kernel = GaussianKernel(feats_train, feats_train, width)
    labels = Labels(trainlab)

    svm = SVMLight(C, kernel, labels)
    svm.train()

    ##################################################
    fn = "serialized_svm.bz2"
    try:
        save(fn, svm)
        svm2 = load(fn)
    finally:
        # Like the other serialization examples, do not leave the temporary
        # archive behind.
        if os.path.exists(fn):
            os.unlink(fn)

    # Retrain the unpickled machine so both objectives can be compared.
    svm2.train()
    return svm, svm.get_objective(), svm2, svm2.get_objective()
# Script entry point: announce the example and run it once with the first
# parameter set.
if __name__ == '__main__':
    print('Serialization SVMLight')
    serialization_svmlight_modular(*parameter_list[0])
from modshogun import StreamingVwFile
from modshogun import StreamingVwCacheFile
from modshogun import T_SVMLIGHT
from modshogun import StreamingVwFeatures
from modshogun import VowpalWabbit
# Single parameter set: the path of the input file handed to
# streaming_vw_createcache_modular.
parameter_list = [
    ['../data/fm_train_sparsereal.dat'],
]
def streaming_vw_createcache_modular(fname):
    """Create a VW binary cache from ``fname`` and then train from that cache.

    Pass 1 streams the ASCII file ``fname`` (parsed as SVMLight) through a
    ``VowpalWabbit`` object that has training disabled, with cache writing
    switched on.  Pass 2 opens "vw_cache.dat.cache" as a
    ``StreamingVwCacheFile`` and trains on it.  Nothing is returned.
    """
    # --- Pass 1: ASCII input, cache writing enabled ---
    source = StreamingVwFile(fname)
    # The cache goes to the default file name, vw_cache.dat.cache.
    source.set_write_to_cache(True)
    # Supported parser types are T_DENSE, T_SVMLIGHT and T_VW; this input is
    # in SVMLight format.
    source.set_parser_type(T_SVMLIGHT)
    # `True' indicates that the examples are labelled.
    examples = StreamingVwFeatures(source, True, 1024)
    learner = VowpalWabbit(examples)
    # With training disabled, train() does nothing but run over all examples.
    learner.set_no_training(True)
    learner.train()

    # --- Pass 2: train from the cache file generated above ---
    # The same three names are rebound deliberately, so the pass-1 wrappers
    # are dropped as soon as their replacements exist.
    source = StreamingVwCacheFile("vw_cache.dat.cache")
    # From here on everything is exactly as for normal input.
    examples = StreamingVwFeatures(source, True, 1024)
    learner = VowpalWabbit(examples)
    learner.train()
    #return learner
# Script entry point: run the cache-creation example once, unpacking the
# first parameter set as its arguments.
if __name__ == "__main__":
    streaming_vw_createcache_modular(*parameter_list[0])
from modshogun import StreamingVwFile
from modshogun import T_SVMLIGHT
from modshogun import StreamingVwFeatures
from modshogun import VowpalWabbit
# Single parameter set; its lone argument is a placeholder that
# streaming_vw_modular accepts but does not use.
parameter_list = [
    [None],
]
def streaming_vw_modular(dummy):
    """Train Vowpal Wabbit on a toy dataset stored in SVMLight format.

    ``dummy`` is accepted but never used; the input path is fixed to
    "../data/fm_train_sparsereal.dat".  Nothing is returned.
    """
    svmlight_stream = StreamingVwFile("../data/fm_train_sparsereal.dat")
    # Supported parser types are T_DENSE, T_SVMLIGHT and T_VW; this file is
    # in SVMLight format.
    svmlight_stream.set_parser_type(T_SVMLIGHT)
    # `True' indicates that the examples are labelled.
    labelled_examples = StreamingVwFeatures(svmlight_stream, True, 1024)
    learner = VowpalWabbit(labelled_examples)
    learner.train()
    #return learner
# Script entry point: run the streaming VW example once, unpacking the first
# parameter set as its arguments.
if __name__ == "__main__":
    streaming_vw_modular(*parameter_list[0])
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Single parameter set: the gzip-compressed pickle that
# structure_dynprog_modular reads.
parameter_list = [
    ['../data/DynProg_example_py.pickle.gz'],
]
from shogun.Structure import *
import numpy
from numpy import array,Inf,float64,matrix,frompyfunc,zeros
#from IPython.Shell import IPShellEmbed
#ipshell = IPShellEmbed()
import gzip
import scipy
from scipy.io import loadmat
import pickle
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
def _version_tuple(version, width=3):
    """Return the first ``width`` numeric components of ``version`` as ints.

    Non-numeric suffixes such as 'rc1' or 'dev0' are ignored and missing
    components count as 0, e.g. '0.10.0rc1' -> (0, 10, 0), '1.2' -> (1, 2, 0).
    """
    numbers = []
    for piece in version.split('.')[:width]:
        digits = ''
        for ch in piece:
            if not ch.isdigit():
                break
            digits += ch
        numbers.append(int(digits or 0))
    numbers.extend([0] * (width - len(numbers)))
    return tuple(numbers)


# Module/class paths recorded in old pickles, mapped to their current
# locations.  The versions are compared numerically: as plain strings
# '0.10.0' sorts before '0.7.0', which would wrongly disable the mapping.
if _version_tuple(scipy.__version__) >= (0, 7, 0):
    renametable = {
        'scipy.io.mio5': 'scipy.io.matlab.mio5',
        'scipy.sparse.sparse': 'scipy.sparse',
    }
else:
    renametable = {}
def mapname(name):
    """Return the replacement listed for ``name`` in ``renametable``.

    Names without an entry are returned unchanged.
    """
    return renametable.get(name, name)
# scipy compatibility class
class mat_struct(object):
    """Empty placeholder class.

    ``mapped_load_global`` substitutes it whenever a pickle refers to a
    class named 'mat_struct'.
    """
def mapped_load_global(self):
    """Handler for the pickle GLOBAL opcode that renames outdated paths.

    Reads the module line and the class line from the unpickler ``self``
    (dropping the trailing newline character of each), passes both through
    ``mapname`` and appends the resulting class to the unpickler.  A class
    named 'mat_struct' is replaced by the local ``mat_struct`` placeholder
    instead of being looked up with ``find_class``.
    """
    module_name = mapname(self.readline()[:-1])
    class_name = mapname(self.readline()[:-1])
    if class_name != 'mat_struct':
        resolved = self.find_class(module_name, class_name)
    else:
        resolved = mat_struct
    self.append(resolved)
def loads(str):
    """Unpickle the string ``str`` with ``mapped_load_global`` as GLOBAL handler.

    The parameter keeps its historical name ``str`` (it shadows the builtin
    inside this function only) so existing keyword callers keep working.

    Returns the unpickled object.

    NOTE: unpickling can execute arbitrary code; only feed this function
    data from a trusted source.
    """
    unpickler = pickle.Unpickler(StringIO(str))
    # ``dispatch`` is a dict stored on the Unpickler class.  Writing into it
    # directly would install the hook for every Unpickler in the process, so
    # give this instance its own copy first.
    unpickler.dispatch = dict(unpickler.dispatch)
    unpickler.dispatch[pickle.GLOBAL] = mapped_load_global
    return unpickler.load()
def structure_dynprog_modular(fname):
    """Configure a ``PlifMatrix`` and a ``DynProg`` object from a pickled example.

    ``fname`` names a gzip-compressed pickle that is decoded with ``loads``.
    The resulting dictionary supplies the entries 'penalty_array', 'model',
    'block', 'state_signals', 'a_trans', 'seg_path' and 'loss', which are
    converted to the dtypes used below and handed to the two objects.

    The path computation itself is disabled in this example, so the function
    only performs the set-up and returns nothing.
    """
    # Close the archive explicitly instead of leaving the handle open.
    archive = gzip.GzipFile(fname)
    try:
        data_dict = loads(archive.read())
    finally:
        archive.close()

    # ---- piecewise linear functions ----
    penalty_array = data_dict['penalty_array']
    num_plifs = len(penalty_array)
    num_limits = len(penalty_array[0].limits)

    pm = PlifMatrix()
    pm.create_plifs(num_plifs, num_limits)

    # Every entry of these arrays is filled in by the loop below.
    ids = numpy.zeros(num_plifs, dtype=numpy.int32)
    min_values = numpy.zeros(num_plifs, dtype=numpy.float64)
    max_values = numpy.zeros(num_plifs, dtype=numpy.float64)
    # Plain ``bool`` gives the same dtype as the ``numpy.bool`` alias, which
    # newer NumPy releases no longer provide.
    all_use_cache = numpy.zeros(num_plifs, dtype=bool)
    all_use_svm = numpy.zeros(num_plifs, dtype=numpy.int32)
    all_limits = zeros((num_plifs, num_limits))
    all_penalties = zeros((num_plifs, num_limits))

    for plif_idx in range(num_plifs):
        plif = penalty_array[plif_idx]
        # The stored ids are shifted down by one before being passed on.
        ids[plif_idx] = plif.id - 1
        min_values[plif_idx] = plif.min_value
        max_values[plif_idx] = plif.max_value
        all_use_cache[plif_idx] = plif.use_cache
        all_use_svm[plif_idx] = plif.use_svm
        all_limits[plif_idx] = plif.limits
        all_penalties[plif_idx] = plif.penalties
        # Plif names and transform types are not forwarded: the matching
        # setters (set_plif_names / set_plif_transform_type) are disabled in
        # this example.

    pm.set_plif_ids(ids)
    pm.set_plif_min_values(min_values)
    pm.set_plif_max_values(max_values)
    pm.set_plif_use_cache(all_use_cache)
    pm.set_plif_use_svm(all_use_svm)
    pm.set_plif_limits(all_limits)
    pm.set_plif_penalties(all_penalties)

    # Only the first two slices along the last axis are used.
    transition_ptrs = data_dict['model'].transition_pointers
    transition_ptrs = transition_ptrs[:, :, 0:2].astype(numpy.float64)
    pm.compute_plif_matrix(transition_ptrs)

    # ---- init_dyn_prog ----
    num_svms = 8
    dyn = DynProg(num_svms)

    orf_info = data_dict['model'].orf_info.astype(numpy.int32)
    dyn.set_num_states(orf_info.shape[0])

    block = data_dict['block']
    # One array element per character of the sequence.
    gene_string = array(list(str(block.seq)))

    # ---- precompute_content_svms ----
    # The stored positions are shifted down by one.
    pos = (block.all_pos - 1).astype(numpy.int32)
    dyn.set_pos(pos)
    dyn.set_gene_string(gene_string)
    dyn.create_word_string()
    dyn.precompute_stop_codons()
    dyn.init_content_svm_value_array(num_svms)

    # The example feeds all-zero weights; the pickled 'content_weights'
    # entry was read but immediately overwritten, so it is not loaded here.
    # NOTE(review): the leading 8 presumably matches num_svms -- confirm.
    dict_weights = zeros((8, 5440))
    dyn.set_dict_weights(dict_weights.T)
    dyn.precompute_content_values()

    dyn.init_mod_words_array(data_dict['model'].mod_words.astype(numpy.int32))
    pm.compute_signal_plifs(data_dict['state_signals'].astype(numpy.int32))
    dyn.set_orf_info(orf_info)

    p = data_dict['model'].p
    q = data_dict['model'].q
    dyn.set_p_vector(p)
    dyn.set_q_vector(q)

    a_trans = data_dict['a_trans'].astype(numpy.float64)
    dyn.set_a_trans_matrix(a_trans)
    dyn.check_svm_arrays()

    features = data_dict['block'].features
    dyn.set_observation_matrix(features)
    dyn.set_content_type_array(data_dict['seg_path'].astype(numpy.float64))
    dyn.best_path_set_segment_loss(data_dict['loss'].astype(numpy.float64))
    dyn.set_plif_matrices(pm)

    # The n-best path computation (dyn.compute_nbest_paths) and the
    # retrieval of states, scores and positions are intentionally left out,
    # as in the shipped example.
# Script entry point: announce the example and run it once with the first
# parameter set.
if __name__ == '__main__':
    print("Structure")
    structure_dynprog_modular(*parameter_list[0])
# Single parameter set, in order: num, order, gap, reverse.
parameter_list=[[10,7,0,0]]
def tests_check_commwordkernel_memleak_modular(num, order, gap, reverse):
    """Repeatedly build a CommWordStringKernel matrix on DNA strings.

    POS and NEG are lists of strings made of ``num`` repetitions of 'ACGT'
    or 'TTGT'.  Ten times in a row the function builds ``StringCharFeatures``
    over the DNA alphabet from ``POS+NEG``, converts them to
    ``StringWordFeatures`` with ``obtain_from_char(traindat, order-1, order,
    gap, reverse)``, applies a ``SortWordString`` preprocessor and computes
    the kernel matrix of a ``CommWordStringKernel(10, False)`` that uses an
    ``IdentityKernelNormalizer``.

    Returns the kernel matrix of the last iteration.
    """
    # NOTE(review): gc, mat and MSG_DEBUG are imported but only MSG_DEBUG is
    # mentioned, and only in the disabled log-level lines below.
    import gc
    from shogun.Features import Alphabet,StringCharFeatures,StringWordFeatures,DNA
    from shogun.Preprocessor import SortWordString, MSG_DEBUG
    from shogun.Kernel import CommWordStringKernel, IdentityKernelNormalizer
    from numpy import mat
    POS=[num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT']
    NEG=[num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT']
    # The whole pipeline is rebuilt ten times; presumably repeating the
    # construction is what makes a leak visible -- TODO confirm.
    # NOTE(review): the loop extent is reconstructed from the collapsed
    # source; the ``del`` statements must follow the loop because ``order``,
    # ``gap`` and ``reverse`` are read on every iteration.
    for i in xrange(10):
        alpha=Alphabet(DNA)
        traindat=StringCharFeatures(alpha)
        traindat.set_features(POS+NEG)
        trainudat=StringWordFeatures(traindat.get_alphabet());
        trainudat.obtain_from_char(traindat, order-1, order, gap, reverse)
        #trainudat.io.set_loglevel(MSG_DEBUG)
        pre = SortWordString()
        #pre.io.set_loglevel(MSG_DEBUG)
        pre.init(trainudat)
        trainudat.add_preprocessor(pre)
        trainudat.apply_preprocessor()
        spec = CommWordStringKernel(10, False)
        spec.set_normalizer(IdentityKernelNormalizer())
        spec.init(trainudat, trainudat)
        K=spec.get_kernel_matrix()
    # Drop the local references to the inputs before returning.
    del POS
    del NEG
    del order
    del gap
    del reverse
    return K
# Script entry point: announce the check and run it once with the first
# parameter set.
if __name__=='__main__':
    print 'Leak Check Comm Word Kernel'
    tests_check_commwordkernel_memleak_modular(*parameter_list[0])