SQL'in Hive'daki "ofset" i ile aynı işlevselliği nasıl sağlayabilirim?Ofset işlevselliği Hive
SELECT * from table LIMIT 20 OFFSET 30
teşekkürler!
SQL'in Hive'daki "ofset" i ile aynı işlevselliği nasıl sağlayabilirim?Ofset işlevselliği Hive
SELECT * from table LIMIT 20 OFFSET 30
teşekkürler!
Ben bu davranışı taklit edecek yerleşik bir fonksiyon ya UDF habersiz ama HIVE 0.13
kullanıyorsanız, istenilen sonucu elde etmek için bir tur hakkında şekilde row_number()
işlevini kullanabilirsiniz.
select pk, col_1, col_2, ... , col_n
from (
select pk, col_1, col_2, ... , col_n, row_number() OVER (ORDER by pk) as rank
from some_database.some_table
) x
where rank between 31 and 50
public class CountRatingQueryBuilder {
private static final String SCORING_TABLE_NAME = "web_resource_rating";
private final Connection connection;
private final ScoringMetadata scoringMetadata;
private final SelectSelectStep select;
private final Factory create;
public CountRatingQueryBuilder(Connection connection, ScoringMetadata scoringMetadata){
this.connection = connection;
this.scoringMetadata = scoringMetadata;
create = new Factory(this.connection, SQLDialect.MYSQL);
select = create.select();
withSelectFieldsClause();
}
public CountRatingQueryBuilder withLimit(int limit){
select.limit(limit);
return this;
}
public CountRatingQueryBuilder withRegionId(Integer regionId){
select.where(REGION_ID.field().equal(regionId));
return this;
}
public CountRatingQueryBuilder withResourceTypeId(int resourceTypeId){
select.where(RESOURCE_TYPE_ID.field().equal(resourceTypeId));
return this;
}
public CountRatingQueryBuilder withRequestTimeBetween(long beginTimestamp, long endTimestamp){
select.where(REQUEST_TIME.field().between(beginTimestamp, endTimestamp));
return this;
}
public CountRatingQueryBuilder withResourceId(int resourceId){
select.where(RESOURCE_ID.field().equal(resourceId));
return this;
}
protected void withGroupByClause(){
select.groupBy(REGION_ID.field());
select.groupBy(RESOURCE_TYPE_ID.field());
select.groupBy(RESOURCE_ID.field());
select.groupBy(CONTENT_ID.field());
}
protected void withSelectFieldsClause(){
select.select(REGION_ID.field());
select.select(RESOURCE_TYPE_ID.field());
select.select(CONTENT_ID.field());
select.select(RESOURCE_ID.field());
select.select(Factory.count(HIT_COUNT.field()).as(SUM_HIT_COUNT.fieldName()));
}
protected void withFromClause(){
select.from(SCORING_TABLE_NAME);
}
protected void withOrderByClause(){
select.orderBy(SUM_HIT_COUNT.field().desc());
}
public String build(){
withGroupByClause();
withOrderByClause();
withFromClause();
return select.getSQL().replace("offset ?","");//dirty hack for MySQL dialect. TODO: we can try to implement our own SQL dialect for Hive :)
}
public List<ResultRow> buildAndFetch(){
String sqlWithPlaceholders = build();
List<ResultRow> scoringResults = new ArrayList<ResultRow>(100);
List<Record> recordResults = create.fetch(sqlWithPlaceholders, ArrayUtils.subarray(select.getBindValues().toArray(new Object[select.getBindValues().size()]),0, select.getBindValues().size()-1));//select.fetch();
for(Record record : recordResults){
ResultRowBuilder resultRowBuilder = ResultRowBuilder.create();
resultRowBuilder.withContentType(scoringMetadata.getResourceType(record.getValue(RESOURCE_TYPE_ID.fieldName(), Integer.class)));
resultRowBuilder.withHitCount(record.getValue(SUM_HIT_COUNT.fieldName(), Long.class));
resultRowBuilder.withUrl(record.getValue(CONTENT_ID.fieldName(), String.class));
scoringResults.add(resultRowBuilder.build());
}
return scoringResults;
}
}
Umut bu aşağıdaki bağlantıdan kopyalanır doğru cevap: ayrıntılı olarak anlamak için jooq extend existing dialect. Adopt MySQL dialect to apache Hive dialect bakınız.
Bu, 'MySQL'de sınır 20, 30 'işleviyle aynı işleve sahiptir. – nervosol
http://stackoverflow.com/questions/11750312/hive-ql-limiting-number-of-rows-per-each-item –